Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e63940b..8b98c50 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -7,5 +7,4 @@
obj-$(CONFIG_APPLDATA_BASE) += appldata/
obj-y += net/
obj-$(CONFIG_PCI) += pci/
-obj-$(CONFIG_NUMA) += numa/
obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0023b78..896b68e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -30,7 +30,7 @@
def_bool y
config GENERIC_LOCKBREAK
- def_bool y if PREEMPT
+ def_bool y if PREEMPTION
config PGSTE
def_bool y if KVM
@@ -59,6 +59,8 @@
config S390
def_bool y
select ARCH_BINFMT_ELF_STATE
+ select ARCH_HAS_DEBUG_VM_PGTABLE
+ select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
@@ -72,6 +74,7 @@
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_VDSO_DATA
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_INLINE_READ_LOCK
select ARCH_INLINE_READ_LOCK_BH
@@ -101,22 +104,24 @@
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
- select ARCH_KEEP_MEMBLOCK
- select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+ select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_IPC_PARSE_VERSION
- select BUILDTIME_EXTABLE_SORT
+ select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS2
+ select DMA_OPS if PCI
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select GENERIC_FIND_FIRST_BIT
+ select GENERIC_GETTIMEOFDAY
+ select GENERIC_PTDUMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
@@ -124,6 +129,7 @@
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
+ select HAVE_ARCH_KASAN_VMALLOC
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
@@ -134,7 +140,6 @@
select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
- select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
@@ -143,10 +148,14 @@
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FENTRY
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
+ select HAVE_GENERIC_VDSO
+ select HAVE_IOREMAP_PROT if PCI
+ select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@@ -155,21 +164,22 @@
select HAVE_KERNEL_UNCOMPRESSED
select HAVE_KERNEL_XZ
select HAVE_KPROBES
+ select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
select HAVE_KVM
select HAVE_LIVEPATCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
- select HAVE_MMU_GATHER_NO_GATHER
+ select MMU_GATHER_NO_GATHER
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NOP_MCOUNT
select HAVE_OPROFILE
select HAVE_PCI
select HAVE_PERF_EVENTS
- select HAVE_RCU_TABLE_FREE
+ select MMU_GATHER_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_RELIABLE_STACKTRACE
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
@@ -182,6 +192,8 @@
select OLD_SIGSUSPEND3
select PCI_DOMAINS if PCI
select PCI_MSI if PCI
+ select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
+ select SET_FS
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
@@ -192,6 +204,7 @@
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select SWIOTLB
select GENERIC_ALLOCATOR
+ imply IMA_SECURE_AND_OR_TRUSTED_BOOT
config SCHED_OMIT_FRAME_POINTER
@@ -246,8 +259,8 @@
config MARCH_Z900
bool "IBM zSeries model z800 and z900"
- depends on !CC_IS_CLANG
select HAVE_MARCH_Z900_FEATURES
+ depends on $(cc-option,-march=z900)
help
Select this to enable optimizations for model z800/z900 (2064 and
2066 series). This will enable some optimizations that are not
@@ -255,8 +268,8 @@
config MARCH_Z990
bool "IBM zSeries model z890 and z990"
- depends on !CC_IS_CLANG
select HAVE_MARCH_Z990_FEATURES
+ depends on $(cc-option,-march=z990)
help
Select this to enable optimizations for model z890/z990 (2084 and
2086 series). The kernel will be slightly faster but will not work
@@ -264,8 +277,8 @@
config MARCH_Z9_109
bool "IBM System z9"
- depends on !CC_IS_CLANG
select HAVE_MARCH_Z9_109_FEATURES
+ depends on $(cc-option,-march=z9-109)
help
Select this to enable optimizations for IBM System z9 (2094 and
2096 series). The kernel will be slightly faster but will not work
@@ -274,6 +287,7 @@
config MARCH_Z10
bool "IBM System z10"
select HAVE_MARCH_Z10_FEATURES
+ depends on $(cc-option,-march=z10)
help
Select this to enable optimizations for IBM System z10 (2097 and
2098 series). The kernel will be slightly faster but will not work
@@ -282,6 +296,7 @@
config MARCH_Z196
bool "IBM zEnterprise 114 and 196"
select HAVE_MARCH_Z196_FEATURES
+ depends on $(cc-option,-march=z196)
help
Select this to enable optimizations for IBM zEnterprise 114 and 196
(2818 and 2817 series). The kernel will be slightly faster but will
@@ -290,6 +305,7 @@
config MARCH_ZEC12
bool "IBM zBC12 and zEC12"
select HAVE_MARCH_ZEC12_FEATURES
+ depends on $(cc-option,-march=zEC12)
help
Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
2827 series). The kernel will be slightly faster but will not work on
@@ -298,6 +314,7 @@
config MARCH_Z13
bool "IBM z13s and z13"
select HAVE_MARCH_Z13_FEATURES
+ depends on $(cc-option,-march=z13)
help
Select this to enable optimizations for IBM z13s and z13 (2965 and
2964 series). The kernel will be slightly faster but will not work on
@@ -306,6 +323,7 @@
config MARCH_Z14
bool "IBM z14 ZR1 and z14"
select HAVE_MARCH_Z14_FEATURES
+ depends on $(cc-option,-march=z14)
help
Select this to enable optimizations for IBM z14 ZR1 and z14 (3907
and 3906 series). The kernel will be slightly faster but will not
@@ -314,6 +332,7 @@
config MARCH_Z15
bool "IBM z15"
select HAVE_MARCH_Z15_FEATURES
+ depends on $(cc-option,-march=z15)
help
Select this to enable optimizations for IBM z15 (8562
and 8561 series). The kernel will be slightly faster but will not
@@ -367,33 +386,39 @@
config TUNE_Z900
bool "IBM zSeries model z800 and z900"
- depends on !CC_IS_CLANG
+ depends on $(cc-option,-mtune=z900)
config TUNE_Z990
bool "IBM zSeries model z890 and z990"
- depends on !CC_IS_CLANG
+ depends on $(cc-option,-mtune=z990)
config TUNE_Z9_109
bool "IBM System z9"
- depends on !CC_IS_CLANG
+ depends on $(cc-option,-mtune=z9-109)
config TUNE_Z10
bool "IBM System z10"
+ depends on $(cc-option,-mtune=z10)
config TUNE_Z196
bool "IBM zEnterprise 114 and 196"
+ depends on $(cc-option,-mtune=z196)
config TUNE_ZEC12
bool "IBM zBC12 and zEC12"
+ depends on $(cc-option,-mtune=zEC12)
config TUNE_Z13
- bool "IBM z13"
+ bool "IBM z13s and z13"
+ depends on $(cc-option,-mtune=z13)
config TUNE_Z14
- bool "IBM z14"
+ bool "IBM z14 ZR1 and z14"
+ depends on $(cc-option,-mtune=z14)
config TUNE_Z15
bool "IBM z15"
+ depends on $(cc-option,-mtune=z15)
endchoice
@@ -414,9 +439,6 @@
(and some other stuff like libraries and such) is needed for
executing 31 bit applications. It is safe to say "Y".
-config COMPAT_VDSO
- def_bool COMPAT && !CC_IS_CLANG
-
config SYSVIPC_COMPAT
def_bool y if COMPAT && SYSVIPC
@@ -438,14 +460,6 @@
config HOTPLUG_CPU
def_bool y
-# Some NUMA nodes have memory ranges that span
-# other nodes. Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node. See memmap_init_zone()
-# for details. <- They meant memory holes!
-config NODES_SPAN_OTHER_NODES
- def_bool NUMA
-
config NUMA
bool "NUMA support"
depends on SCHED_TOPOLOGY
@@ -455,58 +469,10 @@
This option adds NUMA support to the kernel.
- An operation mode can be selected by appending
- numa=<method> to the kernel command line.
-
- The default behaviour is identical to appending numa=plain to
- the command line. This will create just one node with all
- available memory and all CPUs in it.
-
config NODES_SHIFT
- int "Maximum NUMA nodes (as a power of 2)"
- range 1 10
- depends on NUMA
- default "4"
- help
- Specify the maximum number of NUMA nodes available on the target
- system. Increases memory reserved to accommodate various tables.
-
-menu "Select NUMA modes"
- depends on NUMA
-
-config NUMA_EMU
- bool "NUMA emulation"
- default y
- help
- Numa emulation mode will split the available system memory into
- equal chunks which then are distributed over the configured number
- of nodes in a round-robin manner.
-
- The number of fake nodes is limited by the number of available memory
- chunks (i.e. memory size / fake size) and the number of supported
- nodes in the kernel.
-
- The CPUs are assigned to the nodes in a way that partially respects
- the original machine topology (if supported by the machine).
- Fair distribution of the CPUs is not guaranteed.
-
-config EMU_SIZE
- hex "NUMA emulation memory chunk size"
- default 0x10000000
- range 0x400000 0x100000000
- depends on NUMA_EMU
- help
- Select the default size by which the memory is chopped and then
- assigned to emulated NUMA nodes.
-
- This can be overridden by specifying
-
- emu_size=<n>
-
- on the kernel command line where also suffixes K, M, G, and T are
- supported.
-
-endmenu
+ int
+ depends on NEED_MULTIPLE_NODES
+ default "1"
config SCHED_SMT
def_bool n
@@ -669,10 +635,6 @@
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y
-config FORCE_MAX_ZONEORDER
- int
- default "9"
-
config MAX_PHYSMEM_BITS
int "Maximum size of supported physical memory in bits (42-53)"
range 42 53
@@ -740,7 +702,7 @@
config QDIO
def_tristate y
prompt "QDIO support"
- ---help---
+ help
This driver provides the Queued Direct I/O base support for
IBM System z.
@@ -812,6 +774,7 @@
def_tristate n
prompt "VFIO support for AP devices"
depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+ depends on ZCRYPT
help
This driver grants access to Adjunct Processor (AP) devices
via the VFIO mediated device interface.
@@ -837,32 +800,6 @@
endmenu
-config SECCOMP
- def_bool y
- prompt "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y.
-
-menu "Power Management"
-
-config ARCH_HIBERNATION_POSSIBLE
- def_bool y
-
-source "kernel/power/Kconfig"
-
-endmenu
-
config CCW
def_bool y
@@ -875,6 +812,7 @@
config PROTECTED_VIRTUALIZATION_GUEST
def_bool n
prompt "Protected virtualization guest support"
+ select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
help
Select this option, if you want to be able to run this
kernel as a protected virtualization KVM guest.
@@ -997,7 +935,6 @@
select TTY
select VIRTUALIZATION
select VIRTIO
- select VIRTIO_CONSOLE
help
Enabling this option adds support for virtio based paravirtual device
drivers on s390.
@@ -1006,3 +943,17 @@
the KVM hypervisor.
endmenu
+
+menu "Selftests"
+
+config S390_UNWIND_SELFTEST
+ def_tristate n
+ prompt "Test unwind functions"
+ help
+ This option enables s390 specific stack unwinder testing kernel
+ module. This option is not useful for distributions or general
+ kernels, but only for kernel developers working on architecture code.
+
+ Say N if you are unsure.
+
+endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 1905275..ab48b69 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -3,17 +3,5 @@
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config S390_PTDUMP
- bool "Export kernel pagetable layout to userspace via debugfs"
- depends on DEBUG_KERNEL
- select DEBUG_FS
- ---help---
- Say Y here if you want to show the kernel pagetable layout in a
- debugfs file. This information is only useful for kernel developers
- who are working in architecture specific areas of the kernel.
- It is probably not a good idea to enable this feature in a production
- kernel.
- If in doubt, say "N"
-
config EARLY_PRINTK
def_bool y
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2faaf45..9250691 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -27,7 +27,8 @@
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
@@ -157,7 +158,6 @@
vdso_install:
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
- $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index aa738ca..d74a4c7 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -51,10 +51,9 @@
*/
static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
static int appldata_timer_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
static int appldata_interval_handler(struct ctl_table *ctl, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
+ void *buffer, size_t *lenp, loff_t *ppos);
static struct ctl_table_header *appldata_sysctl_header;
static struct ctl_table appldata_table[] = {
@@ -217,7 +216,7 @@
*/
static int
appldata_timer_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int timer_active = appldata_timer_active;
int rc;
@@ -250,7 +249,7 @@
*/
static int
appldata_interval_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int interval = appldata_interval;
int rc;
@@ -280,7 +279,7 @@
*/
static int
appldata_generic_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
struct appldata_ops *ops = NULL, *tmp_ops;
struct list_head *lh;
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index e68136c..21c3147 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -29,10 +29,6 @@
* the structure version (product ID, see appldata_base.c) needs to be changed
* as well and all documentation and z/VM applications using it must be
* updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
struct appldata_mem_data {
u64 timestamp;
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 8bc14b0..59c282c 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -25,10 +25,6 @@
* This is accessed as binary data by z/VM. If changes to it can't be avoided,
* the structure version (product ID, see appldata_base.c) needs to be changed
* as well and all documentation and z/VM applications using it must be updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
struct appldata_net_sum_data {
u64 timestamp;
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 54f3756..a363d30 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -32,10 +32,6 @@
* the structure version (product ID, see appldata_base.c) needs to be changed
* as well and all documentation and z/VM applications using it must be
* updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
*/
struct appldata_os_per_cpu {
u32 per_cpu_user; /* timer ticks spent in user mode */
@@ -75,7 +71,7 @@
(waiting for I/O) */
/* per cpu data */
- struct appldata_os_per_cpu os_cpu[0];
+ struct appldata_os_per_cpu os_cpu[];
} __attribute__((packed));
static struct appldata_os_data *appldata_os_data;
@@ -133,8 +129,7 @@
os_data->nr_cpus = j;
- new_size = sizeof(struct appldata_os_data) +
- (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
+ new_size = struct_size(os_data, os_cpu, os_data->nr_cpus);
if (ops.size != new_size) {
if (ops.active) {
rc = appldata_diag(APPLDATA_RECORD_OS_ID,
@@ -169,8 +164,7 @@
{
int rc, max_size;
- max_size = sizeof(struct appldata_os_data) +
- (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
+ max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus());
if (max_size > APPLDATA_MAX_REC_SIZE) {
pr_err("Maximum OS record size %i exceeds the maximum "
"record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index 16ff906..b265bfe 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
image
bzImage
section_cmp.*
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ff9261..41a64b8 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,7 +37,7 @@
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o text_dma.o
-obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
@@ -73,7 +73,3 @@
install:
sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
-
-chkbss := $(obj-y)
-chkbss-target := startup.a
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
index e72fcd7..765a08f 100644
--- a/arch/s390/boot/compressed/.gitignore
+++ b/arch/s390/boot/compressed/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux
vmlinux.lds
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index fa529c5..b235ed9 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -62,7 +62,3 @@
OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
$(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
$(call if_changed,objcopy)
-
-chkbss := $(filter-out piggy.o info.o, $(obj-y))
-chkbss-target := vmlinux.bin
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
index 4504663..3061b11 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/compressed/decompressor.c
@@ -16,7 +16,6 @@
* gzip declarations
*/
#define STATIC static
-#define STATIC_RW_DATA static __section(.data)
#undef memset
#undef memcpy
@@ -30,13 +29,13 @@
extern unsigned char _compressed_end[];
#ifdef CONFIG_HAVE_KERNEL_BZIP2
-#define HEAP_SIZE 0x400000
+#define BOOT_HEAP_SIZE 0x400000
#else
-#define HEAP_SIZE 0x10000
+#define BOOT_HEAP_SIZE 0x10000
#endif
static unsigned long free_mem_ptr = (unsigned long) _end;
-static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE;
+static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
#ifdef CONFIG_KERNEL_GZIP
#include "../../../../lib/decompress_inflate.c"
@@ -62,7 +61,7 @@
#include "../../../../lib/decompress_unxz.c"
#endif
-#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE)
+#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
unsigned long mem_safe_offset(void)
{
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 44561b2..9427e2c 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -59,6 +59,19 @@
BOOT_DATA_PRESERVED
/*
+ * This is the BSS section of the decompressor and not of the decompressed Linux kernel.
+ * It will consume place in the decompressor's image.
+ */
+ . = ALIGN(8);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ _ebss = .;
+ }
+
+ /*
* uncompressed image info used by the decompressor it should match
* struct vmlinux_info. It comes from .vmlinux.info section of
* uncompressed vmlinux in a form of info.o
@@ -81,15 +94,6 @@
FILL(0xff);
. = ALIGN(4096);
}
- . = ALIGN(256);
- .bss : {
- _bss = . ;
- *(.bss)
- *(.bss.*)
- *(COMMON)
- . = ALIGN(8); /* For convenience during zeroing */
- _ebss = .;
- }
_end = .;
/* Sections to be discarded */
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index e6bf5f4..1a2c2b1 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -329,7 +329,7 @@
.quad .Lduct # cr5: primary-aste origin
.quad 0 # cr6: I/O interrupts
.quad 0 # cr7: secondary space segment table
- .quad 0 # cr8: access registers translation
+ .quad 0x0000000000008000 # cr8: access registers translation
.quad 0 # cr9: tracing off
.quad 0 # cr10: tracing off
.quad 0 # cr11: tracing off
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index bed227f..515b27a 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -21,15 +21,10 @@
if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
-# Default install - same as make zlilo
+echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
+ "bootloader config required" >&2
+if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
+if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
-if [ -f $4/vmlinuz ]; then
- mv $4/vmlinuz $4/vmlinuz.old
-fi
-
-if [ -f $4/System.map ]; then
- mv $4/System.map $4/System.old
-fi
-
-cat $2 > $4/vmlinuz
-cp $3 $4/System.map
+cat $2 > $4/vmlinuz-$1
+cp $3 $4/System.map-$1
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 75905b5..c56bbf5 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -2,25 +2,26 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/ctype.h>
+#include <linux/pgtable.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/facility.h>
-#include <asm/pgtable.h>
#include <asm/uv.h>
#include "boot.h"
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
struct ipl_parameter_block __bootdata_preserved(ipl_block);
int __bootdata_preserved(ipl_block_valid);
+unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE;
unsigned long __bootdata(memory_end);
int __bootdata(memory_end_set);
int __bootdata(noexec_disabled);
-int kaslr_enabled __section(.data);
+int kaslr_enabled;
static inline int __diag308(unsigned long subcode, void *addr)
{
@@ -72,30 +73,44 @@
static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
const struct ipl_parameter_block *ipb)
{
- size_t count;
- size_t i;
+ const __u8 *scp_data;
+ __u32 scp_data_len;
int has_lowercase;
+ size_t count = 0;
+ size_t i;
- count = min(size - 1, scpdata_length(ipb->fcp.scp_data,
- ipb->fcp.scp_data_len));
+ switch (ipb->pb0_hdr.pbt) {
+ case IPL_PBT_FCP:
+ scp_data_len = ipb->fcp.scp_data_len;
+ scp_data = ipb->fcp.scp_data;
+ break;
+ case IPL_PBT_NVME:
+ scp_data_len = ipb->nvme.scp_data_len;
+ scp_data = ipb->nvme.scp_data;
+ break;
+ default:
+ goto out;
+ }
+
+ count = min(size - 1, scpdata_length(scp_data, scp_data_len));
if (!count)
goto out;
has_lowercase = 0;
for (i = 0; i < count; i++) {
- if (!isascii(ipb->fcp.scp_data[i])) {
+ if (!isascii(scp_data[i])) {
count = 0;
goto out;
}
- if (!has_lowercase && islower(ipb->fcp.scp_data[i]))
+ if (!has_lowercase && islower(scp_data[i]))
has_lowercase = 1;
}
if (has_lowercase)
- memcpy(dest, ipb->fcp.scp_data, count);
+ memcpy(dest, scp_data, count);
else
for (i = 0; i < count; i++)
- dest[i] = tolower(ipb->fcp.scp_data[i]);
+ dest[i] = tolower(scp_data[i]);
out:
dest[count] = '\0';
return count;
@@ -117,6 +132,7 @@
parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
case IPL_PBT_FCP:
+ case IPL_PBT_NVME:
rc = ipl_block_get_ascii_scpdata(
parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
@@ -211,7 +227,7 @@
check_cleared_facilities();
}
-static char command_line_buf[COMMAND_LINE_SIZE] __section(.data);
+static char command_line_buf[COMMAND_LINE_SIZE];
void parse_boot_command_line(void)
{
char *param, *val;
@@ -232,6 +248,19 @@
if (!strcmp(param, "vmalloc") && val)
vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
+ if (!strcmp(param, "dfltcc") && val) {
+ if (!strcmp(val, "off"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED;
+ else if (!strcmp(val, "on"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_FULL;
+ else if (!strcmp(val, "def_only"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY;
+ else if (!strcmp(val, "inf_only"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY;
+ else if (!strcmp(val, "always"))
+ zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
+ }
+
if (!strcmp(param, "noexec")) {
rc = kstrtobool(val, &enabled);
if (!rc && !enabled)
@@ -243,17 +272,34 @@
if (!strcmp(param, "nokaslr"))
kaslr_enabled = 0;
+
+#if IS_ENABLED(CONFIG_KVM)
+ if (!strcmp(param, "prot_virt")) {
+ rc = kstrtobool(val, &enabled);
+ if (!rc && enabled)
+ prot_virt_host = 1;
+ }
+#endif
}
}
+static inline bool is_ipl_block_dump(void)
+{
+ if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
+ ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
+ return true;
+ if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
+ ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+ return true;
+ return false;
+}
+
void setup_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
if (OLDMEM_BASE) {
kaslr_enabled = 0;
- } else if (ipl_block_valid &&
- ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
- ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) {
+ } else if (ipl_block_valid && is_ipl_block_dump()) {
kaslr_enabled = 0;
if (!sclp_early_get_hsa_size(&memory_end) && memory_end)
memory_end_set = 1;
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 5591243..d844a5e 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -2,8 +2,8 @@
/*
* Copyright IBM Corp. 2019
*/
+#include <linux/pgtable.h>
#include <asm/mem_detect.h>
-#include <asm/pgtable.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/sclp.h>
@@ -42,7 +42,7 @@
return PRNG_MODE_TDES;
}
-static unsigned long get_random(unsigned long limit)
+static int get_random(unsigned long limit, unsigned long *value)
{
struct prng_parm prng = {
/* initial parameter block for tdes mode, copied from libica */
@@ -84,19 +84,101 @@
(u8 *) &random, sizeof(random));
break;
default:
- random = 0;
+ return -1;
}
- return random % limit;
+ *value = random % limit;
+ return 0;
+}
+
+/*
+ * To randomize kernel base address we have to consider several facts:
+ * 1. physical online memory might not be continuous and have holes. mem_detect
+ * info contains list of online memory ranges we should consider.
+ * 2. we have several memory regions which are occupied and we should not
+ * overlap and destroy them. Currently safe_addr tells us the border below
+ * which all those occupied regions are. We are safe to use anything above
+ * safe_addr.
+ * 3. the upper limit might apply as well, even if memory above that limit is
+ * online. Currently those limitations are:
+ * 3.1. Limit set by "mem=" kernel command line option
+ * 3.2. memory reserved at the end for kasan initialization.
+ * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
+ * Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
+ * (16 pages when the kernel is built with kasan enabled)
+ * Assumptions:
+ * 1. kernel size (including .bss size) and upper memory limit are page aligned.
+ * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE
+ * aligned (in practice memory configurations granularity on z/VM and LPAR
+ * is 1mb).
+ *
+ * To guarantee uniform distribution of kernel base address among all suitable
+ * addresses we generate random value just once. For that we need to build a
+ * continuous range in which every value would be suitable. We can build this
+ * range by simply counting all suitable addresses (let's call them positions)
+ * which would be valid as kernel base address. To count positions we iterate
+ * over online memory ranges. For each range which is big enough for the
+ * kernel image we count all suitable addresses we can put the kernel image at
+ * that is
+ * (end - start - kernel_size) / THREAD_SIZE + 1
+ * Two functions count_valid_kernel_positions and position_to_address help
+ * to count positions in memory range given and then convert position back
+ * to address.
+ */
+static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
+ unsigned long _min,
+ unsigned long _max)
+{
+ unsigned long start, end, pos = 0;
+ int i;
+
+ for_each_mem_detect_block(i, &start, &end) {
+ if (_min >= end)
+ continue;
+ if (start >= _max)
+ break;
+ start = max(_min, start);
+ end = min(_max, end);
+ if (end - start < kernel_size)
+ continue;
+ pos += (end - start - kernel_size) / THREAD_SIZE + 1;
+ }
+
+ return pos;
+}
+
+static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
+ unsigned long _min, unsigned long _max)
+{
+ unsigned long start, end;
+ int i;
+
+ for_each_mem_detect_block(i, &start, &end) {
+ if (_min >= end)
+ continue;
+ if (start >= _max)
+ break;
+ start = max(_min, start);
+ end = min(_max, end);
+ if (end - start < kernel_size)
+ continue;
+ if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
+ return start + (pos - 1) * THREAD_SIZE;
+ pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
+ }
+
+ return 0;
}
unsigned long get_random_base(unsigned long safe_addr)
{
- unsigned long memory_limit = memory_end_set ? memory_end : 0;
- unsigned long base, start, end, kernel_size;
- unsigned long block_sum, offset;
+ unsigned long memory_limit = get_mem_detect_end();
+ unsigned long base_pos, max_pos, kernel_size;
unsigned long kasan_needs;
int i;
+ if (memory_end_set)
+ memory_limit = min(memory_limit, memory_end);
+
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) {
if (safe_addr < INITRD_START + INITRD_SIZE)
safe_addr = INITRD_START + INITRD_SIZE;
@@ -126,45 +208,17 @@
}
kernel_size = vmlinux.image_size + vmlinux.bss_size;
- block_sum = 0;
- for_each_mem_detect_block(i, &start, &end) {
- if (memory_limit) {
- if (start >= memory_limit)
- break;
- if (end > memory_limit)
- end = memory_limit;
- }
- if (end - start < kernel_size)
- continue;
- block_sum += end - start - kernel_size;
- }
- if (!block_sum) {
+ if (safe_addr + kernel_size > memory_limit)
+ return 0;
+
+ max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
+ if (!max_pos) {
sclp_early_printk("KASLR disabled: not enough memory\n");
return 0;
}
- base = get_random(block_sum);
- if (base == 0)
+ /* we need a value in the range [1, base_pos] inclusive */
+ if (get_random(max_pos, &base_pos))
return 0;
- if (base < safe_addr)
- base = safe_addr;
- block_sum = offset = 0;
- for_each_mem_detect_block(i, &start, &end) {
- if (memory_limit) {
- if (start >= memory_limit)
- break;
- if (end > memory_limit)
- end = memory_limit;
- }
- if (end - start < kernel_size)
- continue;
- block_sum += end - start - kernel_size;
- if (base <= block_sum) {
- base = start + base - offset;
- base = ALIGN_DOWN(base, THREAD_SIZE);
- break;
- }
- offset = block_sum;
- }
- return base;
+ return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
}
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 83b5b79..a3c9862 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -2,6 +2,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/lowcore.h>
+#include <asm/setup.h>
#include <asm/sclp.h>
#include "boot.h"
@@ -32,7 +33,8 @@
char *p;
add_str(buf, "Linux version ");
- strlcat(buf, kernel_version, sizeof(buf));
+ strlcat(buf, kernel_version, sizeof(buf) - 1);
+ strlcat(buf, "\n", sizeof(buf));
sclp_early_printk(buf);
p = add_str(buf, "Kernel fault: interruption code ");
@@ -42,6 +44,13 @@
add_str(p, "\n");
sclp_early_printk(buf);
+ if (kaslr_enabled) {
+ p = add_str(buf, "Kernel random base: ");
+ p = add_val_as_hex(p, __kaslr_offset);
+ add_str(p, "\n");
+ sclp_early_printk(buf);
+ }
+
p = add_str(buf, "PSW : ");
p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask);
p = add_str(p, " ");
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index fa01502..cc96b04 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -48,8 +48,6 @@
};
static struct diag210 _diag210_tmp_dma __section(".dma.data");
struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
-void _swsusp_reset_dma(void);
-unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
void error(char *x)
{
@@ -120,6 +118,9 @@
}
}
+/*
+ * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
+ */
static void clear_bss_section(void)
{
memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S
index ea93314..5ff5fee 100644
--- a/arch/s390/boot/text_dma.S
+++ b/arch/s390/boot/text_dma.S
@@ -86,23 +86,6 @@
ENDPROC(_diag0c_dma)
/*
- * void _swsusp_reset_dma(void)
- */
-ENTRY(_swsusp_reset_dma)
- larl %r1,restart_entry
- larl %r2,.Lrestart_diag308_psw
- og %r1,0(%r2)
- stg %r1,0(%r0)
- lghi %r0,0
- diag %r0,%r0,0x308
-restart_entry:
- lhi %r1,1
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- sam64
- BR_EX_DMA_r14
-ENDPROC(_swsusp_reset_dma)
-
-/*
* void _diag308_reset_dma(void)
*
* Calls diag 308 subcode 1 and continues execution
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 3f50115..b3501ea 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -3,7 +3,14 @@
#include <asm/facility.h>
#include <asm/sections.h>
+/* will be used in arch/s390/kernel/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+#endif
+struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
{
@@ -19,7 +26,22 @@
if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
+ if (IS_ENABLED(CONFIG_KVM)) {
+ memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));
+ uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;
+ uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;
+ uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;
+ uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;
+ uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;
+ uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
+ uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
+ uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
+ uv_info.uv_feature_indications = uvcb.uv_feature_indications;
+ }
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
+#endif
}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 38d6403..fe6f529 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -1,5 +1,7 @@
+CONFIG_UAPI_HEADER_TEST=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
@@ -14,7 +16,6 @@
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
@@ -31,9 +32,9 @@
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_BPF_LSM=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
@@ -51,18 +52,15 @@
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
+CONFIG_S390_UNWIND_SELFTEST=y
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
-CONFIG_REFCOUNT_FULL=y
CONFIG_LOCK_EVENT_COUNTS=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
@@ -77,6 +75,8 @@
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
@@ -94,10 +94,10 @@
CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
+CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
-CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
@@ -130,6 +130,7 @@
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
@@ -144,6 +145,7 @@
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
CONFIG_IPV6_VTI=m
@@ -151,7 +153,10 @@
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
@@ -317,6 +322,7 @@
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
CONFIG_BRIDGE=m
+CONFIG_BRIDGE_MRP=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
@@ -341,6 +347,7 @@
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
@@ -364,6 +371,7 @@
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
@@ -372,14 +380,14 @@
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
-# CONFIG_NET_DROP_MONITOR is not set
CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
CONFIG_PCI_DEBUG=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
+CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
@@ -435,6 +443,7 @@
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
@@ -448,6 +457,8 @@
CONFIG_IFB=m
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
@@ -475,14 +486,12 @@
# CONFIG_NET_VENDOR_EMULEX is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
-# CONFIG_NET_VENDOR_HP is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-# CONFIG_MLXFW is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -515,6 +524,7 @@
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -533,6 +543,7 @@
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
@@ -561,6 +572,8 @@
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -608,11 +621,13 @@
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
@@ -650,8 +665,8 @@
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_FORTIFY_SOURCE=y
@@ -675,8 +690,12 @@
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
@@ -685,14 +704,13 @@
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -702,6 +720,7 @@
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
@@ -720,6 +739,9 @@
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -732,6 +754,7 @@
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
@@ -749,12 +772,13 @@
CONFIG_GDB_SCRIPTS=y
CONFIG_FRAME_WARN=1024
CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_PAGE_OWNER=y
CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_SELFTEST=y
CONFIG_DEBUG_OBJECTS_FREE=y
@@ -773,9 +797,10 @@
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_DEBUG_PER_CPU_MAPS=y
CONFIG_DEBUG_SHIRQ=y
+CONFIG_PANIC_ON_OOPS=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_PANIC_ON_OOPS=y
+CONFIG_TEST_LOCKUP=m
CONFIG_DEBUG_TIMEKEEPING=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
@@ -784,36 +809,43 @@
CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=300
+# CONFIG_RCU_TRACE is not set
+CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_HIST_TRIGGERS=y
CONFIG_NOTIFIER_ERROR_INJECTION=m
CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAULT_INJECTION_USERCOPY=y
CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAIL_FUNCTION=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
-CONFIG_LATENCYTOP=y
-CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPT_TRACER=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-CONFIG_HIST_TRIGGERS=y
CONFIG_LKDTM=m
CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_MIN_HEAP=y
CONFIG_TEST_SORT=y
CONFIG_KPROBES_SANITY_TEST=y
CONFIG_RBTREE_TEST=y
CONFIG_INTERVAL_TREE_TEST=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BITOPS=m
CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 25f7998..17d5df2 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -1,5 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
@@ -13,7 +14,6 @@
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
@@ -30,9 +30,9 @@
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
+CONFIG_BPF_LSM=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
@@ -41,7 +41,6 @@
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
-# CONFIG_NUMA_EMU is not set
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_SIG=y
@@ -51,13 +50,11 @@
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
+CONFIG_S390_UNWIND_SELFTEST=m
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
@@ -73,6 +70,8 @@
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
@@ -88,10 +87,10 @@
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
+CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
-CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
@@ -124,6 +123,7 @@
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
CONFIG_INET_IPCOMP=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
@@ -138,6 +138,7 @@
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
CONFIG_IPV6_VTI=m
@@ -145,7 +146,10 @@
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
@@ -310,6 +314,7 @@
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
CONFIG_BRIDGE=m
+CONFIG_BRIDGE_MRP=y
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
@@ -334,6 +339,7 @@
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
@@ -357,6 +363,7 @@
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
@@ -365,14 +372,14 @@
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
-# CONFIG_NET_DROP_MONITOR is not set
CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
+CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
@@ -429,6 +436,7 @@
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
@@ -443,6 +451,8 @@
CONFIG_IFB=m
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
@@ -470,14 +480,12 @@
# CONFIG_NET_VENDOR_EMULEX is not set
# CONFIG_NET_VENDOR_EZCHIP is not set
# CONFIG_NET_VENDOR_GOOGLE is not set
-# CONFIG_NET_VENDOR_HP is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
-# CONFIG_MLXFW is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROCHIP is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -510,6 +518,7 @@
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
@@ -528,6 +537,7 @@
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
@@ -556,6 +566,8 @@
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
CONFIG_S390_CCW_IOMMU=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
@@ -599,11 +611,13 @@
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
@@ -641,8 +655,8 @@
CONFIG_DLM=m
CONFIG_UNICODE=y
CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
@@ -666,8 +680,12 @@
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
@@ -677,14 +695,13 @@
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -694,6 +711,7 @@
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
@@ -712,6 +730,9 @@
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@@ -724,7 +745,9 @@
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
+CONFIG_PRIME_NUMBERS=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
@@ -737,20 +760,25 @@
CONFIG_FRAME_WARN=1024
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
+CONFIG_TEST_LOCKUP=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
CONFIG_HIST_TRIGGERS=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 20c51e5..a302630 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -17,11 +17,11 @@
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
CONFIG_CRASH_DUMP=y
-# CONFIG_SECCOMP is not set
# CONFIG_PFAULT is not set
# CONFIG_S390_HYPFS_FS is not set
# CONFIG_VIRTUALIZATION is not set
# CONFIG_S390_GUEST is not set
+# CONFIG_SECCOMP is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -30,6 +30,7 @@
# CONFIG_BOUNCE is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
+# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_BLK_DEV_XPRAM is not set
@@ -55,6 +56,8 @@
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
# CONFIG_HID is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
@@ -62,12 +65,15 @@
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_LSM="yama,loadpin,safesetid,integrity"
+# CONFIG_ZLIB_DFLTCC is not set
CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_TRACE is not set
# CONFIG_FTRACE is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9803e96..7304463 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@
int key_len;
unsigned long fc;
union {
- struct crypto_sync_skcipher *blk;
+ struct crypto_skcipher *skcipher;
struct crypto_cipher *cip;
} fallback;
};
@@ -54,7 +54,7 @@
u8 pcc_key[32];
int key_len;
unsigned long fc;
- struct crypto_sync_skcipher *fallback;
+ struct crypto_skcipher *fallback;
};
struct gcm_sg_walk {
@@ -72,19 +72,12 @@
unsigned int key_len)
{
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- int ret;
sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
CRYPTO_TFM_REQ_MASK);
- ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
- if (ret) {
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags &
- CRYPTO_TFM_RES_MASK);
- }
- return ret;
+ return crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
}
static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -178,66 +171,36 @@
}
};
-static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
- unsigned int len)
+static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- unsigned int ret;
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
- CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
- CRYPTO_TFM_REQ_MASK);
-
- ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
- CRYPTO_TFM_RES_MASK;
-
- return ret;
+ crypto_skcipher_clear_flags(sctx->fallback.skcipher,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(sctx->fallback.skcipher,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ return crypto_skcipher_setkey(sctx->fallback.skcipher, key, len);
}
-static int fallback_blk_dec(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx,
+ struct skcipher_request *req,
+ unsigned long modifier)
{
- unsigned int ret;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
- skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_decrypt(req);
-
- skcipher_request_zero(req);
- return ret;
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, sctx->fallback.skcipher);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
}
-static int fallback_blk_enc(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- unsigned int ret;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
- skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_encrypt(req);
- return ret;
-}
-
-static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -248,111 +211,92 @@
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
return 0;
}
-static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, modifier);
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_km(sctx->fc | modifier, sctx->key,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
-
return ret;
}
-static int ecb_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_aes_encrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_aes_crypt(desc, 0, &walk);
+ return ecb_aes_crypt(req, 0);
}
-static int ecb_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_aes_decrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return ecb_aes_crypt(req, CPACF_DECRYPT);
}
-static int fallback_init_blk(struct crypto_tfm *tfm)
+static int fallback_init_skcipher(struct crypto_skcipher *tfm)
{
- const char *name = tfm->__crt_alg->cra_name;
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
- if (IS_ERR(sctx->fallback.blk)) {
+ if (IS_ERR(sctx->fallback.skcipher)) {
pr_err("Allocating AES fallback algorithm %s failed\n",
name);
- return PTR_ERR(sctx->fallback.blk);
+ return PTR_ERR(sctx->fallback.skcipher);
}
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(sctx->fallback.skcipher));
return 0;
}
-static void fallback_exit_blk(struct crypto_tfm *tfm)
+static void fallback_exit_skcipher(struct crypto_skcipher *tfm)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(sctx->fallback.blk);
+ crypto_free_skcipher(sctx->fallback.skcipher);
}
-static struct crypto_alg ecb_aes_alg = {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-s390",
- .cra_priority = 401, /* combo: aes + ecb + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = ecb_aes_set_key,
- .encrypt = ecb_aes_encrypt,
- .decrypt = ecb_aes_decrypt,
- }
- }
+static struct skcipher_alg ecb_aes_alg = {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ecb_aes_set_key,
+ .encrypt = ecb_aes_encrypt,
+ .decrypt = ecb_aes_decrypt,
};
-static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -363,17 +307,18 @@
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
return 0;
}
-static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
struct {
@@ -381,134 +326,70 @@
u8 key[AES_MAX_KEY_SIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, modifier);
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
memcpy(param.key, sctx->key, sctx->key_len);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_kmc(sctx->fc | modifier, ¶m,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
- memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
+ memzero_explicit(¶m, sizeof(param));
return ret;
}
-static int cbc_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_aes_encrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, 0, &walk);
+ return cbc_aes_crypt(req, 0);
}
-static int cbc_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_aes_decrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return cbc_aes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg cbc_aes_alg = {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_aes_set_key,
- .encrypt = cbc_aes_encrypt,
- .decrypt = cbc_aes_decrypt,
- }
- }
+static struct skcipher_alg cbc_aes_alg = {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_aes_set_key,
+ .encrypt = cbc_aes_encrypt,
+ .decrypt = cbc_aes_decrypt,
};
-static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int len)
+static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- unsigned int ret;
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
- CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
- CRYPTO_TFM_REQ_MASK);
-
- ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
- CRYPTO_TFM_RES_MASK;
-
- return ret;
+ crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(xts_ctx->fallback,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ return crypto_skcipher_setkey(xts_ctx->fallback, key, len);
}
-static int xts_fallback_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
- unsigned int ret;
-
- skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_decrypt(req);
-
- skcipher_request_zero(req);
- return ret;
-}
-
-static int xts_fallback_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
- unsigned int ret;
-
- skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_encrypt(req);
-
- skcipher_request_zero(req);
- return ret;
-}
-
-static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
int err;
@@ -517,10 +398,8 @@
return err;
/* In fips mode only 128 bit or 256 bit keys are valid */
- if (fips_enabled && key_len != 32 && key_len != 64) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ if (fips_enabled && key_len != 32 && key_len != 64)
return -EINVAL;
- }
/* Pick the correct function code based on the key length */
fc = (key_len == 32) ? CPACF_KM_XTS_128 :
@@ -539,10 +418,11 @@
return 0;
}
-static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int offset, nbytes, n;
int ret;
struct {
@@ -557,113 +437,102 @@
u8 init[16];
} xts_param;
- ret = blkcipher_walk_virt(desc, walk);
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
offset = xts_ctx->key_len & 0x10;
memset(pcc_param.block, 0, sizeof(pcc_param.block));
memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
- memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
memcpy(xts_param.init, pcc_param.xts, 16);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
+ memzero_explicit(&pcc_param, sizeof(pcc_param));
+ memzero_explicit(&xts_param, sizeof(xts_param));
return ret;
}
-static int xts_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_aes_encrypt(struct skcipher_request *req)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (!nbytes)
- return -EINVAL;
-
- if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
- return xts_fallback_encrypt(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_aes_crypt(desc, 0, &walk);
+ return xts_aes_crypt(req, 0);
}
-static int xts_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_aes_decrypt(struct skcipher_request *req)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (!nbytes)
- return -EINVAL;
-
- if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
- return xts_fallback_decrypt(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return xts_aes_crypt(req, CPACF_DECRYPT);
}
-static int xts_fallback_init(struct crypto_tfm *tfm)
+static int xts_fallback_init(struct crypto_skcipher *tfm)
{
- const char *name = tfm->__crt_alg->cra_name;
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
if (IS_ERR(xts_ctx->fallback)) {
pr_err("Allocating XTS fallback algorithm %s failed\n",
name);
return PTR_ERR(xts_ctx->fallback);
}
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(xts_ctx->fallback));
return 0;
}
-static void xts_fallback_exit(struct crypto_tfm *tfm)
+static void xts_fallback_exit(struct crypto_skcipher *tfm)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(xts_ctx->fallback);
+ crypto_free_skcipher(xts_ctx->fallback);
}
-static struct crypto_alg xts_aes_alg = {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_xts_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = xts_fallback_init,
- .cra_exit = xts_fallback_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_aes_set_key,
- .encrypt = xts_aes_encrypt,
- .decrypt = xts_aes_decrypt,
- }
- }
+static struct skcipher_alg xts_aes_alg = {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = xts_fallback_init,
+ .exit = xts_fallback_exit,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aes_set_key,
+ .encrypt = xts_aes_encrypt,
+ .decrypt = xts_aes_decrypt,
};
-static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -674,7 +543,7 @@
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
@@ -696,30 +565,34 @@
return n;
}
-static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int n, nbytes;
int ret, locked;
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, 0);
+
locked = mutex_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
n = AES_BLOCK_SIZE;
+
if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
- cpacf_kmctr(sctx->fc | modifier, sctx->key,
- walk->dst.virt.addr, walk->src.virt.addr,
- n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (locked)
mutex_unlock(&ctrblk_lock);
@@ -727,67 +600,33 @@
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
- cpacf_kmctr(sctx->fc | modifier, sctx->key,
- buf, walk->src.virt.addr,
- AES_BLOCK_SIZE, walk->iv);
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+ AES_BLOCK_SIZE, walk.iv);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_aes_crypt(desc, 0, &walk);
-}
-
-static int ctr_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_aes_alg = {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_aes_set_key,
- .encrypt = ctr_aes_encrypt,
- .decrypt = ctr_aes_decrypt,
- }
- }
+static struct skcipher_alg ctr_aes_alg = {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_aes_set_key,
+ .encrypt = ctr_aes_crypt,
+ .decrypt = ctr_aes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
};
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1116,24 +955,27 @@
},
};
-static struct crypto_alg *aes_s390_algs_ptr[5];
-static int aes_s390_algs_num;
+static struct crypto_alg *aes_s390_alg;
+static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static int aes_s390_skciphers_num;
static struct aead_alg *aes_s390_aead_alg;
-static int aes_s390_register_alg(struct crypto_alg *alg)
+static int aes_s390_register_skcipher(struct skcipher_alg *alg)
{
int ret;
- ret = crypto_register_alg(alg);
+ ret = crypto_register_skcipher(alg);
if (!ret)
- aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+ aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg;
return ret;
}
static void aes_s390_fini(void)
{
- while (aes_s390_algs_num--)
- crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+ if (aes_s390_alg)
+ crypto_unregister_alg(aes_s390_alg);
+ while (aes_s390_skciphers_num--)
+ crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]);
if (ctrblk)
free_page((unsigned long) ctrblk);
@@ -1154,10 +996,11 @@
if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
- ret = aes_s390_register_alg(&aes_alg);
+ ret = crypto_register_alg(&aes_alg);
if (ret)
goto out_err;
- ret = aes_s390_register_alg(&ecb_aes_alg);
+ aes_s390_alg = &aes_alg;
+ ret = aes_s390_register_skcipher(&ecb_aes_alg);
if (ret)
goto out_err;
}
@@ -1165,14 +1008,14 @@
if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
- ret = aes_s390_register_alg(&cbc_aes_alg);
+ ret = aes_s390_register_skcipher(&cbc_aes_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
- ret = aes_s390_register_alg(&xts_aes_alg);
+ ret = aes_s390_register_skcipher(&xts_aes_alg);
if (ret)
goto out_err;
}
@@ -1185,7 +1028,7 @@
ret = -ENOMEM;
goto out_err;
}
- ret = aes_s390_register_alg(&ctr_aes_alg);
+ ret = aes_s390_register_skcipher(&ctr_aes_alg);
if (ret)
goto out_err;
}
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 423ee05..fafecad 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -111,10 +111,8 @@
{
struct crc_ctx *mctx = crypto_shash_ctx(tfm);
- if (newkeylen != sizeof(mctx->key)) {
- crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ if (newkeylen != sizeof(mctx->key))
return -EINVAL;
- }
mctx->key = le32_to_cpu(*(__le32 *)newkey);
return 0;
}
@@ -124,10 +122,8 @@
{
struct crc_ctx *mctx = crypto_shash_ctx(tfm);
- if (newkeylen != sizeof(mctx->key)) {
- crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ if (newkeylen != sizeof(mctx->key))
return -EINVAL;
- }
mctx->key = be32_to_cpu(*(__be32 *)newkey);
return 0;
}
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 439b100..bfbafd3 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -17,6 +17,7 @@
#include <linux/mutex.h>
#include <crypto/algapi.h>
#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
#include <asm/cpacf.h>
#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
@@ -45,6 +46,12 @@
return 0;
}
+static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return des_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -79,28 +86,30 @@
}
};
-static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(DES_BLOCK_SIZE - 1);
- cpacf_km(fc, ctx->key, walk->dst.virt.addr,
- walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ cpacf_km(fc, ctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
return ret;
}
-static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
struct {
@@ -108,99 +117,69 @@
u8 key[DES3_KEY_SIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, DES_BLOCK_SIZE);
memcpy(param.key, ctx->key, DES3_KEY_SIZE);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(DES_BLOCK_SIZE - 1);
- cpacf_kmc(fc, ¶m, walk->dst.virt.addr,
- walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ cpacf_kmc(fc, ¶m, walk.dst.virt.addr,
+ walk.src.virt.addr, n);
+ memcpy(walk.iv, param.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
- memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
return ret;
}
-static int ecb_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_DEA);
}
-static int ecb_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT);
}
-static struct crypto_alg ecb_des_alg = {
- .cra_name = "ecb(des)",
- .cra_driver_name = "ecb-des-s390",
- .cra_priority = 400, /* combo: des + ecb */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .setkey = des_setkey,
- .encrypt = ecb_des_encrypt,
- .decrypt = ecb_des_decrypt,
- }
- }
+static struct skcipher_alg ecb_des_alg = {
+ .base.cra_name = "ecb(des)",
+ .base.cra_driver_name = "ecb-des-s390",
+ .base.cra_priority = 400, /* combo: des + ecb */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = ecb_des_encrypt,
+ .decrypt = ecb_des_decrypt,
};
-static int cbc_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_DEA);
}
-static int cbc_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT);
}
-static struct crypto_alg cbc_des_alg = {
- .cra_name = "cbc(des)",
- .cra_driver_name = "cbc-des-s390",
- .cra_priority = 400, /* combo: des + cbc */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_setkey,
- .encrypt = cbc_des_encrypt,
- .decrypt = cbc_des_decrypt,
- }
- }
+static struct skcipher_alg cbc_des_alg = {
+ .base.cra_name = "cbc(des)",
+ .base.cra_driver_name = "cbc-des-s390",
+ .base.cra_priority = 400, /* combo: des + cbc */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = cbc_des_encrypt,
+ .decrypt = cbc_des_decrypt,
};
/*
@@ -232,6 +211,12 @@
return 0;
}
+static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -266,87 +251,53 @@
}
};
-static int ecb_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des3_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_TDEA_192);
}
-static int ecb_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des3_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT);
}
-static struct crypto_alg ecb_des3_alg = {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + ecb */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .setkey = des3_setkey,
- .encrypt = ecb_des3_encrypt,
- .decrypt = ecb_des3_decrypt,
- }
- }
+static struct skcipher_alg ecb_des3_alg = {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + ecb */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = ecb_des3_encrypt,
+ .decrypt = ecb_des3_decrypt,
};
-static int cbc_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des3_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_TDEA_192);
}
-static int cbc_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des3_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT);
}
-static struct crypto_alg cbc_des3_alg = {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + cbc */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des3_setkey,
- .encrypt = cbc_des3_encrypt,
- .decrypt = cbc_des3_decrypt,
- }
- }
+static struct skcipher_alg cbc_des3_alg = {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + cbc */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = cbc_des3_encrypt,
+ .decrypt = cbc_des3_decrypt,
};
static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -364,128 +315,90 @@
return n;
}
-static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 buf[DES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int n, nbytes;
int ret, locked;
locked = mutex_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) {
n = DES_BLOCK_SIZE;
if (nbytes >= 2*DES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
- cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
- walk->src.virt.addr, n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE,
DES_BLOCK_SIZE);
- crypto_inc(walk->iv, DES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (locked)
mutex_unlock(&ctrblk_lock);
/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
if (nbytes) {
- cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
- DES_BLOCK_SIZE, walk->iv);
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, DES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr,
+ DES_BLOCK_SIZE, walk.iv);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_des_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
+ return ctr_desall_crypt(req, CPACF_KMCTR_DEA);
}
-static int ctr_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_des_alg = {
- .cra_name = "ctr(des)",
- .cra_driver_name = "ctr-des-s390",
- .cra_priority = 400, /* combo: des + ctr */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_setkey,
- .encrypt = ctr_des_encrypt,
- .decrypt = ctr_des_decrypt,
- }
- }
+static struct skcipher_alg ctr_des_alg = {
+ .base.cra_name = "ctr(des)",
+ .base.cra_driver_name = "ctr-des-s390",
+ .base.cra_priority = 400, /* combo: des + ctr */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = ctr_des_crypt,
+ .decrypt = ctr_des_crypt,
+ .chunksize = DES_BLOCK_SIZE,
};
-static int ctr_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_des3_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
+ return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192);
}
-static int ctr_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
- &walk);
-}
-
-static struct crypto_alg ctr_des3_alg = {
- .cra_name = "ctr(des3_ede)",
- .cra_driver_name = "ctr-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + ede */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des3_setkey,
- .encrypt = ctr_des3_encrypt,
- .decrypt = ctr_des3_decrypt,
- }
- }
+static struct skcipher_alg ctr_des3_alg = {
+ .base.cra_name = "ctr(des3_ede)",
+ .base.cra_driver_name = "ctr-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + ede */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = ctr_des3_crypt,
+ .decrypt = ctr_des3_crypt,
+ .chunksize = DES_BLOCK_SIZE,
};
-static struct crypto_alg *des_s390_algs_ptr[8];
+static struct crypto_alg *des_s390_algs_ptr[2];
static int des_s390_algs_num;
+static struct skcipher_alg *des_s390_skciphers_ptr[6];
+static int des_s390_skciphers_num;
static int des_s390_register_alg(struct crypto_alg *alg)
{
@@ -497,10 +410,22 @@
return ret;
}
+static int des_s390_register_skcipher(struct skcipher_alg *alg)
+{
+ int ret;
+
+ ret = crypto_register_skcipher(alg);
+ if (!ret)
+ des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg;
+ return ret;
+}
+
static void des_s390_exit(void)
{
while (des_s390_algs_num--)
crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+ while (des_s390_skciphers_num--)
+ crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]);
if (ctrblk)
free_page((unsigned long) ctrblk);
}
@@ -518,12 +443,12 @@
ret = des_s390_register_alg(&des_alg);
if (ret)
goto out_err;
- ret = des_s390_register_alg(&ecb_des_alg);
+ ret = des_s390_register_skcipher(&ecb_des_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
- ret = des_s390_register_alg(&cbc_des_alg);
+ ret = des_s390_register_skcipher(&cbc_des_alg);
if (ret)
goto out_err;
}
@@ -531,12 +456,12 @@
ret = des_s390_register_alg(&des3_alg);
if (ret)
goto out_err;
- ret = des_s390_register_alg(&ecb_des3_alg);
+ ret = des_s390_register_skcipher(&ecb_des3_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
- ret = des_s390_register_alg(&cbc_des3_alg);
+ ret = des_s390_register_skcipher(&cbc_des3_alg);
if (ret)
goto out_err;
}
@@ -551,12 +476,12 @@
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
- ret = des_s390_register_alg(&ctr_des_alg);
+ ret = des_s390_register_skcipher(&ctr_des_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
- ret = des_s390_register_alg(&ctr_des3_alg);
+ ret = des_s390_register_skcipher(&ctr_des3_alg);
if (ret)
goto out_err;
}
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index a3e7400..6b07a2f 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -43,10 +43,8 @@
{
struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
- if (keylen != GHASH_BLOCK_SIZE) {
- crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
- }
memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 6184dce..f3caeb1 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
* s390 implementation of the AES Cipher Algorithm with protected keys.
*
* s390 Version:
- * Copyright IBM Corp. 2017,2019
+ * Copyright IBM Corp. 2017,2020
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
* Harald Freudenberger <freude@de.ibm.com>
*/
@@ -20,7 +20,9 @@
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/init.h>
+#include <linux/mutex.h>
#include <linux/spinlock.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
#include <asm/pkey.h>
@@ -31,11 +33,11 @@
* is called. As paes can handle different kinds of key blobs
* and padding is also possible, the limits need to be generous.
*/
-#define PAES_MIN_KEYSIZE 64
-#define PAES_MAX_KEYSIZE 256
+#define PAES_MIN_KEYSIZE 16
+#define PAES_MAX_KEYSIZE 320
static u8 *ctrblk;
-static DEFINE_SPINLOCK(ctrblk_lock);
+static DEFINE_MUTEX(ctrblk_lock);
static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
@@ -52,19 +54,46 @@
unsigned int keylen;
};
-static inline int _copy_key_to_kb(struct key_blob *kb,
- const u8 *key,
- unsigned int keylen)
+static inline int _key_to_kb(struct key_blob *kb,
+ const u8 *key,
+ unsigned int keylen)
{
- if (keylen <= sizeof(kb->keybuf))
+ struct clearkey_header {
+ u8 type;
+ u8 res0[3];
+ u8 version;
+ u8 res1[3];
+ u32 keytype;
+ u32 len;
+ } __packed * h;
+
+ switch (keylen) {
+ case 16:
+ case 24:
+ case 32:
+ /* clear key value, prepare pkey clear key token in keybuf */
+ memset(kb->keybuf, 0, sizeof(kb->keybuf));
+ h = (struct clearkey_header *) kb->keybuf;
+ h->version = 0x02; /* TOKVER_CLEAR_KEY */
+ h->keytype = (keylen - 8) >> 3;
+ h->len = keylen;
+ memcpy(kb->keybuf + sizeof(*h), key, keylen);
+ kb->keylen = sizeof(*h) + keylen;
kb->key = kb->keybuf;
- else {
- kb->key = kmalloc(keylen, GFP_KERNEL);
- if (!kb->key)
- return -ENOMEM;
+ break;
+ default:
+ /* other key material, let pkey handle this */
+ if (keylen <= sizeof(kb->keybuf))
+ kb->key = kb->keybuf;
+ else {
+ kb->key = kmalloc(keylen, GFP_KERNEL);
+ if (!kb->key)
+ return -ENOMEM;
+ }
+ memcpy(kb->key, key, keylen);
+ kb->keylen = keylen;
+ break;
}
- memcpy(kb->key, key, keylen);
- kb->keylen = keylen;
return 0;
}
@@ -81,16 +110,18 @@
struct s390_paes_ctx {
struct key_blob kb;
struct pkey_protkey pk;
+ spinlock_t pk_lock;
unsigned long fc;
};
struct s390_pxts_ctx {
struct key_blob kb[2];
struct pkey_protkey pk[2];
+ spinlock_t pk_lock;
unsigned long fc;
};
-static inline int __paes_convert_key(struct key_blob *kb,
+static inline int __paes_keyblob2pkey(struct key_blob *kb,
struct pkey_protkey *pk)
{
int i, ret;
@@ -105,11 +136,42 @@
return ret;
}
-static int __paes_set_key(struct s390_paes_ctx *ctx)
+static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+{
+ struct pkey_protkey pkey;
+
+ if (__paes_keyblob2pkey(&ctx->kb, &pkey))
+ return -EINVAL;
+
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(&ctx->pk, &pkey, sizeof(pkey));
+ spin_unlock_bh(&ctx->pk_lock);
+
+ return 0;
+}
+
+static int ecb_paes_init(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ ctx->kb.key = NULL;
+ spin_lock_init(&ctx->pk_lock);
+
+ return 0;
+}
+
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
+{
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ _free_kb_keybuf(&ctx->kb);
+}
+
+static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->kb, &ctx->pk))
+ if (__paes_convert_key(ctx))
return -EINVAL;
/* Pick the correct function code based on the protected key type */
@@ -123,128 +185,106 @@
return ctx->fc ? 0 : -EINVAL;
}
-static int ecb_paes_init(struct crypto_tfm *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- ctx->kb.key = NULL;
-
- return 0;
-}
-
-static void ecb_paes_exit(struct crypto_tfm *tfm)
-{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- _free_kb_keybuf(&ctx->kb);
-}
-
-static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
- rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
+ rc = _key_to_kb(&ctx->kb, in_key, key_len);
if (rc)
return rc;
- if (__paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
- return 0;
+ return __ecb_paes_set_key(ctx);
}
-static int ecb_paes_crypt(struct blkcipher_desc *desc,
- unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret;
+ struct {
+ u8 key[MAXPROTKEYSIZE];
+ } param;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ spin_unlock_bh(&ctx->pk_lock);
+
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
- k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
- walk->dst.virt.addr, walk->src.virt.addr, n);
+ k = cpacf_km(ctx->fc | modifier, ¶m,
+ walk.dst.virt.addr, walk.src.virt.addr, n);
if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ ret = skcipher_walk_done(&walk, nbytes - k);
if (k < n) {
- if (__paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ if (__paes_convert_key(ctx))
+ return skcipher_walk_done(&walk, -EIO);
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ spin_unlock_bh(&ctx->pk_lock);
}
}
return ret;
}
-static int ecb_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+ return ecb_paes_crypt(req, 0);
}
-static int ecb_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return ecb_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg ecb_paes_alg = {
- .cra_name = "ecb(paes)",
- .cra_driver_name = "ecb-paes-s390",
- .cra_priority = 401, /* combo: aes + ecb + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list),
- .cra_init = ecb_paes_init,
- .cra_exit = ecb_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .setkey = ecb_paes_set_key,
- .encrypt = ecb_paes_encrypt,
- .decrypt = ecb_paes_decrypt,
- }
- }
+static struct skcipher_alg ecb_paes_alg = {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
+ .init = ecb_paes_init,
+ .exit = ecb_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .setkey = ecb_paes_set_key,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
};
-static int cbc_paes_init(struct crypto_tfm *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
+ spin_lock_init(&ctx->pk_lock);
return 0;
}
-static void cbc_paes_exit(struct crypto_tfm *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
-static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->kb, &ctx->pk))
+ if (__paes_convert_key(ctx))
return -EINVAL;
/* Pick the correct function code based on the protected key type */
@@ -258,28 +298,25 @@
return ctx->fc ? 0 : -EINVAL;
}
-static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
- rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
+ rc = _key_to_kb(&ctx->kb, in_key, key_len);
if (rc)
return rc;
- if (__cbc_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
- return 0;
+ return __cbc_paes_set_key(ctx);
}
-static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret;
struct {
@@ -287,94 +324,103 @@
u8 key[MAXPROTKEYSIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+
+ memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
+ spin_lock_bh(&ctx->pk_lock);
memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ spin_unlock_bh(&ctx->pk_lock);
+
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_kmc(ctx->fc | modifier, ¶m,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ if (k) {
+ memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - k);
+ }
if (k < n) {
- if (__cbc_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ if (__paes_convert_key(ctx))
+ return skcipher_walk_done(&walk, -EIO);
+ spin_lock_bh(&ctx->pk_lock);
memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ spin_unlock_bh(&ctx->pk_lock);
}
}
- memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
return ret;
}
-static int cbc_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_paes_crypt(desc, 0, &walk);
+ return cbc_paes_crypt(req, 0);
}
-static int cbc_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return cbc_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg cbc_paes_alg = {
- .cra_name = "cbc(paes)",
- .cra_driver_name = "cbc-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list),
- .cra_init = cbc_paes_init,
- .cra_exit = cbc_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_paes_set_key,
- .encrypt = cbc_paes_encrypt,
- .decrypt = cbc_paes_decrypt,
- }
- }
+static struct skcipher_alg cbc_paes_alg = {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
+ .init = cbc_paes_init,
+ .exit = cbc_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_set_key,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
};
-static int xts_paes_init(struct crypto_tfm *tfm)
+static int xts_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb[0].key = NULL;
ctx->kb[1].key = NULL;
+ spin_lock_init(&ctx->pk_lock);
return 0;
}
-static void xts_paes_exit(struct crypto_tfm *tfm)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb[0]);
_free_kb_keybuf(&ctx->kb[1]);
}
-static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+{
+ struct pkey_protkey pkey0, pkey1;
+
+ if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
+ __paes_keyblob2pkey(&ctx->kb[1], &pkey1))
+ return -EINVAL;
+
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0));
+ memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1));
+ spin_unlock_bh(&ctx->pk_lock);
+
+ return 0;
+}
+
+static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->kb[0], &ctx->pk[0]) ||
- __paes_convert_key(&ctx->kb[1], &ctx->pk[1]))
+ if (__xts_paes_convert_key(ctx))
return -EINVAL;
if (ctx->pk[0].type != ctx->pk[1].type)
@@ -391,11 +437,11 @@
return ctx->fc ? 0 : -EINVAL;
}
-static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int xts_key_len)
{
int rc;
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
unsigned int ckey_len, key_len;
@@ -406,17 +452,16 @@
_free_kb_keybuf(&ctx->kb[0]);
_free_kb_keybuf(&ctx->kb[1]);
- rc = _copy_key_to_kb(&ctx->kb[0], in_key, key_len);
+ rc = _key_to_kb(&ctx->kb[0], in_key, key_len);
if (rc)
return rc;
- rc = _copy_key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
+ rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
if (rc)
return rc;
- if (__xts_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
+ rc = __xts_paes_set_key(ctx);
+ if (rc)
+ return rc;
/*
* xts_check_key verifies the key length is not odd and makes
@@ -427,13 +472,14 @@
AES_KEYSIZE_128 : AES_KEYSIZE_256;
memcpy(ckey, ctx->pk[0].protkey, ckey_len);
memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
- return xts_check_key(tfm, ckey, 2*ckey_len);
+ return xts_verify_key(tfm, ckey, 2*ckey_len);
}
-static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int keylen, offset, nbytes, n, k;
int ret;
struct {
@@ -448,99 +494,92 @@
u8 init[16];
} xts_param;
- ret = blkcipher_walk_virt(desc, walk);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
-retry:
- memset(&pcc_param, 0, sizeof(pcc_param));
- memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
- memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
- cpacf_pcc(ctx->fc, pcc_param.key + offset);
+ memset(&pcc_param, 0, sizeof(pcc_param));
+ memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
+ spin_unlock_bh(&ctx->pk_lock);
+ cpacf_pcc(ctx->fc, pcc_param.key + offset);
memcpy(xts_param.init, pcc_param.xts, 16);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
- walk->dst.virt.addr, walk->src.virt.addr, n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ ret = skcipher_walk_done(&walk, nbytes - k);
if (k < n) {
- if (__xts_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
- goto retry;
+ if (__xts_paes_convert_key(ctx))
+ return skcipher_walk_done(&walk, -EIO);
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(xts_param.key + offset,
+ ctx->pk[0].protkey, keylen);
+ spin_unlock_bh(&ctx->pk_lock);
}
}
+
return ret;
}
-static int xts_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_paes_crypt(desc, 0, &walk);
+ return xts_paes_crypt(req, 0);
}
-static int xts_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return xts_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg xts_paes_alg = {
- .cra_name = "xts(paes)",
- .cra_driver_name = "xts-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_pxts_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list),
- .cra_init = xts_paes_init,
- .cra_exit = xts_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * PAES_MIN_KEYSIZE,
- .max_keysize = 2 * PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_paes_set_key,
- .encrypt = xts_paes_encrypt,
- .decrypt = xts_paes_decrypt,
- }
- }
+static struct skcipher_alg xts_paes_alg = {
+ .base.cra_name = "xts(paes)",
+ .base.cra_driver_name = "xts-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
+ .init = xts_paes_init,
+ .exit = xts_paes_exit,
+ .min_keysize = 2 * PAES_MIN_KEYSIZE,
+ .max_keysize = 2 * PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_set_key,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
};
-static int ctr_paes_init(struct crypto_tfm *tfm)
+static int ctr_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
+ spin_lock_init(&ctx->pk_lock);
return 0;
}
-static void ctr_paes_exit(struct crypto_tfm *tfm)
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
-static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
{
unsigned long fc;
- if (__paes_convert_key(&ctx->kb, &ctx->pk))
+ if (__paes_convert_key(ctx))
return -EINVAL;
/* Pick the correct function code based on the protected key type */
@@ -555,22 +594,18 @@
return ctx->fc ? 0 : -EINVAL;
}
-static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
- rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
+ rc = _key_to_kb(&ctx->kb, in_key, key_len);
if (rc)
return rc;
- if (__ctr_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
- return -EINVAL;
- }
- return 0;
+ return __ctr_paes_set_key(ctx);
}
static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -588,121 +623,111 @@
return n;
}
-static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ctr_paes_crypt(struct skcipher_request *req)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret, locked;
+ struct {
+ u8 key[MAXPROTKEYSIZE];
+ } param;
- locked = spin_trylock(&ctrblk_lock);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
- ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ spin_unlock_bh(&ctx->pk_lock);
+
+ locked = mutex_trylock(&ctrblk_lock);
+
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
n = AES_BLOCK_SIZE;
if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
- k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
- walk->dst.virt.addr, walk->src.virt.addr,
- n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ k = cpacf_kmctr(ctx->fc, ¶m, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (k) {
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - k);
}
if (k < n) {
- if (__ctr_paes_set_key(ctx) != 0) {
+ if (__paes_convert_key(ctx)) {
if (locked)
- spin_unlock(&ctrblk_lock);
- return blkcipher_walk_done(desc, walk, -EIO);
+ mutex_unlock(&ctrblk_lock);
+ return skcipher_walk_done(&walk, -EIO);
}
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ spin_unlock_bh(&ctx->pk_lock);
}
}
if (locked)
- spin_unlock(&ctrblk_lock);
+ mutex_unlock(&ctrblk_lock);
/*
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
while (1) {
- if (cpacf_kmctr(ctx->fc | modifier,
- ctx->pk.protkey, buf,
- walk->src.virt.addr, AES_BLOCK_SIZE,
- walk->iv) == AES_BLOCK_SIZE)
+ if (cpacf_kmctr(ctx->fc, ¶m, buf,
+ walk.src.virt.addr, AES_BLOCK_SIZE,
+ walk.iv) == AES_BLOCK_SIZE)
break;
- if (__ctr_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ if (__paes_convert_key(ctx))
+ return skcipher_walk_done(&walk, -EIO);
+ spin_lock_bh(&ctx->pk_lock);
+ memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+ spin_unlock_bh(&ctx->pk_lock);
}
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes);
}
return ret;
}
-static int ctr_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_paes_crypt(desc, 0, &walk);
-}
-
-static int ctr_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_paes_alg = {
- .cra_name = "ctr(paes)",
- .cra_driver_name = "ctr-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list),
- .cra_init = ctr_paes_init,
- .cra_exit = ctr_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_paes_set_key,
- .encrypt = ctr_paes_encrypt,
- .decrypt = ctr_paes_decrypt,
- }
- }
+static struct skcipher_alg ctr_paes_alg = {
+ .base.cra_name = "ctr(paes)",
+ .base.cra_driver_name = "ctr-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
+ .init = ctr_paes_init,
+ .exit = ctr_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_set_key,
+ .encrypt = ctr_paes_crypt,
+ .decrypt = ctr_paes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
};
-static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
{
- if (!list_empty(&alg->cra_list))
- crypto_unregister_alg(alg);
+ if (!list_empty(&alg->base.cra_list))
+ crypto_unregister_skcipher(alg);
}
static void paes_s390_fini(void)
{
+ __crypto_unregister_skcipher(&ctr_paes_alg);
+ __crypto_unregister_skcipher(&xts_paes_alg);
+ __crypto_unregister_skcipher(&cbc_paes_alg);
+ __crypto_unregister_skcipher(&ecb_paes_alg);
if (ctrblk)
free_page((unsigned long) ctrblk);
- __crypto_unregister_alg(&ctr_paes_alg);
- __crypto_unregister_alg(&xts_paes_alg);
- __crypto_unregister_alg(&cbc_paes_alg);
- __crypto_unregister_alg(&ecb_paes_alg);
}
static int __init paes_s390_init(void)
@@ -717,7 +742,7 @@
if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
- ret = crypto_register_alg(&ecb_paes_alg);
+ ret = crypto_register_skcipher(&ecb_paes_alg);
if (ret)
goto out_err;
}
@@ -725,14 +750,14 @@
if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
- ret = crypto_register_alg(&cbc_paes_alg);
+ ret = crypto_register_skcipher(&cbc_paes_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
- ret = crypto_register_alg(&xts_paes_alg);
+ ret = crypto_register_skcipher(&xts_paes_alg);
if (ret)
goto out_err;
}
@@ -740,14 +765,14 @@
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
- ret = crypto_register_alg(&ctr_paes_alg);
- if (ret)
- goto out_err;
ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
if (!ctrblk) {
ret = -ENOMEM;
goto out_err;
}
+ ret = crypto_register_skcipher(&ctr_paes_alg);
+ if (ret)
+ goto out_err;
}
return 0;
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index d977643..5057773 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -249,7 +249,7 @@
{
pr_debug("The prng module stopped "
"after running in triple DES mode\n");
- kzfree(prng_data);
+ kfree_sensitive(prng_data);
}
@@ -442,7 +442,7 @@
static void prng_sha512_deinstantiate(void)
{
pr_debug("The prng module stopped after running in SHA-512 mode\n");
- kzfree(prng_data);
+ kfree_sensitive(prng_data);
}
@@ -693,7 +693,7 @@
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
}
static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
@@ -712,7 +712,7 @@
counter = prng_data->prngws.byte_counter;
mutex_unlock(&prng_data->mutex);
- return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", counter);
}
static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
@@ -721,7 +721,7 @@
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+ return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
}
static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
@@ -731,9 +731,9 @@
char *buf)
{
if (prng_mode == PRNG_MODE_TDES)
- return snprintf(buf, PAGE_SIZE, "TDES\n");
+ return scnprintf(buf, PAGE_SIZE, "TDES\n");
else
- return snprintf(buf, PAGE_SIZE, "SHA512\n");
+ return scnprintf(buf, PAGE_SIZE, "SHA512\n");
}
static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
@@ -756,7 +756,7 @@
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
}
static ssize_t prng_reseed_limit_store(struct device *dev,
struct device_attribute *attr,
@@ -787,7 +787,7 @@
struct device_attribute *attr,
char *buf)
{
- return snprintf(buf, PAGE_SIZE, "256\n");
+ return scnprintf(buf, PAGE_SIZE, "256\n");
}
static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 7c15542..698b1e6 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -27,7 +27,7 @@
#include "sha.h"
-static int sha1_init(struct shash_desc *desc)
+static int s390_sha1_init(struct shash_desc *desc)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
@@ -42,7 +42,7 @@
return 0;
}
-static int sha1_export(struct shash_desc *desc, void *out)
+static int s390_sha1_export(struct shash_desc *desc, void *out)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
struct sha1_state *octx = out;
@@ -53,7 +53,7 @@
return 0;
}
-static int sha1_import(struct shash_desc *desc, const void *in)
+static int s390_sha1_import(struct shash_desc *desc, const void *in)
{
struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
const struct sha1_state *ictx = in;
@@ -67,11 +67,11 @@
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
+ .init = s390_sha1_init,
.update = s390_sha_update,
.final = s390_sha_final,
- .export = sha1_export,
- .import = sha1_import,
+ .export = s390_sha1_export,
+ .import = s390_sha1_import,
.descsize = sizeof(struct s390_sha_ctx),
.statesize = sizeof(struct sha1_state),
.base = {
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index e1fcc03..a927adc 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -20,6 +20,7 @@
static char local_guest[] = " ";
static char all_guests[] = "* ";
+static char *all_groups = all_guests;
static char *guest_query;
struct diag2fc_data {
@@ -62,10 +63,11 @@
memcpy(parm_list.userid, query, NAME_LEN);
ASCEBC(parm_list.userid, NAME_LEN);
- parm_list.addr = (unsigned long) addr ;
+ memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+ ASCEBC(parm_list.aci_grp, NAME_LEN);
+ parm_list.addr = (unsigned long)addr;
parm_list.size = size;
parm_list.fmt = 0x02;
- memset(parm_list.aci_grp, 0x40, NAME_LEN);
rc = -1;
diag_stat_inc(DIAG_STAT_X2FC);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 70139d0..5c97f48 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -209,17 +209,12 @@
enum { Opt_uid, Opt_gid, };
-static const struct fs_parameter_spec hypfs_param_specs[] = {
+static const struct fs_parameter_spec hypfs_fs_parameters[] = {
fsparam_u32("gid", Opt_gid),
fsparam_u32("uid", Opt_uid),
{}
};
-static const struct fs_parameter_description hypfs_fs_parameters = {
- .name = "hypfs",
- .specs = hypfs_param_specs,
-};
-
static int hypfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct hypfs_sb_info *hypfs_info = fc->s_fs_info;
@@ -228,7 +223,7 @@
kgid_t gid;
int opt;
- opt = fs_parse(fc, &hypfs_fs_parameters, param, &result);
+ opt = fs_parse(fc, hypfs_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -455,7 +450,7 @@
.owner = THIS_MODULE,
.name = "s390_hypfs",
.init_fs_context = hypfs_init_fs_context,
- .parameters = &hypfs_fs_parameters,
+ .parameters = hypfs_fs_parameters,
.kill_sb = hypfs_kill_super
};
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 2531f67..1a18d7b 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -5,22 +5,6 @@
generated-y += unistd_nr.h
generic-y += asm-offsets.h
-generic-y += cacheflush.h
-generic-y += device.h
-generic-y += dma-contiguous.h
-generic-y += dma-mapping.h
-generic-y += div64.h
-generic-y += emergency-restart.h
generic-y += export.h
-generic-y += fb.h
-generic-y += irq_regs.h
-generic-y += irq_work.h
-generic-y += kmap_types.h
-generic-y += local.h
-generic-y += local64.h
+generic-y += kvm_types.h
generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
-generic-y += trace_clock.h
-generic-y += unaligned.h
-generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index c2cf7bc..1c8a38f 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -139,10 +139,10 @@
* without volatile and memory clobber.
*/
#define alternative(oldinstr, altinstr, facility) \
- asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+ asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
- asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
+ asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
altinstr2, facility2) ::: "memory")
#endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index c67b82d..de61ce5 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -21,29 +21,17 @@
bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
-static inline bool arch_has_random(void)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
{
return false;
}
-static inline bool arch_has_random_seed(void)
-{
- if (static_branch_likely(&s390_arch_random_available))
- return true;
- return false;
-}
-
-static inline bool arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
{
return false;
}
-static inline bool arch_get_random_int(unsigned int *v)
-{
- return false;
-}
-
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
if (static_branch_likely(&s390_arch_random_available)) {
return s390_arch_random_generate((u8 *)v, sizeof(*v));
@@ -51,7 +39,7 @@
return false;
}
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
if (static_branch_likely(&s390_arch_random_available)) {
return s390_arch_random_generate((u8 *)v, sizeof(*v));
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
new file mode 100644
index 0000000..11f615e
--- /dev/null
+++ b/arch/s390/include/asm/asm-const.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_CONST_H
+#define _ASM_S390_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+#else
+/* This version of stringify will deal with commas... */
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+#endif
+#endif /* _ASM_S390_ASM_CONST_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 491ad53..11c5952 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,8 +15,6 @@
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-#define ATOMIC_INIT(i) { (i) }
-
static inline int atomic_read(const atomic_t *v)
{
int c;
@@ -47,7 +45,11 @@
static inline void atomic_add(int i, atomic_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ /*
+ * Order of conditions is important to circumvent gcc 10 bug:
+ * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+ */
+ if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
__atomic_add_const(i, &v->counter);
return;
}
@@ -114,7 +116,11 @@
static inline void atomic64_add(s64 i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+ /*
+ * Order of conditions is important to circumvent gcc 10 bug:
+ * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+ */
+ if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
__atomic64_add_const(i, (long *)&v->counter);
return;
}
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index eb7eed4..431e208 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -241,7 +241,9 @@
arch___clear_bit(nr, ptr);
}
-#include <asm-generic/bitops-instrumented.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
/*
* Functions which use MSB0 bit numbering.
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 713fc97..0b25f28 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -2,23 +2,22 @@
#ifndef _ASM_S390_BUG_H
#define _ASM_S390_BUG_H
-#include <linux/kernel.h>
+#include <linux/compiler.h>
#ifdef CONFIG_BUG
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define __EMIT_BUG(x) do { \
- asm volatile( \
- "0: j 0b+2\n" \
- "1:\n" \
+ asm_inline volatile( \
+ "0: mc 0,0\n" \
".section .rodata.str,\"aMS\",@progbits,1\n" \
- "2: .asciz \""__FILE__"\"\n" \
+ "1: .asciz \""__FILE__"\"\n" \
".previous\n" \
".section __bug_table,\"awM\",@progbits,%2\n" \
- "3: .long 1b-3b,2b-3b\n" \
+ "2: .long 0b-2b,1b-2b\n" \
" .short %0,%1\n" \
- " .org 3b+%2\n" \
+ " .org 2b+%2\n" \
".previous\n" \
: : "i" (__LINE__), \
"i" (x), \
@@ -28,13 +27,12 @@
#else /* CONFIG_DEBUG_BUGVERBOSE */
#define __EMIT_BUG(x) do { \
- asm volatile( \
- "0: j 0b+2\n" \
- "1:\n" \
+ asm_inline volatile( \
+ "0: mc 0,0\n" \
".section __bug_table,\"awM\",@progbits,%1\n" \
- "2: .long 1b-2b\n" \
+ "1: .long 0b-1b\n" \
" .short %0\n" \
- " .org 2b+%1\n" \
+ " .org 1b+%1\n" \
".previous\n" \
: : "i" (x), \
"i" (sizeof(struct bug_entry))); \
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index d5e22e8..0012817 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h
@@ -14,6 +14,6 @@
#define L1_CACHE_SHIFT 8
#define NET_SKB_PAD 32
-#define __read_mostly __section(.data..read_mostly)
+#define __read_mostly __section(".data..read_mostly")
#endif
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 865ce1c..c0be5fe 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -11,6 +11,7 @@
#include <linux/device.h>
#include <linux/mod_devicetable.h>
+#include <asm/chsc.h>
#include <asm/fcx.h>
#include <asm/irq.h>
#include <asm/schid.h>
@@ -236,4 +237,11 @@
struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
+int ccw_device_pnso(struct ccw_device *cdev,
+ struct chsc_pnso_area *pnso_area, u8 oc,
+ struct chsc_pnso_resume_token resume_token, int cnc);
+int ccw_device_get_cssid(struct ccw_device *cdev, u8 *cssid);
+int ccw_device_get_iid(struct ccw_device *cdev, u8 *iid);
+int ccw_device_get_chpid(struct ccw_device *cdev, int chp_idx, u8 *chpid);
+int ccw_device_get_chid(struct ccw_device *cdev, int chp_idx, u16 *chid);
#endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 7293c13..ad3acb1 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -36,11 +36,6 @@
* @set_online: function called when device is set online
* @set_offline: function called when device is set offline
* @shutdown: function called when device is shut down
- * @prepare: prepare for pm state transition
- * @complete: undo work done in @prepare
- * @freeze: callback for freezing during hibernation snapshotting
- * @thaw: undo work done in @freeze
- * @restore: callback for restoring after hibernation
* @driver: embedded driver structure
* @ccw_driver: supported ccw_driver (optional)
*/
@@ -50,11 +45,6 @@
int (*set_online) (struct ccwgroup_device *);
int (*set_offline) (struct ccwgroup_device *);
void (*shutdown)(struct ccwgroup_device *);
- int (*prepare) (struct ccwgroup_device *);
- void (*complete) (struct ccwgroup_device *);
- int (*freeze)(struct ccwgroup_device *);
- int (*thaw) (struct ccwgroup_device *);
- int (*restore)(struct ccwgroup_device *);
struct device_driver driver;
struct ccw_driver *ccw_driver;
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 91e376b..a8c02cf 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -13,21 +13,21 @@
#define _S390_CHECKSUM_H
#include <linux/uaccess.h>
+#include <linux/in6.h>
/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
*
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
*
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
*
- * it's best to have buff aligned on a 32-bit boundary
+ * It's best to have buff aligned on a 32-bit boundary.
*/
-static inline __wsum
-csum_partial(const void *buff, int len, __wsum sum)
+static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
{
register unsigned long reg2 asm("2") = (unsigned long) buff;
register unsigned long reg3 asm("3") = (unsigned long) len;
@@ -40,100 +40,91 @@
}
/*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- *
- * Copy from userspace and compute checksum.
- */
-static inline __wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum,
- int *err_ptr)
-{
- if (unlikely(copy_from_user(dst, src, len)))
- *err_ptr = -EFAULT;
- return csum_partial(dst, len, sum);
-}
-
-
-static inline __wsum
-csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
-{
- memcpy(dst,src,len);
- return csum_partial(dst, len, sum);
-}
-
-/*
- * Fold a partial checksum without adding pseudo headers
+ * Fold a partial checksum without adding pseudo headers.
*/
static inline __sum16 csum_fold(__wsum sum)
{
u32 csum = (__force u32) sum;
- csum += (csum >> 16) + (csum << 16);
+ csum += (csum >> 16) | (csum << 16);
csum >>= 16;
return (__force __sum16) ~csum;
}
/*
- * This is a version of ip_compute_csum() optimized for IP headers,
- * which always checksum on 4 octet boundaries.
- *
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksums on 4 octet boundaries.
*/
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
- return csum_fold(csum_partial(iph, ihl*4, 0));
+ __u64 csum = 0;
+ __u32 *ptr = (u32 *)iph;
+
+ csum += *ptr++;
+ csum += *ptr++;
+ csum += *ptr++;
+ csum += *ptr++;
+ ihl -= 4;
+ while (ihl--)
+ csum += *ptr++;
+ csum += (csum >> 32) | (csum << 32);
+ return csum_fold((__force __wsum)(csum >> 32));
}
/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 32-bit checksum
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 32-bit checksum.
*/
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
- __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
- __u32 csum = (__force __u32)sum;
+ __u64 csum = (__force __u64)sum;
csum += (__force __u32)saddr;
- if (csum < (__force __u32)saddr)
- csum++;
-
csum += (__force __u32)daddr;
- if (csum < (__force __u32)daddr)
- csum++;
-
- csum += len + proto;
- if (csum < len + proto)
- csum++;
-
- return (__force __wsum)csum;
+ csum += len;
+ csum += proto;
+ csum += (csum >> 32) | (csum << 32);
+ return (__force __wsum)(csum >> 32);
}
/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 16-bit checksum, already complemented.
*/
-
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
- __wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
- return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
+ * Used for miscellaneous IP-like checksums, mainly icmp.
*/
-
static inline __sum16 ip_compute_csum(const void *buff, int len)
{
return csum_fold(csum_partial(buff, len, 0));
}
+#define _HAVE_ARCH_IPV6_CSUM
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ __u64 sum = (__force __u64)csum;
+
+ sum += (__force __u32)saddr->s6_addr32[0];
+ sum += (__force __u32)saddr->s6_addr32[1];
+ sum += (__force __u32)saddr->s6_addr32[2];
+ sum += (__force __u32)saddr->s6_addr32[3];
+ sum += (__force __u32)daddr->s6_addr32[0];
+ sum += (__force __u32)daddr->s6_addr32[1];
+ sum += (__force __u32)daddr->s6_addr32[2];
+ sum += (__force __u32)daddr->s6_addr32[3];
+ sum += len;
+ sum += proto;
+ sum += (sum >> 32) | (sum << 32);
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+
#endif /* _S390_CHECKSUM_H */
-
-
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
new file mode 100644
index 0000000..ae4d254
--- /dev/null
+++ b/arch/s390/include/asm/chsc.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Alexandra Winter <wintera@linux.ibm.com>
+ *
+ * Interface for Channel Subsystem Call
+ */
+#ifndef _ASM_S390_CHSC_H
+#define _ASM_S390_CHSC_H
+
+#include <uapi/asm/chsc.h>
+
+/**
+ * Operation codes for CHSC PNSO:
+ * PNSO_OC_NET_BRIDGE_INFO - only addresses that are visible to a bridgeport
+ * PNSO_OC_NET_ADDR_INFO - all addresses
+ */
+#define PNSO_OC_NET_BRIDGE_INFO 0
+#define PNSO_OC_NET_ADDR_INFO 3
+/**
+ * struct chsc_pnso_naid_l2 - network address information descriptor
+ * @nit: Network interface token
+ * @addr_lnid: network address and logical network id (VLAN ID)
+ */
+struct chsc_pnso_naid_l2 {
+ u64 nit;
+ struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+struct chsc_pnso_resume_token {
+ u64 t1;
+ u64 t2;
+} __packed;
+
+struct chsc_pnso_naihdr {
+ struct chsc_pnso_resume_token resume_token;
+ u32:32;
+ u32 instance;
+ u32:24;
+ u8 naids;
+ u32 reserved[3];
+} __packed;
+
+struct chsc_pnso_area {
+ struct chsc_header request;
+ u8:2;
+ u8 m:1;
+ u8:5;
+ u8:2;
+ u8 ssid:2;
+ u8 fmt:4;
+ u16 sch;
+ u8:8;
+ u8 cssid;
+ u16:16;
+ u8 oc;
+ u32:24;
+ struct chsc_pnso_resume_token resume_token;
+ u32 n:1;
+ u32:31;
+ u32 reserved[3];
+ struct chsc_header response;
+ u32:32;
+ struct chsc_pnso_naihdr naihdr;
+ struct chsc_pnso_naid_l2 entries[0];
+} __packed __aligned(PAGE_SIZE);
+
+#endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index b5bfb31..5c58756 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -356,7 +356,6 @@
return 8 - ffs(mask);
}
-void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void *cio_dma_zalloc(size_t size);
@@ -372,6 +371,7 @@
/* Function from drivers/s390/cio/chsc.c */
int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);
+int chsc_stzi(void *page, void *result, size_t size);
int chsc_sgib(u32 origin);
#endif
diff --git a/arch/s390/include/asm/clocksource.h b/arch/s390/include/asm/clocksource.h
new file mode 100644
index 0000000..0343436
--- /dev/null
+++ b/arch/s390/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* s390-specific clocksource additions */
+
+#ifndef _ASM_S390_CLOCKSOURCE_H
+#define _ASM_S390_CLOCKSOURCE_H
+
+#endif /* _ASM_S390_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index 3925b0f..10919ee 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -5,6 +5,9 @@
/* CLP common request & response block size */
#define CLP_BLK_SIZE PAGE_SIZE
+/* Call Logical Processor - Command Code */
+#define CLP_SLPC 0x0001
+
#define CLP_LPS_BASE 0
#define CLP_LPS_PCI 2
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 63b46e3..ea5b9c3 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -63,8 +63,6 @@
typedef u16 compat_ipc_pid_t;
typedef u32 compat_caddr_t;
typedef __kernel_fsid_t compat_fsid_t;
-typedef s64 compat_s64;
-typedef u64 compat_u64;
typedef struct {
u32 mask;
@@ -177,11 +175,7 @@
{
return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
+#define compat_ptr(uptr) compat_ptr(uptr)
#ifdef CONFIG_COMPAT
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 819803a..0d90cbe 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -313,7 +313,7 @@
return (unsigned long *) ret;
}
-/* Return if the entry in the sample data block table (sdbt)
+/* Return true if the entry in the sample data block table (sdbt)
* is a link to the next sdbt */
static inline int is_link_entry(unsigned long *s)
{
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 480bb02..638137d 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -36,7 +36,9 @@
u64 alt_ssi : 1; /* bit 108 */
u64 : 1;
u64 narf : 1; /* bit 110 */
- u64 : 12;
+ u64 : 5;
+ u64 enarf: 1; /* bit 116 */
+ u64 : 6;
u64 util_str : 1;/* bit 123 */
} __packed;
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 60f9075..ed5efbb 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -11,6 +11,7 @@
#include <linux/bits.h>
#define CR0_CLOCK_COMPARATOR_SIGN BIT(63 - 10)
+#define CR0_LOW_ADDRESS_PROTECTION BIT(63 - 35)
#define CR0_EMERGENCY_SIGNAL_SUBMASK BIT(63 - 49)
#define CR0_EXTERNAL_CALL_SUBMASK BIT(63 - 50)
#define CR0_CLOCK_COMPARATOR_SUBMASK BIT(63 - 52)
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 3101340..c1b82bc 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -2,7 +2,7 @@
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2000
+ * Copyright IBM Corp. 1999, 2020
*/
#ifndef DEBUG_H
#define DEBUG_H
@@ -12,7 +12,7 @@
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/refcount.h>
-#include <uapi/asm/debug.h>
+#include <linux/fs.h>
#define DEBUG_MAX_LEVEL 6 /* debug levels range from 0 to 6 */
#define DEBUG_OFF_LEVEL -1 /* level where debug is switched off */
@@ -26,6 +26,16 @@
#define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
/* the entry information */
+#define __DEBUG_FEATURE_VERSION 3 /* version of debug feature */
+
+struct __debug_entry {
+ unsigned long clock : 60;
+ unsigned long exception : 1;
+ unsigned long level : 3;
+ void *caller;
+ unsigned short cpu;
+} __packed;
+
typedef struct __debug_entry debug_entry_t;
struct debug_view;
@@ -82,7 +92,6 @@
};
extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
extern struct debug_view debug_sprintf_view;
/* do NOT use the _common functions */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 0036eab..ca8f85b 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -298,10 +298,8 @@
union diag318_info {
unsigned long val;
struct {
- unsigned int cpnc : 8;
- unsigned int cpvc_linux : 24;
- unsigned char cpvc_distro[3];
- unsigned char zero;
+ unsigned long cpnc : 8;
+ unsigned long cpvc : 56;
};
};
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index ae27f75..ce0db81 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -1,12 +1,20 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __S390_EXTABLE_H
#define __S390_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of three addresses:
+ *
+ * - Address of an instruction that is allowed to fault.
+ * - Address at which the program should continue.
+ * - Optional address of handler that takes pt_regs * argument and runs in
+ * interrupt context.
+ *
+ * No registers are modified, so it is entirely up to the continuation code
+ * to figure out what to do.
*
* All the routines below use bits of fixup code that are out of line
* with the main instruction path. This means when everything is well,
@@ -17,6 +25,7 @@
struct exception_table_entry
{
int insn, fixup;
+ long handler;
};
extern struct exception_table_entry *__start_dma_ex_table;
@@ -29,6 +38,44 @@
return (unsigned long)&x->fixup + x->fixup;
}
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+ struct pt_regs *);
+
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+ if (likely(!x->handler))
+ return NULL;
+ return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
+
+static inline bool ex_handle(const struct exception_table_entry *x,
+ struct pt_regs *regs)
+{
+ ex_handler_t handler = ex_fixup_handler(x);
+
+ if (unlikely(handler))
+ return handler(x, regs);
+ regs->psw.addr = extable_fixup(x);
+ return true;
+}
+
#define ARCH_HAS_RELATIVE_EXTABLE
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+ struct exception_table_entry *b,
+ struct exception_table_entry tmp,
+ int delta)
+{
+ a->fixup = b->fixup + delta;
+ b->fixup = tmp.fixup - delta;
+ a->handler = b->handler;
+ if (a->handler)
+ a->handler += delta;
+ b->handler = tmp.handler;
+ if (b->handler)
+ b->handler -= delta;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
#endif
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5e97a43..26f9144 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -29,7 +29,6 @@
mm_segment_t old_fs;
old_fs = enable_sacf_uaccess();
- pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("lr %2,%5\n",
@@ -54,7 +53,6 @@
default:
ret = -ENOSYS;
}
- pagefault_enable();
disable_sacf_uaccess(old_fs);
if (!ret)
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 37f96b6..40264f6 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,7 @@
#ifndef _ASM_S390_GMAP_H
#define _ASM_S390_GMAP_H
+#include <linux/radix-tree.h>
#include <linux/refcount.h>
/* Generic bits for GMAP notification on DAT table entry changes. */
@@ -31,6 +32,7 @@
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
* @host_to_rmap: radix tree with gmap_rmap lists
* @children: list of shadow gmap structures
* @pt_list: list of all page tables used in the shadow guest address space
@@ -54,6 +56,8 @@
unsigned long asce_end;
void *private;
bool pfault_enabled;
+ /* only set for protected virtual machines */
+ unsigned long guest_handle;
/* Additional data for shadow guest address spaces */
struct radix_tree_root host_to_rmap;
struct list_head children;
@@ -136,12 +140,12 @@
void gmap_register_pte_notifier(struct gmap_notifier *);
void gmap_unregister_pte_notifier(struct gmap_notifier *);
-void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
- unsigned long bits);
int gmap_mprotect_notify(struct gmap *, unsigned long start,
unsigned long len, int prot);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
+int gmap_mark_unmergeable(void);
+void s390_reset_acc(struct mm_struct *mm);
#endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index de8f0bf..60f9241 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -9,8 +9,8 @@
#ifndef _ASM_S390_HUGETLB_H
#define _ASM_S390_HUGETLB_H
+#include <linux/pgtable.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#define hugetlb_free_pgd_range free_pgd_range
#define hugepages_supported() (MACHINE_HAS_EDAT1)
@@ -21,13 +21,6 @@
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep);
-static inline bool is_hugepage_only_range(struct mm_struct *mm,
- unsigned long addr,
- unsigned long len)
-{
- return false;
-}
-
/*
* If the arch doesn't supply something else, assume that hugepage
* size aligned regions are ok without further preparation.
@@ -46,6 +39,7 @@
{
clear_bit(PG_arch_1, &page->flags);
}
+#define arch_clear_hugepage_flags arch_clear_hugepage_flags
static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long sz)
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index adae176..9078b5b 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -7,6 +7,5 @@
void __init init_airq_interrupts(void);
void __init init_cio_interrupts(void);
-void __init init_ext_interrupts(void);
#endif
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index ca42161..28664ee 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/pci_io.h>
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
@@ -26,11 +27,10 @@
#define IO_SPACE_LIMIT 0
-#define ioremap_nocache(addr, size) ioremap(addr, size)
-#define ioremap_wc ioremap_nocache
-#define ioremap_wt ioremap_nocache
-
-void __iomem *ioremap(unsigned long offset, unsigned long size);
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
+void __iomem *ioremap(phys_addr_t addr, size_t size);
+void __iomem *ioremap_wc(phys_addr_t addr, size_t size);
+void __iomem *ioremap_wt(phys_addr_t addr, size_t size);
void iounmap(volatile void __iomem *addr);
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
@@ -56,6 +56,10 @@
#define pci_iomap_wc pci_iomap_wc
#define pci_iomap_wc_range pci_iomap_wc_range
+#define ioremap ioremap
+#define ioremap_wt ioremap_wt
+#define ioremap_wc ioremap_wc
+
#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 084e71b..a9e2c72 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -21,6 +21,7 @@
struct ipl_pb0_common common;
struct ipl_pb0_fcp fcp;
struct ipl_pb0_ccw ccw;
+ struct ipl_pb0_nvme nvme;
char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
};
} __packed __aligned(PAGE_SIZE);
@@ -30,6 +31,11 @@
#define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \
sizeof(struct ipl_pb0_fcp))
#define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp))
+
+#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \
+ sizeof(struct ipl_pb0_nvme))
+#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme))
+
#define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \
sizeof(struct ipl_pb0_ccw))
#define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
@@ -59,6 +65,8 @@
IPL_TYPE_FCP = 4,
IPL_TYPE_FCP_DUMP = 8,
IPL_TYPE_NSS = 16,
+ IPL_TYPE_NVME = 32,
+ IPL_TYPE_NVME_DUMP = 64,
};
struct ipl_info
@@ -74,6 +82,10 @@
u64 lun;
} fcp;
struct {
+ u32 fid;
+ u32 nsid;
+ } nvme;
+ struct {
char name[NSS_NAME_SIZE + 1];
} nss;
} data;
@@ -83,6 +95,12 @@
extern void setup_ipl(void);
extern void set_os_info_reipl_block(void);
+static inline bool is_ipl_type_dump(void)
+{
+ return (ipl_info.type == IPL_TYPE_FCP_DUMP) ||
+ (ipl_info.type == IPL_TYPE_NVME_DUMP);
+}
+
struct ipl_report {
struct ipl_parameter_block *ipib;
struct list_head components;
@@ -119,6 +137,7 @@
DIAG308_LOAD_NORMAL_DUMP = 4,
DIAG308_SET = 5,
DIAG308_STORE = 6,
+ DIAG308_LOAD_NORMAL = 7,
};
enum diag308_rc {
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 39f747d..dcb1bba 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -10,7 +10,9 @@
#define JUMP_LABEL_NOP_SIZE 6
#define JUMP_LABEL_NOP_OFFSET 2
-#if __GNUC__ < 9
+#ifdef CONFIG_CC_IS_CLANG
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
+#elif __GNUC__ < 9
#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X"
#else
#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd"
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 70930fe..e9bf486 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -2,8 +2,6 @@
#ifndef __ASM_KASAN_H
#define __ASM_KASAN_H
-#include <asm/pgtable.h>
-
#ifdef CONFIG_KASAN
#define KASAN_SHADOW_SCALE_SHIFT 3
@@ -21,6 +19,7 @@
extern void kasan_early_init(void);
extern void kasan_copy_shadow(pgd_t *dst);
extern void kasan_free_early_identity(void);
+extern unsigned long kasan_vmax;
#else
static inline void kasan_early_init(void) { }
static inline void kasan_copy_shadow(pgd_t *dst) { }
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index ea398a0..7f3c9ac 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -74,6 +74,12 @@
int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
unsigned long addr);
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ void *ipl_buf;
+};
+
extern const struct kexec_file_ops s390_kexec_image_ops;
extern const struct kexec_file_ops s390_kexec_elf_ops;
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index b106aa2..09cdb63 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -54,7 +54,6 @@
struct arch_specific_insn {
/* copy of original instruction */
kprobe_opcode_t *insn;
- unsigned int is_ftrace_insn : 1;
};
struct prev_kprobe {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4d59d11..171913b 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -122,6 +122,17 @@
__u32 reserved;
};
+#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
+ CR0_MEASUREMENT_ALERT_SUBMASK)
+#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
+ CR14_EXTERNAL_DAMAGE_SUBMASK)
+
+#define SIDAD_SIZE_MASK 0xff
+#define sida_origin(sie_block) \
+ ((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
@@ -155,7 +166,13 @@
__u8 reserved08[4]; /* 0x0008 */
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
- __u8 reserved10[16]; /* 0x0010 */
+ union {
+ __u8 reserved10[16]; /* 0x0010 */
+ struct {
+ __u64 pv_handle_cpu;
+ __u64 pv_handle_config;
+ };
+ };
#define PROG_BLOCK_SIE (1<<0)
#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
@@ -204,10 +221,23 @@
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
#define ICPT_KSS 0x5c
+#define ICPT_MCHKREQ 0x60
+#define ICPT_INT_ENABLE 0x64
+#define ICPT_PV_INSTR 0x68
+#define ICPT_PV_NOTIFY 0x6c
+#define ICPT_PV_PREF 0x70
__u8 icptcode; /* 0x0050 */
__u8 icptstatus; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
- __u8 reserved54[2]; /* 0x0054 */
+ __u8 reserved54; /* 0x0054 */
+#define IICTL_CODE_NONE 0x00
+#define IICTL_CODE_MCHK 0x01
+#define IICTL_CODE_EXT 0x02
+#define IICTL_CODE_IO 0x03
+#define IICTL_CODE_RESTART 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND 0x11
+ __u8 iictl; /* 0x0055 */
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
@@ -228,9 +258,10 @@
#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
- __u8 reserved68; /* 0x0068 */
+ __u8 sdf; /* 0x0068 */
__u8 epdx; /* 0x0069 */
- __u8 reserved6a[2]; /* 0x006a */
+ __u8 cpnc; /* 0x006a */
+ __u8 reserved6b; /* 0x006b */
__u32 todpr; /* 0x006c */
#define GISA_FORMAT1 0x00000001
__u32 gd; /* 0x0070 */
@@ -244,31 +275,58 @@
#define HPID_KVM 0x4
#define HPID_VSIE 0x5
__u8 hpid; /* 0x00b8 */
- __u8 reservedb9[11]; /* 0x00b9 */
- __u16 extcpuaddr; /* 0x00c4 */
- __u16 eic; /* 0x00c6 */
+ __u8 reservedb9[7]; /* 0x00b9 */
+ union {
+ struct {
+ __u32 eiparams; /* 0x00c0 */
+ __u16 extcpuaddr; /* 0x00c4 */
+ __u16 eic; /* 0x00c6 */
+ };
+ __u64 mcic; /* 0x00c0 */
+ } __packed;
__u32 reservedc8; /* 0x00c8 */
- __u16 pgmilc; /* 0x00cc */
- __u16 iprcc; /* 0x00ce */
- __u32 dxc; /* 0x00d0 */
- __u16 mcn; /* 0x00d4 */
- __u8 perc; /* 0x00d6 */
- __u8 peratmid; /* 0x00d7 */
+ union {
+ struct {
+ __u16 pgmilc; /* 0x00cc */
+ __u16 iprcc; /* 0x00ce */
+ };
+ __u32 edc; /* 0x00cc */
+ } __packed;
+ union {
+ struct {
+ __u32 dxc; /* 0x00d0 */
+ __u16 mcn; /* 0x00d4 */
+ __u8 perc; /* 0x00d6 */
+ __u8 peratmid; /* 0x00d7 */
+ };
+ __u64 faddr; /* 0x00d0 */
+ } __packed;
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
- __u64 tecmc; /* 0x00e8 */
- __u8 reservedf0[12]; /* 0x00f0 */
+ union {
+ __u64 tecmc; /* 0x00e8 */
+ struct {
+ __u16 subchannel_id; /* 0x00e8 */
+ __u16 subchannel_nr; /* 0x00ea */
+ __u32 io_int_parm; /* 0x00ec */
+ __u32 io_int_word; /* 0x00f0 */
+ };
+ } __packed;
+ __u8 reservedf4[8]; /* 0x00f4 */
#define CRYCB_FORMAT_MASK 0x00000003
#define CRYCB_FORMAT0 0x00000000
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
- __u64 gbea; /* 0x0180 */
+ union {
+ __u64 gbea; /* 0x0180 */
+ __u64 sidad;
+ };
__u8 reserved188[8]; /* 0x0188 */
__u64 sdnxo; /* 0x0190 */
__u8 reserved198[8]; /* 0x0198 */
@@ -287,7 +345,7 @@
__u64 itdba; /* 0x01e8 */
__u64 riccbd; /* 0x01f0 */
__u64 gvrd; /* 0x01f8 */
-} __attribute__((packed));
+} __packed __aligned(512);
struct kvm_s390_itdb {
__u8 data[256];
@@ -296,7 +354,9 @@
struct sie_page {
struct kvm_s390_sie_block sie_block;
struct mcck_volatile_info mcck_info; /* 0x0200 */
- __u8 reserved218[1000]; /* 0x0218 */
+ __u8 reserved218[360]; /* 0x0218 */
+ __u64 pv_grregs[16]; /* 0x0380 */
+ __u8 reserved400[512]; /* 0x0400 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
};
@@ -316,6 +376,8 @@
u64 halt_poll_invalid;
u64 halt_no_poll_steal;
u64 halt_wakeup;
+ u64 halt_poll_success_ns;
+ u64 halt_poll_fail_ns;
u64 instruction_lctl;
u64 instruction_lctlg;
u64 instruction_stctl;
@@ -392,6 +454,7 @@
u64 diagnose_10;
u64 diagnose_44;
u64 diagnose_9c;
+ u64 diagnose_9c_ignored;
u64 diagnose_258;
u64 diagnose_308;
u64 diagnose_500;
@@ -470,6 +533,7 @@
IRQ_PEND_PFAULT_INIT,
IRQ_PEND_EXT_HOST,
IRQ_PEND_EXT_SERVICE,
+ IRQ_PEND_EXT_SERVICE_EV,
IRQ_PEND_EXT_TIMING,
IRQ_PEND_EXT_CPU_TIMER,
IRQ_PEND_EXT_CLOCK_COMP,
@@ -514,6 +578,7 @@
(1UL << IRQ_PEND_EXT_TIMING) | \
(1UL << IRQ_PEND_EXT_HOST) | \
(1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV) | \
(1UL << IRQ_PEND_VIRTIO) | \
(1UL << IRQ_PEND_PFAULT_INIT) | \
(1UL << IRQ_PEND_PFAULT_DONE))
@@ -530,6 +595,13 @@
#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
(1UL << IRQ_PEND_MCHK_EX))
+#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER) | \
+ (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+ (1UL << IRQ_PEND_EXT_EMERGENCY) | \
+ (1UL << IRQ_PEND_EXT_EXTERNAL) | \
+ (1UL << IRQ_PEND_EXT_SERVICE) | \
+ (1UL << IRQ_PEND_EXT_SERVICE_EV))
+
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
@@ -588,6 +660,7 @@
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
+ unsigned long masked_irqs;
spinlock_t lock;
struct list_head lists[FIRQ_LIST_COUNT];
int counters[FIRQ_MAX_COUNT];
@@ -639,6 +712,11 @@
unsigned long last_bp;
};
+struct kvm_s390_pv_vcpu {
+ u64 handle;
+ unsigned long stor_base;
+};
+
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
/* if vsie is active, currently executed shadow sie control block */
@@ -667,6 +745,8 @@
__u64 cputm_start;
bool gs_enabled;
bool skey_enabled;
+ struct kvm_s390_pv_vcpu pv;
+ union diag318_info diag318_info;
};
struct kvm_vm_stat {
@@ -695,9 +775,6 @@
bool masked;
bool swap;
bool suppressible;
- struct rw_semaphore maps_lock;
- struct list_head maps;
- atomic_t nr_maps;
};
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
@@ -840,6 +917,13 @@
DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
};
+struct kvm_s390_pv {
+ u64 handle;
+ u64 guest_len;
+ unsigned long stor_base;
+ void *stor_var;
+};
+
struct kvm_arch{
void *sca;
int use_esca;
@@ -876,6 +960,7 @@
/* indexed by vcpu_idx */
DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
struct kvm_s390_gisa_interrupt gisa_int;
+ struct kvm_s390_pv pv;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -891,17 +976,19 @@
unsigned long pfault_token;
};
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
+static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}
+
void kvm_arch_crypto_clear_masks(struct kvm *kvm);
void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
unsigned long *aqm, unsigned long *adm);
@@ -914,10 +1001,9 @@
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+ struct kvm_memory_slot *slot) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 7f22262..a0a7a2c 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -2,38 +2,27 @@
#ifndef __ASM_LINKAGE_H
#define __ASM_LINKAGE_H
+#include <asm/asm-const.h>
#include <linux/stringify.h>
#define __ALIGN .align 4, 0x07
#define __ALIGN_STR __stringify(__ALIGN)
-#ifndef __ASSEMBLY__
-
/*
* Helper macro for exception table entries
*/
-#define EX_TABLE(_fault, _target) \
- ".section __ex_table,\"a\"\n" \
- ".align 4\n" \
- ".long (" #_fault ") - .\n" \
- ".long (" #_target ") - .\n" \
- ".previous\n"
-#else /* __ASSEMBLY__ */
+#define __EX_TABLE(_section, _fault, _target) \
+ stringify_in_c(.section _section,"a";) \
+ stringify_in_c(.align 8;) \
+ stringify_in_c(.long (_fault) - .;) \
+ stringify_in_c(.long (_target) - .;) \
+ stringify_in_c(.quad 0;) \
+ stringify_in_c(.previous)
-#define EX_TABLE(_fault, _target) \
- .section __ex_table,"a" ; \
- .align 4 ; \
- .long (_fault) - . ; \
- .long (_target) - . ; \
- .previous
+#define EX_TABLE(_fault, _target) \
+ __EX_TABLE(__ex_table, _fault, _target)
+#define EX_TABLE_DMA(_fault, _target) \
+ __EX_TABLE(.dma.ex_table, _fault, _target)
-#define EX_TABLE_DMA(_fault, _target) \
- .section .dma.ex_table, "a" ; \
- .align 4 ; \
- .long (_fault) - . ; \
- .long (_target) - . ; \
- .previous
-
-#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bcfb637..e12ff0f 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@
unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* The mmu context belongs to a secure guest. */
+ atomic_t is_protected;
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
@@ -32,8 +34,6 @@
unsigned int uses_cmm:1;
/* The gmaps associated with this context are allowed to use huge pages. */
unsigned int allow_gmap_hpage_1m:1;
- /* The mmu context is for compat task */
- unsigned int compat_mm:1;
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8d04e6f..c9f3d8a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -18,14 +18,16 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ unsigned long asce_type, init_entry;
+
spin_lock_init(&mm->context.lock);
INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
+ atomic_set(&mm->context.is_protected, 0);
mm->context.gmap_asce = 0;
mm->context.flush_mm = 0;
- mm->context.compat_mm = test_thread_flag(TIF_31BIT);
#ifdef CONFIG_PGSTE
mm->context.alloc_pgste = page_table_allocate_pgste ||
test_thread_flag(TIF_PGSTE) ||
@@ -36,33 +38,34 @@
mm->context.allow_gmap_hpage_1m = 0;
#endif
switch (mm->context.asce_limit) {
- case _REGION2_SIZE:
+ default:
/*
- * forked 3-level task, fall through to set new asce with new
- * mm->pgd
+ * context created by exec, the value of asce_limit can
+ * only be zero in this case
*/
- case 0:
- /* context created by exec, set asce limit to 4TB */
- mm->context.asce_limit = STACK_TOP_MAX;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+ VM_BUG_ON(mm->context.asce_limit);
+ /* continue as 3-level task */
+ mm->context.asce_limit = _REGION2_SIZE;
+ fallthrough;
+ case _REGION2_SIZE:
+ /* forked 3-level task */
+ init_entry = _REGION3_ENTRY_EMPTY;
+ asce_type = _ASCE_TYPE_REGION3;
break;
- case -PAGE_SIZE:
- /* forked 5-level task, set new asce with new_mm->pgd */
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ case TASK_SIZE_MAX:
+ /* forked 5-level task */
+ init_entry = _REGION1_ENTRY_EMPTY;
+ asce_type = _ASCE_TYPE_REGION1;
break;
case _REGION1_SIZE:
- /* forked 4-level task, set new asce with new mm->pgd */
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ /* forked 4-level task */
+ init_entry = _REGION2_ENTRY_EMPTY;
+ asce_type = _ASCE_TYPE_REGION2;
break;
- case _REGION3_SIZE:
- /* forked 2-level compat task, set new asce with new mm->pgd */
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
}
- crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | asce_type;
+ crst_table_init((unsigned long *) mm->pgd, init_entry);
return 0;
}
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b160da8..5afee80 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -99,7 +99,7 @@
void nmi_free_per_cpu(struct lowcore *lc);
void s390_handle_mcck(void);
-void s390_do_machine_check(struct pt_regs *regs);
+int s390_do_machine_check(struct pt_regs *regs);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
index c759dcf..23cd5d1 100644
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -13,23 +13,13 @@
#ifdef CONFIG_NUMA
#include <linux/numa.h>
-#include <linux/cpumask.h>
void numa_setup(void);
-int numa_pfn_to_nid(unsigned long pfn);
-void numa_update_cpu_topology(void);
-
-extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-extern int numa_debug_enabled;
#else
static inline void numa_setup(void) { }
-static inline void numa_update_cpu_topology(void) { }
-static inline int numa_pfn_to_nid(unsigned long pfn)
-{
- return 0;
-}
#endif /* CONFIG_NUMA */
+
#endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index e399102..cc98f9b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -153,6 +153,11 @@
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
#endif /* !__ASSEMBLY__ */
#define __PAGE_OFFSET 0x0UL
@@ -161,25 +166,22 @@
#define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)(unsigned long)(x))
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+#define phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys))
+#define page_to_phys(page) pfn_to_phys(page_to_pfn(page))
+
+#define pfn_to_virt(pfn) __va(pfn_to_phys(pfn))
+#define virt_to_pfn(kaddr) (phys_to_pfn(__pa(kaddr)))
#define pfn_to_kaddr(pfn) pfn_to_virt(pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define page_to_virt(page) pfn_to_virt(page_to_pfn(page))
-#define phys_to_pfn(kaddr) ((kaddr) >> PAGE_SHIFT)
-#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
-#define phys_to_page(kaddr) pfn_to_page(phys_to_pfn(kaddr))
-#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
-
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-
-#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define ARCH_ZONE_DMA_BITS 31
+#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6087a4e..1226971 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -2,12 +2,10 @@
#ifndef __ASM_S390_PCI_H
#define __ASM_S390_PCI_H
-/* must be set before including pci_clp.h */
-#define PCI_BAR_COUNT 6
-
#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/iommu.h>
+#include <linux/pci_hotplug.h>
#include <asm-generic/pci.h>
#include <asm/pci_clp.h>
#include <asm/pci_debug.h>
@@ -24,10 +22,16 @@
int pci_proc_domain(struct pci_bus *);
#define ZPCI_BUS_NR 0 /* default bus number */
-#define ZPCI_DEVFN 0 /* default device number */
#define ZPCI_NR_DMA_SPACES 1
#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
+#define ZPCI_DOMAIN_BITMAP_SIZE (1 << 16)
+
+#ifdef PCI
+#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE)
+# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE
+#endif
+#endif /* PCI */
/* PCI Function Controls */
#define ZPCI_FC_FN_ENABLED 0x80
@@ -95,10 +99,27 @@
struct s390_domain;
+#define ZPCI_FUNCTIONS_PER_BUS 256
+struct zpci_bus {
+ struct kref kref;
+ struct pci_bus *bus;
+ struct zpci_dev *function[ZPCI_FUNCTIONS_PER_BUS];
+ struct list_head resources;
+ struct list_head bus_next;
+ struct resource bus_resource;
+ int pchid;
+ int domain_nr;
+ bool multifunction;
+ enum pci_bus_speed max_bus_speed;
+};
+
/* Private data per function */
struct zpci_dev {
- struct pci_bus *bus;
+ struct zpci_bus *zbus;
struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */
+ struct list_head bus_next;
+ struct kref kref;
+ struct hotplug_slot hotplug_slot;
enum zpci_state state;
u32 fid; /* function ID, used by sclp */
@@ -107,7 +128,13 @@
u16 pchid; /* physical channel ID */
u8 pfgid; /* function group ID */
u8 pft; /* pci function type */
- u16 domain;
+ u8 port;
+ u8 rid_available : 1;
+ u8 has_hp_slot : 1;
+ u8 is_physfn : 1;
+ u8 util_str_avail : 1;
+ u8 reserved : 4;
+ unsigned int devfn; /* DEVFN part of the RID*/
struct mutex lock;
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
@@ -138,7 +165,7 @@
char res_name[16];
bool mio_capable;
- struct zpci_bar_struct bars[PCI_BAR_COUNT];
+ struct zpci_bar_struct bars[PCI_STD_NUM_BARS];
u64 start_dma; /* Start of available DMA addresses */
u64 end_dma; /* End of available DMA addresses */
@@ -153,6 +180,7 @@
atomic64_t mapped_pages;
atomic64_t unmapped_pages;
+ u8 version;
enum pci_bus_speed max_bus_speed;
struct dentry *debugfs_dev;
@@ -167,28 +195,34 @@
extern const struct attribute_group *zpci_attr_groups[];
extern unsigned int s390_pci_force_floating __initdata;
+extern unsigned int s390_pci_no_rid;
/* -----------------------------------------------------------------------------
Prototypes
----------------------------------------------------------------------------- */
/* Base stuff */
-int zpci_create_device(struct zpci_dev *);
-void zpci_remove_device(struct zpci_dev *zdev);
+int zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error);
int zpci_enable_device(struct zpci_dev *);
int zpci_disable_device(struct zpci_dev *);
+void zpci_device_reserved(struct zpci_dev *zdev);
+bool zpci_is_device_configured(struct zpci_dev *zdev);
+
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
int zpci_unregister_ioat(struct zpci_dev *, u8);
void zpci_remove_reserved_devices(void);
/* CLP */
+int clp_setup_writeback_mio(void);
int clp_scan_pci_devices(void);
-int clp_rescan_pci_devices(void);
-int clp_rescan_pci_devices_simple(u32 *fid);
-int clp_add_pci_device(u32, u32, int);
+int clp_query_pci_fn(struct zpci_dev *zdev);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
int clp_get_state(u32 fid, enum zpci_state *state);
+/* UID */
+void update_uid_checking(bool new);
+
/* IOMMU Interface */
int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
@@ -202,12 +236,10 @@
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);
-void zpci_rescan(void);
bool zpci_is_enabled(void);
#else /* CONFIG_PCI */
static inline void zpci_event_error(void *e) {}
static inline void zpci_event_availability(void *e) {}
-static inline void zpci_rescan(void) {}
#endif /* CONFIG_PCI */
#ifdef CONFIG_HOTPLUG_PCI_S390
@@ -224,7 +256,14 @@
/* Helpers */
static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
{
- return pdev->sysdata;
+ struct zpci_bus *zbus = pdev->sysdata;
+
+ return zbus->function[pdev->devfn];
+}
+
+static inline struct zpci_dev *to_zpci_dev(struct device *dev)
+{
+ return to_zpci(to_pci_dev(dev));
}
struct zpci_dev *get_zdev_by_fid(u32);
@@ -245,7 +284,6 @@
void zpci_debug_exit(void);
void zpci_debug_init_device(struct zpci_dev *, const char *);
void zpci_debug_exit_device(struct zpci_dev *);
-void zpci_debug_info(struct zpci_dev *, struct seq_file *);
/* Error reporting */
int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 5035917..1f4b666 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -7,6 +7,7 @@
/*
* Call Logical Processor - Command Codes
*/
+#define CLP_SLPC 0x0001
#define CLP_LIST_PCI 0x0002
#define CLP_QUERY_PCI_FN 0x0003
#define CLP_QUERY_PCI_FNGRP 0x0004
@@ -51,6 +52,19 @@
extern bool zpci_unique_uid;
+struct clp_rsp_slpc_pci {
+ struct clp_rsp_hdr hdr;
+ u32 reserved2[4];
+ u32 lpif[8];
+ u32 reserved3[4];
+ u32 vwb : 1;
+ u32 : 1;
+ u32 mio_wb : 6;
+ u32 : 24;
+ u32 reserved5[3];
+ u32 lpic[8];
+} __packed;
+
/* List PCI functions request */
struct clp_req_list_pci {
struct clp_req_hdr hdr;
@@ -77,7 +91,7 @@
struct {
u64 wb;
u64 wt;
- } addr[PCI_BAR_COUNT];
+ } addr[PCI_STD_NUM_BARS];
u32 reserved[6];
} __packed;
@@ -93,21 +107,28 @@
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
u16 vfn; /* virtual fn number */
- u16 : 6;
+ u16 : 3;
+ u16 rid_avail : 1;
+ u16 is_physfn : 1;
+ u16 reserved1 : 1;
u16 mio_addr_avail : 1;
u16 util_str_avail : 1; /* utility string available? */
u16 pfgid : 8; /* pci function group id */
u32 fid; /* pci function id */
- u8 bar_size[PCI_BAR_COUNT];
+ u8 bar_size[PCI_STD_NUM_BARS];
u16 pchid;
- __le32 bar[PCI_BAR_COUNT];
+ __le32 bar[PCI_STD_NUM_BARS];
u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */
- u32 : 16;
+ u16 : 12;
+ u16 port : 4;
u8 fmb_len;
u8 pft; /* pci function type */
u64 sdma; /* start dma as */
u64 edma; /* end dma as */
- u32 reserved[11];
+#define ZPCI_RID_MASK_DEVFN 0x00ff
+ u16 rid; /* BUS/DEVFN PCI address */
+ u16 reserved0;
+ u32 reserved[10];
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
u32 reserved2[16];
@@ -165,6 +186,11 @@
} __packed;
/* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_slpc_pci {
+ struct clp_req_slpc request;
+ struct clp_rsp_slpc_pci response;
+} __packed;
+
struct clp_req_rsp_list_pci {
struct clp_req_list_pci request;
struct clp_rsp_list_pci response;
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 419fac7..f62cd3e 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -131,12 +131,6 @@
*entry |= ZPCI_TABLE_VALID;
}
-static inline void invalidate_table_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry |= ZPCI_TABLE_INVALID;
-}
-
static inline void invalidate_pt_entry(unsigned long *entry)
{
WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
@@ -173,11 +167,6 @@
return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
}
-static inline int entry_isprotected(unsigned long entry)
-{
- return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
-}
-
static inline unsigned long *get_rt_sto(unsigned long entry)
{
return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index e4dc64c..287bb88 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -14,12 +14,13 @@
/* I/O Map */
#define ZPCI_IOMAP_SHIFT 48
-#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_SHIFT 62
+#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT)
#define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1)
#define ZPCI_IOMAP_MAX_ENTRIES \
- ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+ (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
#define ZPCI_IOMAP_ADDR_IDX_MASK \
- (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
+ ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)
struct zpci_iomap_entry {
u32 fh;
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 4652fff..b9da716 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -12,6 +12,7 @@
#include <linux/perf_event.h>
#include <linux/device.h>
+#include <asm/stacktrace.h>
/* Per-CPU flags for PMU states */
#define PMU_F_RESERVED 0x1000
@@ -73,4 +74,10 @@
#define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
#define SAMPLE_FREQ_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
+#define perf_arch_fetch_caller_regs(regs, __ip) do { \
+ (regs)->psw.addr = (__ip); \
+ (regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) - \
+ offsetof(struct stack_frame, back_chain); \
+} while (0)
+
#endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 77606c4..d1297d6 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -34,19 +34,21 @@
memset64((u64 *)crst, entry, _CRST_ENTRIES);
}
-static inline unsigned long pgd_entry_type(struct mm_struct *mm)
-{
- if (mm_pmd_folded(mm))
- return _SEGMENT_ENTRY_EMPTY;
- if (mm_pud_folded(mm))
- return _REGION3_ENTRY_EMPTY;
- if (mm_p4d_folded(mm))
- return _REGION2_ENTRY_EMPTY;
- return _REGION1_ENTRY_EMPTY;
-}
-
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
-void crst_table_downgrade(struct mm_struct *);
+
+static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ int rc;
+
+ if (addr + len > mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
{
@@ -116,24 +118,11 @@
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
- unsigned long *table = crst_table_alloc(mm);
-
- if (!table)
- return NULL;
- if (mm->context.asce_limit == _REGION3_SIZE) {
- /* Forking a compat process with 2 page table levels */
- if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
- crst_table_free(mm, table);
- return NULL;
- }
- }
- return (pgd_t *) table;
+ return (pgd_t *) crst_table_alloc(mm);
}
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
- if (mm->context.asce_limit == _REGION3_SIZE)
- pgtable_pmd_page_dtor(virt_to_page(pgd));
crst_table_free(mm, (unsigned long *) pgd);
}
@@ -157,8 +146,6 @@
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
-extern void rcu_table_freelist_finish(void);
-
void vmem_map_init(void);
void *vmem_crst_alloc(unsigned long val);
pte_t *vmem_pte_alloc(void);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ce5869..2338345 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -19,6 +19,7 @@
#include <linux/atomic.h>
#include <asm/bug.h>
#include <asm/page.h>
+#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
@@ -88,6 +89,7 @@
extern unsigned long VMALLOC_END;
#define VMALLOC_DEFAULT_SIZE ((128UL << 30) - MODULES_LEN)
extern struct page *vmemmap;
+extern unsigned long vmemmap_size;
#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
@@ -266,11 +268,9 @@
#endif
#define _REGION_ENTRY_BITS 0xfffffffffffff22fUL
-#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_HARDWARE_BITS 0xfffffffffffffe30UL
#define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE 0xfffffffffff00730UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
@@ -522,6 +522,15 @@
return 0;
}
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+ if (unlikely(atomic_read(&mm->context.is_protected)))
+ return 1;
+#endif
+ return 0;
+}
+
static inline int mm_alloc_pgste(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -675,6 +684,7 @@
return pud_val(pud) == _REGION3_ENTRY_EMPTY;
}
+#define pud_leaf pud_large
static inline int pud_large(pud_t pud)
{
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
@@ -682,16 +692,7 @@
return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
}
-static inline unsigned long pud_pfn(pud_t pud)
-{
- unsigned long origin_mask;
-
- origin_mask = _REGION_ENTRY_ORIGIN;
- if (pud_large(pud))
- origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
- return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
-}
-
+#define pmd_leaf pmd_large
static inline int pmd_large(pmd_t pmd)
{
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
@@ -699,10 +700,8 @@
static inline int pmd_bad(pmd_t pmd)
{
- if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0)
+ if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
return 1;
- if (pmd_large(pmd))
- return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
@@ -710,12 +709,10 @@
{
unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
- if (type > _REGION_ENTRY_TYPE_R3)
+ if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
return 1;
if (type < _REGION_ENTRY_TYPE_R3)
return 0;
- if (pud_large(pud))
- return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
}
@@ -740,16 +737,6 @@
return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
}
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- unsigned long origin_mask;
-
- origin_mask = _SEGMENT_ENTRY_ORIGIN;
- if (pmd_large(pmd))
- origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
- return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
-}
-
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
@@ -764,18 +751,12 @@
static inline int pmd_dirty(pmd_t pmd)
{
- int dirty = 1;
- if (pmd_large(pmd))
- dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
- return dirty;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
}
static inline int pmd_young(pmd_t pmd)
{
- int young = 1;
- if (pmd_large(pmd))
- young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
- return young;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
}
static inline int pte_present(pte_t pte)
@@ -884,6 +865,25 @@
}
/*
+ * Extract the pgprot value from the given pte while at the same time making it
+ * usable for kernel address space mappings where fault driven dirty and
+ * young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
+ * must not be set.
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
+
+ if (pte_write(pte))
+ pte_flags |= pgprot_val(PAGE_KERNEL);
+ else
+ pte_flags |= pgprot_val(PAGE_KERNEL_RO);
+ pte_flags |= pte_val(pte) & mio_wb_bit_mask;
+
+ return __pgprot(pte_flags);
+}
+
+/*
* pgd/pmd/pte modification functions
*/
@@ -1077,7 +1077,12 @@
static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -1089,7 +1094,12 @@
static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
- return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_t res;
+
+ res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(vma->vm_mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
/*
@@ -1104,12 +1114,17 @@
unsigned long addr,
pte_t *ptep, int full)
{
+ pte_t res;
+
if (full) {
- pte_t pte = *ptep;
+ res = *ptep;
*ptep = __pte(_PAGE_INVALID);
- return pte;
+ } else {
+ res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
}
- return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+ if (mm_is_protected(mm) && pte_present(res))
+ uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+ return res;
}
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -1171,6 +1186,12 @@
void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
+#define pgprot_writecombine pgprot_writecombine
+pgprot_t pgprot_writecombine(pgprot_t prot);
+
+#define pgprot_writethrough pgprot_writethrough
+pgprot_t pgprot_writethrough(pgprot_t prot);
+
/*
* Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
@@ -1194,7 +1215,8 @@
static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
- pte_val(__pte) = physpage + pgprot_val(pgprot);
+
+ pte_val(__pte) = physpage | pgprot_val(pgprot);
if (!MACHINE_HAS_NX)
pte_val(__pte) &= ~_PAGE_NOEXEC;
return pte_mkyoung(__pte);
@@ -1214,13 +1236,40 @@
#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
-#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
-#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+static inline unsigned long pmd_deref(pmd_t pmd)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _SEGMENT_ENTRY_ORIGIN;
+ if (pmd_large(pmd))
+ origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ return pmd_val(pmd) & origin_mask;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return pmd_deref(pmd) >> PAGE_SHIFT;
+}
+
+static inline unsigned long pud_deref(pud_t pud)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+ if (pud_large(pud))
+ origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+ return pud_val(pud) & origin_mask;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return pud_deref(pud) >> PAGE_SHIFT;
+}
+
/*
* The pgd_offset function *always* adds the index for the top-level
* region/segment table. This is done to get a sequence like the
@@ -1245,7 +1294,6 @@
}
#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
{
@@ -1288,16 +1336,11 @@
}
#define pmd_offset pmd_offset
-static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
{
- return (pte_t *) pmd_deref(*pmd) + pte_index(address);
+ return (unsigned long) pmd_deref(pmd);
}
-#define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
-
-static inline void pte_unmap(pte_t *pte) { }
-
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
return end <= current->mm->context.asce_limit;
@@ -1323,29 +1366,23 @@
static inline pmd_t pmd_mkwrite(pmd_t pmd)
{
pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
- if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
- return pmd;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd;
}
static inline pmd_t pmd_mkclean(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- }
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
static inline pmd_t pmd_mkdirty(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
- _SEGMENT_ENTRY_SOFT_DIRTY;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- }
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1359,29 +1396,23 @@
static inline pud_t pud_mkwrite(pud_t pud)
{
pud_val(pud) |= _REGION3_ENTRY_WRITE;
- if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
- return pud;
- pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+ if (pud_val(pud) & _REGION3_ENTRY_DIRTY)
+ pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
return pud;
}
static inline pud_t pud_mkclean(pud_t pud)
{
- if (pud_large(pud)) {
- pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
- pud_val(pud) |= _REGION_ENTRY_PROTECT;
- }
+ pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
+ pud_val(pud) |= _REGION_ENTRY_PROTECT;
return pud;
}
static inline pud_t pud_mkdirty(pud_t pud)
{
- if (pud_large(pud)) {
- pud_val(pud) |= _REGION3_ENTRY_DIRTY |
- _REGION3_ENTRY_SOFT_DIRTY;
- if (pud_val(pud) & _REGION3_ENTRY_WRITE)
- pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
- }
+ pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY;
+ if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+ pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
return pud;
}
@@ -1405,38 +1436,29 @@
static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
- }
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- }
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- if (pmd_large(pmd)) {
- pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
- _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
- _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
- pmd_val(pmd) |= massage_pgprot_pmd(newprot);
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- return pmd;
- }
- pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+ _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
return pmd;
}
@@ -1586,7 +1608,7 @@
}
#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
-static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
unsigned long addr,
pmd_t *pmdp, int full)
{
@@ -1595,7 +1617,7 @@
*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
return pmd;
}
- return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
}
#define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1700,7 +1722,7 @@
#define kern_addr_valid(addr) (1)
extern int vmem_add_mapping(unsigned long start, unsigned long size);
-extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern void vmem_remove_mapping(unsigned long start, unsigned long size);
extern int s390_enable_sie(void);
extern int s390_enable_skey(void);
extern void s390_reset_cmma(struct mm_struct *mm);
@@ -1709,6 +1731,4 @@
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#include <asm-generic/pgtable.h>
-
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index b5ea9e1..b5f545d 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -29,12 +29,6 @@
old, new) != old);
}
-#define init_task_preempt_count(p) do { } while (0)
-
-#define init_idle_preempt_count(p, cpu) do { \
- S390_lowcore.preempt_count = PREEMPT_ENABLED; \
-} while (0)
-
static inline void set_preempt_need_resched(void)
{
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
@@ -88,12 +82,6 @@
S390_lowcore.preempt_count = pc;
}
-#define init_task_preempt_count(p) do { } while (0)
-
-#define init_idle_preempt_count(p, cpu) do { \
- S390_lowcore.preempt_count = PREEMPT_ENABLED; \
-} while (0)
-
static inline void set_preempt_need_resched(void)
{
}
@@ -130,11 +118,15 @@
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-#ifdef CONFIG_PREEMPT
+#define init_task_preempt_count(p) do { } while (0)
+/* Deferred to CPU bringup time */
+#define init_idle_preempt_count(p, cpu) do { } while (0)
+
+#ifdef CONFIG_PREEMPTION
extern asmlinkage void preempt_schedule(void);
#define __preempt_schedule() preempt_schedule()
extern asmlinkage void preempt_schedule_notrace(void);
#define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
#endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 48d6ccd..0987c3f 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,17 +14,15 @@
#include <linux/bits.h>
-#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
-#define CIF_ASCE_PRIMARY 1 /* primary asce needs fixup / uaccess */
-#define CIF_ASCE_SECONDARY 2 /* secondary asce needs fixup / uaccess */
-#define CIF_NOHZ_DELAY 3 /* delay HZ disable for a tick */
-#define CIF_FPU 4 /* restore FPU registers */
-#define CIF_IGNORE_IRQ 5 /* ignore interrupt (for udelay) */
-#define CIF_ENABLED_WAIT 6 /* in enabled wait state */
-#define CIF_MCCK_GUEST 7 /* machine check happening in guest */
-#define CIF_DEDICATED_CPU 8 /* this CPU is dedicated */
+#define CIF_ASCE_PRIMARY 0 /* primary asce needs fixup / uaccess */
+#define CIF_ASCE_SECONDARY 1 /* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
+#define CIF_FPU 3 /* restore FPU registers */
+#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
+#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
+#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
-#define _CIF_MCCK_PENDING BIT(CIF_MCCK_PENDING)
#define _CIF_ASCE_PRIMARY BIT(CIF_ASCE_PRIMARY)
#define _CIF_ASCE_SECONDARY BIT(CIF_ASCE_SECONDARY)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
@@ -84,7 +82,6 @@
void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
-extern int sysctl_ieee_emulation_warnings;
extern void execve_tail(void);
extern void __bpon(void);
@@ -93,15 +90,15 @@
*/
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_31BIT) ? \
- (1UL << 31) : -PAGE_SIZE)
+ _REGION3_SIZE : TASK_SIZE_MAX)
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
- (1UL << 30) : (1UL << 41))
+ (_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
#define TASK_SIZE TASK_SIZE_OF(current)
#define TASK_SIZE_MAX (-PAGE_SIZE)
#define STACK_TOP (test_thread_flag(TIF_31BIT) ? \
- (1UL << 31) : (1UL << 42))
-#define STACK_TOP_MAX (1UL << 42)
+ _REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP_MAX _REGION2_SIZE
#define HAVE_ARCH_PICK_MMAP_LAYOUT
@@ -179,7 +176,6 @@
regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
- crst_table_downgrade(current->mm); \
execve_tail(); \
} while (0)
@@ -207,7 +203,7 @@
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
-static inline unsigned long current_stack_pointer(void)
+static __always_inline unsigned long current_stack_pointer(void)
{
unsigned long sp;
@@ -311,7 +307,7 @@
/*
* Function to drop a processor into disabled wait state
*/
-static inline void __noreturn disabled_wait(void)
+static __always_inline void __noreturn disabled_wait(void)
{
psw_t psw;
diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h
new file mode 100644
index 0000000..f960b28
--- /dev/null
+++ b/arch/s390/include/asm/ptdump.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_PTDUMP_H
+#define _ASM_S390_PTDUMP_H
+
+void ptdump_check_wx(void);
+
+static inline void debug_checkwx(void)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_WX))
+ ptdump_check_wx();
+}
+
+#endif /* _ASM_S390_PTDUMP_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f009a13..16b3e43 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -184,5 +184,10 @@
return regs->gprs[15];
}
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->gprs[2] = rc;
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 4f35b10..19e84c9 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -26,9 +26,9 @@
/**
* struct qdesfmt0 - queue descriptor, format 0
- * @sliba: storage list information block address
- * @sla: storage list address
- * @slsba: storage list state block address
+ * @sliba: absolute address of storage list information block
+ * @sla: absolute address of storage list
+ * @slsba: absolute address of storage list state block
* @akey: access key for SLIB
* @bkey: access key for SL
* @ckey: access key for SBALs
@@ -56,7 +56,7 @@
* @oqdcnt: output queue descriptor count
* @iqdsz: input queue descriptor size
* @oqdsz: output queue descriptor size
- * @qiba: queue information block address
+ * @qiba: absolute address of queue information block
* @qkey: queue information block key
* @qdf0: queue descriptions
*/
@@ -201,7 +201,7 @@
* @scount: SBAL count
* @sflags: whole SBAL flags
* @length: length
- * @addr: address
+ * @addr: absolute data address
*/
struct qdio_buffer_element {
u8 eflags;
@@ -211,7 +211,7 @@
u8 scount;
u8 sflags;
u32 length;
- void *addr;
+ u64 addr;
} __attribute__ ((packed, aligned(16)));
/**
@@ -276,6 +276,7 @@
#define CHSC_AC2_MULTI_BUFFER_AVAILABLE 0x0080
#define CHSC_AC2_MULTI_BUFFER_ENABLED 0x0040
#define CHSC_AC2_DATA_DIV_AVAILABLE 0x0010
+#define CHSC_AC2_SNIFFER_AVAILABLE 0x0008
#define CHSC_AC2_DATA_DIV_ENABLED 0x0002
#define CHSC_AC3_FORMAT2_CQ_AVAILABLE 0x8000
@@ -324,10 +325,8 @@
/**
* struct qdio_initialize - qdio initialization data
- * @cdev: associated ccw device
* @q_format: queue format
* @qdr_ac: feature flags to set
- * @adapter_name: name for the adapter
* @qib_param_field_format: format for qib_parm_field
* @qib_param_field: pointer to 128 bytes or NULL, if no param field
* @qib_rflags: rflags to set
@@ -337,18 +336,16 @@
* @no_output_qs: number of output queues
* @input_handler: handler to be called for input queues
* @output_handler: handler to be called for output queues
- * @queue_start_poll_array: polling handlers (one per input queue or NULL)
+ * @irq_poll: Data IRQ polling handler (NULL when not supported)
* @scan_threshold: # of in-use buffers that triggers scan on output queue
* @int_parm: interruption parameter
- * @input_sbal_addr_array: address of no_input_qs * 128 pointers
- * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @input_sbal_addr_array: per-queue array, each element points to 128 SBALs
+ * @output_sbal_addr_array: per-queue array, each element points to 128 SBALs
* @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
*/
struct qdio_initialize {
- struct ccw_device *cdev;
unsigned char q_format;
unsigned char qdr_ac;
- unsigned char adapter_name[8];
unsigned int qib_param_field_format;
unsigned char *qib_param_field;
unsigned char qib_rflags;
@@ -358,43 +355,14 @@
unsigned int no_output_qs;
qdio_handler_t *input_handler;
qdio_handler_t *output_handler;
- void (**queue_start_poll_array) (struct ccw_device *, int,
- unsigned long);
+ void (*irq_poll)(struct ccw_device *cdev, unsigned long data);
unsigned int scan_threshold;
unsigned long int_parm;
- struct qdio_buffer **input_sbal_addr_array;
- struct qdio_buffer **output_sbal_addr_array;
+ struct qdio_buffer ***input_sbal_addr_array;
+ struct qdio_buffer ***output_sbal_addr_array;
struct qdio_outbuf_state *output_sbal_state_array;
};
-/**
- * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
- * @l3_ipv6_addr: entry contains IPv6 address
- * @l3_ipv4_addr: entry contains IPv4 address
- * @l2_addr_lnid: entry contains MAC address and VLAN ID
- */
-enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
-
-/**
- * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
- * @nit: Network interface token
- * @addr: Address of one of the three types
- *
- * The struct is passed to the callback function by qdio_brinfo_desc()
- */
-struct qdio_brinfo_entry_l3_ipv6 {
- u64 nit;
- struct { unsigned char _s6_addr[16]; } addr;
-} __packed;
-struct qdio_brinfo_entry_l3_ipv4 {
- u64 nit;
- struct { uint32_t _s_addr; } addr;
-} __packed;
-struct qdio_brinfo_entry_l2 {
- u64 nit;
- struct { u8 mac[6]; u16 lnid; } addr_lnid;
-} __packed;
-
#define QDIO_STATE_INACTIVE 0x00000002 /* after qdio_cleanup */
#define QDIO_STATE_ESTABLISHED 0x00000004 /* after qdio_establish */
#define QDIO_STATE_ACTIVE 0x00000008 /* after qdio_activate */
@@ -408,14 +376,16 @@
void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
-extern int qdio_allocate(struct qdio_initialize *);
-extern int qdio_establish(struct qdio_initialize *);
+extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
+ unsigned int no_output_qs);
+extern int qdio_establish(struct ccw_device *cdev,
+ struct qdio_initialize *init_data);
extern int qdio_activate(struct ccw_device *);
extern void qdio_release_aob(struct qaob *);
extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
unsigned int);
-extern int qdio_start_irq(struct ccw_device *, int);
-extern int qdio_stop_irq(struct ccw_device *, int);
+extern int qdio_start_irq(struct ccw_device *cdev);
+extern int qdio_stop_irq(struct ccw_device *cdev);
extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
bool is_input, unsigned int *bufnr,
@@ -423,10 +393,5 @@
extern int qdio_shutdown(struct ccw_device *, int);
extern int qdio_free(struct ccw_device *);
extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
-extern int qdio_pnso_brinfo(struct subchannel_id schid,
- int cnc, u16 *response,
- void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
- void *entry),
- void *priv);
#endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index c563f83..a7bdd12 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -114,8 +114,7 @@
void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
void sclp_early_detect(void);
void sclp_early_printk(const char *s);
-void sclp_early_printk_force(const char *s);
-void __sclp_early_printk(const char *s, unsigned int len, unsigned int force);
+void __sclp_early_printk(const char *s, unsigned int len);
int sclp_early_get_memsize(unsigned long *mem);
int sclp_early_get_hsa_size(unsigned long *hsa_size);
@@ -129,6 +128,8 @@
int sclp_chp_read_info(struct sclp_chp_info *info);
int sclp_pci_configure(u32 fid);
int sclp_pci_deconfigure(u32 fid);
+int sclp_ap_configure(u32 apid);
+int sclp_ap_deconfigure(u32 apid);
int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 42de04a..0c21514 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -26,14 +26,14 @@
* final .boot.data section, which should be identical in the decompressor and
* the decompressed kernel (that is checked during the build).
*/
-#define __bootdata(var) __section(.boot.data.var) var
+#define __bootdata(var) __section(".boot.data." #var) var
/*
* .boot.preserved.data is similar to .boot.data, but it is not part of the
* .init section and thus will be preserved for later use in the decompressed
* kernel.
*/
-#define __bootdata_preserved(var) __section(.boot.preserved.data.var) var
+#define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
extern unsigned long __sdma, __edma;
extern unsigned long __stext_dma, __etext_dma;
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index c59a835..a22a5a8 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -2,6 +2,10 @@
#ifndef _ASMS390_SET_MEMORY_H
#define _ASMS390_SET_MEMORY_H
+#include <linux/mutex.h>
+
+extern struct mutex cpa_mutex;
+
#define SET_MEMORY_RO 1UL
#define SET_MEMORY_RW 2UL
#define SET_MEMORY_NX 4UL
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index e6a5007..75a2ece 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -28,7 +28,6 @@
#define MACHINE_FLAG_DIAG9C BIT(3)
#define MACHINE_FLAG_ESOP BIT(4)
#define MACHINE_FLAG_IDTE BIT(5)
-#define MACHINE_FLAG_DIAG44 BIT(6)
#define MACHINE_FLAG_EDAT1 BIT(7)
#define MACHINE_FLAG_EDAT2 BIT(8)
#define MACHINE_FLAG_TOPOLOGY BIT(10)
@@ -82,12 +81,21 @@
char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */
};
+extern unsigned int zlib_dfltcc_support;
+#define ZLIB_DFLTCC_DISABLED 0
+#define ZLIB_DFLTCC_FULL 1
+#define ZLIB_DFLTCC_DEFLATE_ONLY 2
+#define ZLIB_DFLTCC_INFLATE_ONLY 3
+#define ZLIB_DFLTCC_FULL_DEBUG 4
+
extern int noexec_disabled;
extern int memory_end_set;
extern unsigned long memory_end;
extern unsigned long vmalloc_size;
extern unsigned long max_physmem_end;
-extern unsigned long __swsusp_reset_dma;
+
+/* The Write Back bit position in the physaddr is given by the SLPC PCI */
+extern unsigned long mio_wb_bit_mask;
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -96,7 +104,6 @@
#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
@@ -116,9 +123,6 @@
extern unsigned int console_devno;
extern unsigned int console_irq;
-extern char vmhalt_cmd[];
-extern char vmpoff_cmd[];
-
#define CONSOLE_IS_UNDEFINED (console_mode == 0)
#define CONSOLE_IS_SCLP (console_mode == 1)
#define CONSOLE_IS_3215 (console_mode == 2)
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index b157a81..01e3600 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -10,6 +10,7 @@
#include <asm/sigp.h>
#include <asm/lowcore.h>
+#include <asm/processor.h>
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
@@ -30,10 +31,10 @@
extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
extern void smp_save_dump_cpus(void);
-extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
+extern int smp_cpu_get_cpu_address(int cpu);
extern void smp_fill_possible_mask(void);
extern void smp_detect_cpus(void);
@@ -53,9 +54,14 @@
return cpu - (cpu % (smp_cpu_mtid + 1));
}
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+}
+
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
+extern void schedule_mcck_handler(void);
#endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index c02bff3..3a37172 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -85,7 +85,7 @@
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
typecheck(int, lp->lock);
- asm volatile(
+ asm_inline volatile(
ALTERNATIVE("", ".long 0xb2fa0070", 49) /* NIAI 7 */
" sth %1,%0\n"
: "=Q" (((unsigned short *) &lp->lock)[1])
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index e192681..90488b0 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -33,8 +33,8 @@
return addr >= info->begin && addr + len <= info->end;
}
-static inline unsigned long get_stack_pointer(struct task_struct *task,
- struct pt_regs *regs)
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+ struct pt_regs *regs)
{
if (regs)
return (unsigned long) kernel_stack_pointer(regs);
@@ -62,6 +62,17 @@
};
#endif
+/*
+ * Unlike current_stack_pointer() which simply returns current value of %r15
+ * current_frame_address() returns function stack frame address, which matches
+ * %r15 upon function invocation. It may differ from %r15 later if function
+ * allocates stack for local variables or new stack frame to call other
+ * functions.
+ */
+#define current_frame_address() \
+ ((unsigned long)__builtin_frame_address(0) - \
+ offsetof(struct stack_frame, back_chain))
+
#define CALL_ARGS_0() \
register unsigned long r2 asm("2")
#define CALL_ARGS_1(arg1) \
@@ -99,18 +110,20 @@
#define CALL_ON_STACK(fn, stack, nr, args...) \
({ \
+ unsigned long frame = current_frame_address(); \
CALL_ARGS_##nr(args); \
unsigned long prev; \
\
asm volatile( \
" la %[_prev],0(15)\n" \
- " la 15,0(%[_stack])\n" \
- " stg %[_prev],%[_bc](15)\n" \
+ " lg 15,%[_stack]\n" \
+ " stg %[_frame],%[_bc](15)\n" \
" brasl 14,%[_fn]\n" \
" la 15,0(%[_prev])\n" \
: [_prev] "=&a" (prev), CALL_FMT_##nr \
- : [_stack] "a" (stack), \
+ : [_stack] "R" (stack), \
[_bc] "i" (offsetof(struct stack_frame, back_chain)), \
+ [_frame] "d" (frame), \
[_fn] "X" (fn) : CALL_CLOBBER_##nr); \
r2; \
})
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index f0ddefb..ba07463 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -6,43 +6,89 @@
#ifndef __S390_STP_H
#define __S390_STP_H
+#include <linux/compiler.h>
+
/* notifier for syncs */
extern struct atomic_notifier_head s390_epoch_delta_notifier;
/* STP interruption parameter */
struct stp_irq_parm {
- unsigned int _pad0 : 14;
- unsigned int tsc : 1; /* Timing status change */
- unsigned int lac : 1; /* Link availability change */
- unsigned int tcpc : 1; /* Time control parameter change */
- unsigned int _pad2 : 15;
-} __attribute__ ((packed));
+ u32 : 14;
+ u32 tsc : 1; /* Timing status change */
+ u32 lac : 1; /* Link availability change */
+ u32 tcpc : 1; /* Time control parameter change */
+ u32 : 15;
+} __packed;
#define STP_OP_SYNC 1
#define STP_OP_CTRL 3
struct stp_sstpi {
- unsigned int rsvd0;
- unsigned int rsvd1 : 8;
- unsigned int stratum : 8;
- unsigned int vbits : 16;
- unsigned int leaps : 16;
- unsigned int tmd : 4;
- unsigned int ctn : 4;
- unsigned int rsvd2 : 3;
- unsigned int c : 1;
- unsigned int tst : 4;
- unsigned int tzo : 16;
- unsigned int dsto : 16;
- unsigned int ctrl : 16;
- unsigned int rsvd3 : 16;
- unsigned int tto;
- unsigned int rsvd4;
- unsigned int ctnid[3];
- unsigned int rsvd5;
- unsigned int todoff[4];
- unsigned int rsvd6[48];
-} __attribute__ ((packed));
+ u32 : 32;
+ u32 tu : 1;
+ u32 lu : 1;
+ u32 : 6;
+ u32 stratum : 8;
+ u32 vbits : 16;
+ u32 leaps : 16;
+ u32 tmd : 4;
+ u32 ctn : 4;
+ u32 : 3;
+ u32 c : 1;
+ u32 tst : 4;
+ u32 tzo : 16;
+ u32 dsto : 16;
+ u32 ctrl : 16;
+ u32 : 16;
+ u32 tto;
+ u32 : 32;
+ u32 ctnid[3];
+ u32 : 32;
+ u32 todoff[4];
+ u32 rsvd[48];
+} __packed;
+
+struct stp_tzib {
+ u32 tzan : 16;
+ u32 : 16;
+ u32 tzo : 16;
+ u32 dsto : 16;
+ u32 stn;
+ u32 dstn;
+ u64 dst_on_alg;
+ u64 dst_off_alg;
+} __packed;
+
+struct stp_tcpib {
+ u32 atcode : 4;
+ u32 ntcode : 4;
+ u32 d : 1;
+ u32 : 23;
+ s32 tto;
+ struct stp_tzib atzib;
+ struct stp_tzib ntzib;
+ s32 adst_offset : 16;
+ s32 ndst_offset : 16;
+ u32 rsvd1;
+ u64 ntzib_update;
+ u64 ndsto_update;
+} __packed;
+
+struct stp_lsoib {
+ u32 p : 1;
+ u32 : 31;
+ s32 also : 16;
+ s32 nlso : 16;
+ u64 nlsout;
+} __packed;
+
+struct stp_stzi {
+ u32 rsvd0[3];
+ u64 data_ts;
+ u32 rsvd1[22];
+ struct stp_tcpib tcpib;
+ struct stp_lsoib lsoib;
+} __packed;
/* Functions needed by the machine check handler */
int stp_sync_check(void);
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 3c3d6fe..1320f42 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -30,7 +30,7 @@
})
#define __S390_SYS_STUBx(x, name, ...) \
- asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
+ asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO); \
asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
{ \
@@ -46,7 +46,7 @@
#define COMPAT_SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long __s390_compat_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO); \
+ ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO); \
asmlinkage long __s390_compat_sys_##sname(void)
#define SYSCALL_DEFINE0(sname) \
@@ -72,7 +72,7 @@
asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_compat_sys##name)))); \
- ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \
+ ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO); \
static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index e582fbe..13a04fc 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,6 @@
#ifndef __ASSEMBLY__
#include <asm/lowcore.h>
#include <asm/page.h>
-#include <asm/processor.h>
#define STACK_INIT_OFFSET \
(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index b6a4ce9..289aaff 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -49,11 +49,6 @@
asm volatile("sckc %0" : : "Q" (time));
}
-static inline void store_clock_comparator(__u64 *time)
-{
- asm volatile("stckc %0" : "=Q" (*time));
-}
-
void clock_comparator_work(void);
void __init time_early_init(void);
@@ -180,7 +175,6 @@
int get_phys_clock(unsigned long *clock);
void init_cpu_timer(void);
-unsigned long long monotonic_clock(void);
extern unsigned char tod_clock_base[16] __aligned(8);
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index aa406c0..954fa8c 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,7 +36,6 @@
#define p4d_free_tlb p4d_free_tlb
#define pud_free_tlb pud_free_tlb
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm-generic/tlb.h>
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 82703e0..6448bb5 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -5,8 +5,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
/*
* Flush all TLB entries on the local CPU.
@@ -32,8 +30,6 @@
: : "a" (opt), "a" (asce) : "cc");
}
-void smp_ptlb_all(void);
-
/*
* Flush all TLB entries on all CPUs.
*/
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index ef9dd25..3a0ac0c 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -16,8 +16,8 @@
unsigned short socket_id;
unsigned short book_id;
unsigned short drawer_id;
- unsigned short node_id;
unsigned short dedicated : 1;
+ int booted_cores;
cpumask_t thread_mask;
cpumask_t core_mask;
cpumask_t book_mask;
@@ -25,7 +25,6 @@
};
extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
-extern cpumask_t cpus_with_topology;
#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
@@ -37,6 +36,7 @@
#define topology_drawer_id(cpu) (cpu_topology[cpu].drawer_id)
#define topology_drawer_cpumask(cpu) (&cpu_topology[cpu].drawer_mask)
#define topology_cpu_dedicated(cpu) (cpu_topology[cpu].dedicated)
+#define topology_booted_cores(cpu) (cpu_topology[cpu].booted_cores)
#define mc_capable() 1
@@ -45,6 +45,7 @@
int topology_set_cpu_management(int fc);
void topology_schedule_update(void);
void store_topology(struct sysinfo_15_1_x *info);
+void update_cpu_masks(void);
void topology_expect_change(void);
const struct cpumask *cpu_coregroup_mask(int cpu);
@@ -54,6 +55,8 @@
static inline void topology_schedule_update(void) { }
static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
+static inline int topology_booted_cores(int cpu_nr) { return 1; }
+static inline void update_cpu_masks(void) { }
static inline void topology_expect_change(void) { }
#endif /* CONFIG_SCHED_TOPOLOGY */
@@ -71,14 +74,14 @@
#define cpu_to_node cpu_to_node
static inline int cpu_to_node(int cpu)
{
- return cpu_topology[cpu].node_id;
+ return 0;
}
/* Returns a pointer to the cpumask of CPUs on node 'node'. */
#define cpumask_of_node cpumask_of_node
static inline const struct cpumask *cpumask_of_node(int node)
{
- return &node_to_cpumask_map[node];
+ return cpu_possible_mask;
}
#define pcibus_to_node(bus) __pcibus_to_node(bus)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 3244388..c868e7e 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -32,7 +32,7 @@
#define USER_DS_SACF (3)
#define get_fs() (current->thread.mm_segment)
-#define segment_eq(a,b) (((a) & 2) == ((b) & 2))
+#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS)
void set_fs(mm_segment_t fs);
@@ -60,6 +60,9 @@
#define INLINE_COPY_TO_USER
#endif
+int __put_user_bad(void) __attribute__((noreturn));
+int __get_user_bad(void) __attribute__((noreturn));
+
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
#define __put_get_user_asm(to, from, size, spec) \
@@ -109,6 +112,9 @@
(unsigned long *)x,
size, spec);
break;
+ default:
+ __put_user_bad();
+ break;
}
return rc;
}
@@ -139,6 +145,9 @@
(unsigned long __user *)ptr,
size, spec);
break;
+ default:
+ __get_user_bad();
+ break;
}
return rc;
}
@@ -179,7 +188,7 @@
default: \
__put_user_bad(); \
break; \
- } \
+ } \
__builtin_expect(__pu_err, 0); \
})
@@ -190,8 +199,6 @@
})
-int __put_user_bad(void) __attribute__((noreturn));
-
#define __get_user(x, ptr) \
({ \
int __gu_err = -EFAULT; \
@@ -238,8 +245,6 @@
__get_user(x, ptr); \
})
-int __get_user_bad(void) __attribute__((noreturn));
-
unsigned long __must_check
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
@@ -278,4 +283,115 @@
int copy_to_user_real(void __user *dest, void *src, unsigned long count);
void *s390_kernel_write(void *dst, const void *src, size_t size);
+#define HAVE_GET_KERNEL_NOFAULT
+
+int __noreturn __put_kernel_bad(void);
+
+#define __put_kernel_asm(val, to, insn) \
+({ \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: " insn " %2,%1\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%3\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "+Q" (*(to)) \
+ : "d" (val), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+#define __put_kernel_nofault(dst, src, type, err_label) \
+do { \
+ u64 __x = (u64)(*((type *)(src))); \
+ int __pk_err; \
+ \
+ switch (sizeof(type)) { \
+ case 1: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
+ break; \
+ case 2: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
+ break; \
+ case 4: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "st"); \
+ break; \
+ case 8: \
+ __pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+ break; \
+ default: \
+ __pk_err = __put_kernel_bad(); \
+ break; \
+ } \
+ if (unlikely(__pk_err)) \
+ goto err_label; \
+} while (0)
+
+int __noreturn __get_kernel_bad(void);
+
+#define __get_kernel_asm(val, from, insn) \
+({ \
+ int __rc; \
+ \
+ asm volatile( \
+ "0: " insn " %1,%2\n" \
+ "1: xr %0,%0\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: lhi %0,%3\n" \
+ " jg 2b\n" \
+ ".popsection\n" \
+ EX_TABLE(0b,3b) EX_TABLE(1b,3b) \
+ : "=d" (__rc), "+d" (val) \
+ : "Q" (*(from)), "K" (-EFAULT) \
+ : "cc"); \
+ __rc; \
+})
+
+#define __get_kernel_nofault(dst, src, type, err_label) \
+do { \
+ int __gk_err; \
+ \
+ switch (sizeof(type)) { \
+ case 1: { \
+ u8 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "ic"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ case 2: { \
+ u16 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "lh"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ case 4: { \
+ u32 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "l"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ case 8: { \
+ u64 __x = 0; \
+ \
+ __gk_err = __get_kernel_asm(__x, (type *)(src), "lg"); \
+ *((type *)(dst)) = (type)__x; \
+ break; \
+ }; \
+ default: \
+ __gk_err = __get_kernel_bad(); \
+ break; \
+ } \
+ if (unlikely(__gk_err)) \
+ goto err_label; \
+} while (0)
+
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index eaaefec..de9006b 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -35,7 +35,6 @@
struct task_struct *task;
struct pt_regs *regs;
unsigned long sp, ip;
- bool reuse_sp;
int graph_idx;
bool reliable;
bool error;
@@ -59,10 +58,11 @@
static inline void unwind_start(struct unwind_state *state,
struct task_struct *task,
struct pt_regs *regs,
- unsigned long sp)
+ unsigned long first_frame)
{
- sp = sp ? : get_stack_pointer(task, regs);
- __unwind_start(state, task, regs, sp);
+ task = task ?: current;
+ first_frame = first_frame ?: get_stack_pointer(task, regs);
+ __unwind_start(state, task, regs, first_frame);
}
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index ef3c00b..12c5f00 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -14,23 +14,67 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/bug.h>
+#include <linux/sched.h>
#include <asm/page.h>
+#include <asm/gmap.h>
#define UVC_RC_EXECUTED 0x0001
#define UVC_RC_INV_CMD 0x0002
#define UVC_RC_INV_STATE 0x0003
#define UVC_RC_INV_LEN 0x0005
#define UVC_RC_NO_RESUME 0x0007
+#define UVC_RC_NEED_DESTROY 0x8000
#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_INIT_UV 0x000f
+#define UVC_CMD_CREATE_SEC_CONF 0x0100
+#define UVC_CMD_DESTROY_SEC_CONF 0x0101
+#define UVC_CMD_CREATE_SEC_CPU 0x0120
+#define UVC_CMD_DESTROY_SEC_CPU 0x0121
+#define UVC_CMD_CONV_TO_SEC_STOR 0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR 0x0201
+#define UVC_CMD_DESTR_SEC_STOR 0x0202
+#define UVC_CMD_SET_SEC_CONF_PARAMS 0x0300
+#define UVC_CMD_UNPACK_IMG 0x0301
+#define UVC_CMD_VERIFY_IMG 0x0302
+#define UVC_CMD_CPU_RESET 0x0310
+#define UVC_CMD_CPU_RESET_INITIAL 0x0311
+#define UVC_CMD_PREPARE_RESET 0x0320
+#define UVC_CMD_CPU_RESET_CLEAR 0x0321
+#define UVC_CMD_CPU_SET_STATE 0x0330
+#define UVC_CMD_SET_UNSHARE_ALL 0x0340
+#define UVC_CMD_PIN_PAGE_SHARED 0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED 0x0342
#define UVC_CMD_SET_SHARED_ACCESS 0x1000
#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
/* Bits in installed uv calls */
enum uv_cmds_inst {
BIT_UVC_CMD_QUI = 0,
+ BIT_UVC_CMD_INIT_UV = 1,
+ BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+ BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+ BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+ BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
+ BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+ BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+ BIT_UVC_CMD_SET_SEC_PARMS = 11,
+ BIT_UVC_CMD_UNPACK_IMG = 13,
+ BIT_UVC_CMD_VERIFY_IMG = 14,
+ BIT_UVC_CMD_CPU_RESET = 15,
+ BIT_UVC_CMD_CPU_RESET_INITIAL = 16,
+ BIT_UVC_CMD_CPU_SET_STATE = 17,
+ BIT_UVC_CMD_PREPARE_RESET = 18,
+ BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19,
+ BIT_UVC_CMD_UNSHARE_ALL = 20,
+ BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+ BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+};
+
+enum uv_feat_ind {
+ BIT_UV_FEAT_MISC = 0,
};
struct uv_cb_header {
@@ -40,13 +84,128 @@
u16 rrc; /* Return Reason Code */
} __packed __aligned(8);
+/* Query Ultravisor Information */
struct uv_cb_qui {
struct uv_cb_header header;
u64 reserved08;
u64 inst_calls_list[4];
- u64 reserved30[15];
+ u64 reserved30[2];
+ u64 uv_base_stor_len;
+ u64 reserved48;
+ u64 conf_base_phys_stor_len;
+ u64 conf_base_virt_stor_len;
+ u64 conf_virt_var_stor_len;
+ u64 cpu_stor_len;
+ u32 reserved70[3];
+ u32 max_num_sec_conf;
+ u64 max_guest_stor_addr;
+ u8 reserved88[158 - 136];
+ u16 max_guest_cpu_id;
+ u64 uv_feature_indications;
+ u8 reserveda0[200 - 168];
} __packed __aligned(8);
+/* Initialize Ultravisor */
+struct uv_cb_init {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 stor_origin;
+ u64 stor_len;
+ u64 reserved28[4];
+} __packed __aligned(8);
+
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 conf_base_stor_origin;
+ u64 conf_virt_stor_origin;
+ u64 reserved30;
+ u64 guest_stor_origin;
+ u64 guest_stor_len;
+ u64 guest_sca;
+ u64 guest_asce;
+ u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u64 guest_handle;
+ u64 stor_origin;
+ u8 reserved30[6];
+ u16 num;
+ u64 state_origin;
+ u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
+struct uv_cb_cts {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+} __packed __aligned(8);
+
+/* Convert from Secure / Pin Page Shared */
+struct uv_cb_cfs {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 paddr;
+} __packed __aligned(8);
+
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 sec_header_origin;
+ u32 sec_header_len;
+ u32 reserved2c;
+ u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 guest_handle;
+ u64 gaddr;
+ u64 tweak[2];
+ u64 reserved38[3];
+} __packed __aligned(8);
+
+#define PV_CPU_STATE_OPR 1
+#define PV_CPU_STATE_STP 2
+#define PV_CPU_STATE_CHKSTP 3
+#define PV_CPU_STATE_OPR_LOAD 5
+
+struct uv_cb_cpu_set_state {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 cpu_handle;
+ u8 reserved20[7];
+ u8 state;
+ u64 reserved28[5];
+};
+
+/*
+ * A common UV call struct for calls that take no payload
+ * Examples:
+ * Destroy cpu/config
+ * Verify
+ */
+struct uv_cb_nodata {
+ struct uv_cb_header header;
+ u64 reserved08[2];
+ u64 handle;
+ u64 reserved20[4];
+} __packed __aligned(8);
+
+/* Set Shared Access */
struct uv_cb_share {
struct uv_cb_header header;
u64 reserved08[3];
@@ -54,21 +213,77 @@
u64 reserved28;
} __packed __aligned(8);
-static inline int uv_call(unsigned long r1, unsigned long r2)
+static inline int __uv_call(unsigned long r1, unsigned long r2)
{
int cc;
asm volatile(
- "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
- " brc 3,0b\n"
- " ipm %[cc]\n"
- " srl %[cc],28\n"
+ " .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
: [cc] "=d" (cc)
: [r1] "a" (r1), [r2] "a" (r2)
: "memory", "cc");
return cc;
}
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ } while (cc > 1);
+ return cc;
+}
+
+/* Low level uv_call that avoids stalls for long running busy conditions */
+static inline int uv_call_sched(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ do {
+ cc = __uv_call(r1, r2);
+ cond_resched();
+ } while (cc > 1);
+ return cc;
+}
+
+/*
+ * special variant of uv_call that only transports the cpu or guest
+ * handle and the command, like destroy or verify.
+ */
+static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_nodata uvcb = {
+ .header.cmd = cmd,
+ .header.len = sizeof(uvcb),
+ .handle = handle,
+ };
+ int cc;
+
+ WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd);
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ return cc ? -EINVAL : 0;
+}
+
+struct uv_info {
+ unsigned long inst_calls_list[4];
+ unsigned long uv_base_stor_len;
+ unsigned long guest_base_stor_len;
+ unsigned long guest_virt_base_stor_len;
+ unsigned long guest_virt_var_stor_len;
+ unsigned long guest_cpu_stor_len;
+ unsigned long max_sec_stor_addr;
+ unsigned int max_num_sec_conf;
+ unsigned short max_guest_cpu_id;
+ unsigned long uv_feature_indications;
+};
+
+extern struct uv_info uv_info;
+
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
@@ -86,7 +301,7 @@
};
if (!is_prot_virt_guest())
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
/*
* Sharing is page wise, if we encounter addresses that are
* not page aligned, we assume something went wrong. If
@@ -121,11 +336,46 @@
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
-void uv_query_info(void);
#else
#define is_prot_virt_guest() 0
static inline int uv_set_shared(unsigned long addr) { return 0; }
static inline int uv_remove_shared(unsigned long addr) { return 0; }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+extern int prot_virt_host;
+
+static inline int is_prot_virt_host(void)
+{
+ return prot_virt_host;
+}
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_destroy_page(unsigned long paddr);
+int uv_convert_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
+void setup_uv(void);
+void adjust_to_uv_max(unsigned long *vmax);
+#else
+#define is_prot_virt_host() 0
+static inline void setup_uv(void) {}
+static inline void adjust_to_uv_max(unsigned long *vmax) {}
+
+static inline int uv_destroy_page(unsigned long paddr)
+{
+ return 0;
+}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
static inline void uv_query_info(void) {}
#endif
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index f3ba84f..29b44a9 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -2,6 +2,8 @@
#ifndef __S390_VDSO_H__
#define __S390_VDSO_H__
+#include <vdso/datapage.h>
+
/* Default link addresses for the vDSOs */
#define VDSO32_LBASE 0
#define VDSO64_LBASE 0
@@ -18,38 +20,22 @@
* itself and may change without notice.
*/
-struct vdso_data {
- __u64 tb_update_count; /* Timebase atomicity ctr 0x00 */
- __u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */
- __u64 xtime_clock_sec; /* Kernel time 0x10 */
- __u64 xtime_clock_nsec; /* 0x18 */
- __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */
- __u64 xtime_coarse_nsec; /* 0x28 */
- __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */
- __u64 wtom_clock_nsec; /* 0x38 */
- __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */
- __u64 wtom_coarse_nsec; /* 0x48 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */
- __u32 tz_dsttime; /* Type of dst correction 0x54 */
- __u32 ectg_available; /* ECTG instruction present 0x58 */
- __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */
- __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
- __u32 ts_dir; /* TOD steering direction 0x64 */
- __u64 ts_end; /* TOD steering end 0x68 */
- __u32 hrtimer_res; /* hrtimer resolution 0x70 */
-};
-
struct vdso_per_cpu_data {
- __u64 ectg_timer_base;
- __u64 ectg_user_time;
- __u32 cpu_nr;
- __u32 node_id;
+ /*
+ * Note: node_id and cpu_nr must be at adjacent memory locations.
+ * VDSO userspace must read both values with a single instruction.
+ */
+ union {
+ __u64 getcpu_val;
+ struct {
+ __u32 node_id;
+ __u32 cpu_nr;
+ };
+ };
};
extern struct vdso_data *vdso_data;
-extern struct vdso_data boot_vdso_data;
-void vdso_alloc_boot_cpu(struct lowcore *lowcore);
int vdso_alloc_per_cpu(struct lowcore *lowcore);
void vdso_free_per_cpu(struct lowcore *lowcore);
diff --git a/arch/s390/include/asm/vdso/clocksource.h b/arch/s390/include/asm/vdso/clocksource.h
new file mode 100644
index 0000000..a93eda0
--- /dev/null
+++ b/arch/s390/include/asm/vdso/clocksource.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_CLOCKSOURCE_H
+#define __ASM_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ VDSO_CLOCKMODE_TOD
+
+#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
new file mode 100644
index 0000000..73ee891
--- /dev/null
+++ b/arch/s390/include/asm/vdso/data.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_VDSO_DATA_H
+#define __S390_ASM_VDSO_DATA_H
+
+#include <linux/types.h>
+#include <vdso/datapage.h>
+
+struct arch_vdso_data {
+ __s64 tod_steering_delta;
+ __u64 tod_steering_end;
+};
+
+#endif /* __S390_ASM_VDSO_DATA_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
new file mode 100644
index 0000000..bf12306
--- /dev/null
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_VDSO_GETTIMEOFDAY_H
+#define ASM_VDSO_GETTIMEOFDAY_H
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#include <asm/timex.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <linux/compiler.h>
+
+#define vdso_calc_delta __arch_vdso_calc_delta
+static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+ return (cycles - last) * mult;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
+{
+ const struct vdso_data *vdso = __arch_get_vdso_data();
+ u64 adj, now;
+
+ now = get_tod_clock();
+ adj = vdso->arch_data.tod_steering_end - now;
+ if (unlikely((s64) adj > 0))
+ now += (vdso->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+ return now;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+ register unsigned long r1 __asm__("r1") = __NR_clock_gettime;
+ register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
+ register void *r3 __asm__("r3") = ts;
+
+ asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+ return r2;
+}
+
+static __always_inline
+long gettimeofday_fallback(register struct __kernel_old_timeval *tv,
+ register struct timezone *tz)
+{
+ register unsigned long r1 __asm__("r1") = __NR_gettimeofday;
+ register unsigned long r2 __asm__("r2") = (unsigned long)tv;
+ register void *r3 __asm__("r3") = tz;
+
+ asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+ return r2;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+ register unsigned long r1 __asm__("r1") = __NR_clock_getres;
+ register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
+ register void *r3 __asm__("r3") = ts;
+
+ asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+ return r2;
+}
+
+#endif
diff --git a/arch/s390/include/asm/vdso/processor.h b/arch/s390/include/asm/vdso/processor.h
new file mode 100644
index 0000000..cfcc3e1
--- /dev/null
+++ b/arch/s390/include/asm/vdso/processor.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#define cpu_relax() barrier()
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
new file mode 100644
index 0000000..6c67c08
--- /dev/null
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vdso.h>
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+
+static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __s390_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h
new file mode 100644
index 0000000..3ba3a6b
--- /dev/null
+++ b/arch/s390/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_S390_VMALLOC_H
+#define _ASM_S390_VMALLOC_H
+
+#endif /* _ASM_S390_VMALLOC_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
index 42f707d..e601ada 100644
--- a/arch/s390/include/asm/vtimer.h
+++ b/arch/s390/include/asm/vtimer.h
@@ -25,8 +25,6 @@
extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
extern int del_virt_timer(struct vtimer_list *timer);
-
-extern void init_cpu_vtimer(void);
extern void vtime_init(void);
#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
deleted file mode 100644
index c7c564d..0000000
--- a/arch/s390/include/uapi/asm/debug.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * S/390 debug facility
- *
- * Copyright IBM Corp. 1999, 2000
- */
-
-#ifndef _UAPIDEBUG_H
-#define _UAPIDEBUG_H
-
-#include <linux/fs.h>
-
-/* Note:
- * struct __debug_entry must be defined outside of #ifdef __KERNEL__
- * in order to allow a user program to analyze the 'raw'-view.
- */
-
-struct __debug_entry{
- union {
- struct {
- unsigned long long clock:52;
- unsigned long long exception:1;
- unsigned long long level:3;
- unsigned long long cpuid:8;
- } fields;
-
- unsigned long long stck;
- } id;
- void* caller;
-} __attribute__((packed));
-
-
-#define __DEBUG_FEATURE_VERSION 2 /* version of debug feature */
-
-#endif /* _UAPIDEBUG_H */
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
index 5b1c4f4..1030cd1 100644
--- a/arch/s390/include/uapi/asm/ipcbuf.h
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
#ifndef __S390_IPCBUF_H__
#define __S390_IPCBUF_H__
+#include <linux/posix_types.h>
+
/*
* The user_ipc_perm structure for S/390 architecture.
* Note extra padding because this structure is passed back and forth
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 451ba7d..d1ecd5d 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -27,6 +27,7 @@
IPL_PBT_FCP = 0,
IPL_PBT_SCP_DATA = 1,
IPL_PBT_CCW = 2,
+ IPL_PBT_NVME = 4,
};
/* IPL Parameter Block 0 with common fields */
@@ -67,6 +68,30 @@
#define IPL_PB0_FCP_OPT_IPL 0x10
#define IPL_PB0_FCP_OPT_DUMP 0x20
+/* IPL Parameter Block 0 for NVMe */
+struct ipl_pb0_nvme {
+ __u32 len;
+ __u8 pbt;
+ __u8 reserved1[3];
+ __u8 loadparm[8];
+ __u8 reserved2[304];
+ __u8 opt;
+ __u8 reserved3[3];
+ __u32 fid;
+ __u8 reserved4[12];
+ __u32 nsid;
+ __u8 reserved5[4];
+ __u32 bootprog;
+ __u8 reserved6[12];
+ __u64 br_lba;
+ __u32 scp_data_len;
+ __u8 reserved7[260];
+ __u8 scp_data[];
+} __packed;
+
+#define IPL_PB0_NVME_OPT_IPL 0x10
+#define IPL_PB0_NVME_OPT_DUMP 0x20
+
/* IPL Parameter Block 0 for CCW */
struct ipl_pb0_ccw {
__u32 len;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 436ec76..7a6b148 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+ KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
@@ -264,7 +266,8 @@
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ __u64 diag318; /* diagnose 0x318 info */
+ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index e22f072..7349e96 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -25,20 +25,26 @@
#define MAXPROTKEYSIZE 64 /* a protected key blob may be up to 64 bytes */
#define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */
#define MAXAESCIPHERKEYSIZE 136 /* our aes cipher keys have always 136 bytes */
+#define MINEP11AESKEYBLOBSIZE 256 /* min EP11 AES key blob size */
+#define MAXEP11AESKEYBLOBSIZE 320 /* max EP11 AES key blob size */
-/* Minimum and maximum size of a key blob */
+/* Minimum size of a key blob */
#define MINKEYBLOBSIZE SECKEYBLOBSIZE
-#define MAXKEYBLOBSIZE MAXAESCIPHERKEYSIZE
/* defines for the type field within the pkey_protkey struct */
#define PKEY_KEYTYPE_AES_128 1
#define PKEY_KEYTYPE_AES_192 2
#define PKEY_KEYTYPE_AES_256 3
+#define PKEY_KEYTYPE_ECC 4
/* the newer ioctls use a pkey_key_type enum for type information */
enum pkey_key_type {
PKEY_TYPE_CCA_DATA = (__u32) 1,
PKEY_TYPE_CCA_CIPHER = (__u32) 2,
+ PKEY_TYPE_EP11 = (__u32) 3,
+ PKEY_TYPE_CCA_ECC = (__u32) 0x1f,
+ PKEY_TYPE_EP11_AES = (__u32) 6,
+ PKEY_TYPE_EP11_ECC = (__u32) 7,
};
/* the newer ioctls use a pkey_key_size enum for key size information */
@@ -87,6 +93,20 @@
};
/*
+ * EP11 key blobs of type PKEY_TYPE_EP11_AES and PKEY_TYPE_EP11_ECC
+ * are ep11 blobs prepended by this header:
+ */
+struct ep11kblob_header {
+ __u8 type; /* always 0x00 */
+ __u8 hver; /* header version, currently needs to be 0x00 */
+ __u16 len; /* total length in bytes (including this header) */
+ __u8 version; /* PKEY_TYPE_EP11_AES or PKEY_TYPE_EP11_ECC */
+ __u8 res0; /* unused */
+ __u16 bitlen; /* clear key bit len, 0 for unknown */
+ __u8 res1[8]; /* unused */
+} __packed;
+
+/*
* Generate CCA AES secure key.
*/
struct pkey_genseck {
@@ -200,7 +220,7 @@
/*
* Generate secure key, version 2.
- * Generate either a CCA AES secure key or a CCA AES cipher key.
+ * Generate CCA AES secure key, CCA AES cipher key or EP11 AES secure key.
* There needs to be a list of apqns given with at least one entry in there.
* All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
* is not supported. The implementation walks through the list of apqns and
@@ -210,10 +230,13 @@
* (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to
* generate a list of apqns based on the key type to generate.
* The keygenflags argument is passed to the low level generation functions
- * individual for the key type and has a key type specific meaning. Currently
- * only CCA AES cipher keys react to this parameter: Use one or more of the
- * PKEY_KEYGEN_* flags to widen the export possibilities. By default a cipher
- * key is only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * individual for the key type and has a key type specific meaning. When
+ * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_*
+ * flags to widen the export possibilities. By default a cipher key is
+ * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * The keygenflag argument for generating an EP11 AES key should either be 0
+ * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and
+ * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags.
*/
struct pkey_genseck2 {
struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets*/
@@ -229,8 +252,8 @@
/*
* Generate secure key from clear key value, version 2.
- * Construct a CCA AES secure key or CCA AES cipher key from a given clear key
- * value.
+ * Construct an CCA AES secure key, CCA AES cipher key or EP11 AES secure
+ * key from a given clear key value.
* There needs to be a list of apqns given with at least one entry in there.
* All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
* is not supported. The implementation walks through the list of apqns and
@@ -240,10 +263,13 @@
* (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to
* generate a list of apqns based on the key type to generate.
* The keygenflags argument is passed to the low level generation functions
- * individual for the key type and has a key type specific meaning. Currently
- * only CCA AES cipher keys react to this parameter: Use one or more of the
- * PKEY_KEYGEN_* flags to widen the export possibilities. By default a cipher
- * key is only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * individual for the key type and has a key type specific meaning. When
+ * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_*
+ * flags to widen the export possibilities. By default a cipher key is
+ * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * The keygenflag argument for generating an EP11 AES key should either be 0
+ * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and
+ * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags.
*/
struct pkey_clr2seck2 {
struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
@@ -266,14 +292,19 @@
* with one apqn able to handle this key.
* The function also checks for the master key verification patterns
* of the key matching to the current or alternate mkvp of the apqn.
- * Currently CCA AES secure keys and CCA AES cipher keys are supported.
- * The flags field is updated with some additional info about the apqn mkvp
+ * For CCA AES secure keys and CCA AES cipher keys this means to check
+ * the key's mkvp against the current or old mkvp of the apqns. The flags
+ * field is updated with some additional info about the apqn mkvp
* match: If the current mkvp matches to the key's mkvp then the
* PKEY_FLAGS_MATCH_CUR_MKVP bit is set, if the alternate mkvp matches to
* the key's mkvp the PKEY_FLAGS_MATCH_ALT_MKVP is set. For CCA keys the
* alternate mkvp is the old master key verification pattern.
* CCA AES secure keys are also checked to have the CPACF export allowed
* bit enabled (XPRTCPAC) in the kmf1 field.
+ * EP11 keys are also supported and the wkvp of the key is checked against
+ * the current wkvp of the apqns. There is no alternate for this type of
+ * key and so on a match the flag PKEY_FLAGS_MATCH_CUR_MKVP always is set.
+ * EP11 keys are also checked to have XCP_BLOB_PROTKEY_EXTRACTABLE set.
* The ioctl returns 0 as long as the given or found apqn matches to
* matches with the current or alternate mkvp to the key's mkvp. If the given
* apqn does not match or there is no such apqn found, -1 with errno
@@ -291,7 +322,7 @@
#define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2)
/*
- * Transform a key blob (of any type) into a protected key, version 2.
+ * Transform a key blob into a protected key, version 2.
* There needs to be a list of apqns given with at least one entry in there.
* All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
* is not supported. The implementation walks through the list of apqns and
@@ -300,6 +331,8 @@
* list is tried until success (return 0) or the end of the list is reached
* (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
* generate a list of apqns based on the key.
+ * Deriving ECC protected keys from ECC secure keys is not supported with
+ * this ioctl, use PKEY_KBLOB2PROTK3 for this purpose.
*/
struct pkey_kblob2pkey2 {
__u8 __user *key; /* in: pointer to key blob */
@@ -313,16 +346,20 @@
/*
* Build a list of APQNs based on a key blob given.
* Is able to find out which type of secure key is given (CCA AES secure
- * key or CCA AES cipher key) and tries to find all matching crypto cards
- * based on the MKVP and maybe other criterias (like CCA AES cipher keys
- * need a CEX5C or higher). The list of APQNs is further filtered by the key's
- * mkvp which needs to match to either the current mkvp or the alternate mkvp
- * (which is the old mkvp on CCA adapters) of the apqns. The flags argument may
- * be used to limit the matching apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is
- * given, only the current mkvp of each apqn is compared. Likewise with the
- * PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it is assumed to
- * return apqns where either the current or the alternate mkvp
- * matches. At least one of the matching flags needs to be given.
+ * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private
+ * key) and tries to find all matching crypto cards based on the MKVP and maybe
+ * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys
+ * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of
+ * APQNs is further filtered by the key's mkvp which needs to match to either
+ * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters
+ * only) of the apqns. The flags argument may be used to limit the matching
+ * apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of
+ * each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both
+ * are given, it is assumed to return apqns where either the current or the
+ * alternate mkvp matches. At least one of the matching flags needs to be given.
+ * The flags argument for EP11 keys has no further action and is currently
+ * ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only
+ * the wkvp from the key to match against the apqn's wkvp.
* The list of matching apqns is stored into the space given by the apqns
* argument and the number of stored entries goes into apqn_entries. If the list
* is empty (apqn_entries is 0) the apqn_entries field is updated to the number
@@ -348,14 +385,19 @@
* restrict the list by given master key verification patterns.
* For different key types there may be different ways to match the
* master key verification patterns. For CCA keys (CCA data key and CCA
- * cipher key) the first 8 bytes of cur_mkvp refer to the current mkvp value
- * of the apqn and the first 8 bytes of the alt_mkvp refer to the old mkvp.
- * The flags argument controls if the apqns current and/or alternate mkvp
+ * cipher key) the first 8 bytes of cur_mkvp refer to the current AES mkvp value
+ * of the apqn and the first 8 bytes of the alt_mkvp refer to the old AES mkvp.
+ * For CCA ECC keys it is similar but the match is against the APKA current/old
+ * mkvp. The flags argument controls if the apqns current and/or alternate mkvp
* should match. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current
* mkvp of each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP.
* If both are given, it is assumed to return apqns where either the
* current or the alternate mkvp matches. If no match flag is given
* (flags is 0) the mkvp values are ignored for the match process.
+ * For EP11 keys there is only the current wkvp. So if the apqns should also
+ * match to a given wkvp, then the PKEY_FLAGS_MATCH_CUR_MKVP flag should be
+ * set. The wkvp value is 32 bytes but only the leftmost 16 bytes are compared
+ * against the leftmost 16 byte of the wkvp of the apqn.
* The list of matching apqns is stored into the space given by the apqns
* argument and the number of stored entries goes into apqn_entries. If the list
* is empty (apqn_entries is 0) the apqn_entries field is updated to the number
@@ -376,4 +418,30 @@
};
#define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype)
+/*
+ * Transform a key blob into a protected key, version 3.
+ * The difference to version 2 of this ioctl is that the protected key
+ * buffer is now explicitly and not within a struct pkey_protkey any more.
+ * So this ioctl is also able to handle EP11 and CCA ECC secure keys and
+ * provide ECC protected keys.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
+ * generate a list of apqns based on the key.
+ */
+struct pkey_kblob2pkey3 {
+ __u8 __user *key; /* in: pointer to key blob */
+ __u32 keylen; /* in: key blob size */
+ struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
+ __u32 apqn_entries; /* in: # of apqn target list entries */
+ __u32 pkeytype; /* out: prot key type (enum pkey_key_type) */
+ __u32 pkeylen; /* in/out: size of pkey buffer/actual len of pkey */
+ __u8 __user *pkey; /* in: pkey blob buffer space ptr */
+};
+#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3)
+
#endif /* _UAPI_PKEY_H */
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 6ca1e68..ede3186 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -29,7 +29,7 @@
{ 0x13, "SIGP conditional emergency signal" }, \
{ 0x15, "SIGP sense running" }, \
{ 0x16, "SIGP set multithreading"}, \
- { 0x17, "SIGP store additional status ait address"}
+ { 0x17, "SIGP store additional status at address"}
#define icpt_prog_codes \
{ 0x0001, "Prog Operation" }, \
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index f9e5e1f..22fd202 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -36,12 +36,12 @@
* - length(n_modulus) = inputdatalength
*/
struct ica_rsa_modexpo {
- char __user *inputdata;
- unsigned int inputdatalength;
- char __user *outputdata;
- unsigned int outputdatalength;
- char __user *b_key;
- char __user *n_modulus;
+ __u8 __user *inputdata;
+ __u32 inputdatalength;
+ __u8 __user *outputdata;
+ __u32 outputdatalength;
+ __u8 __user *b_key;
+ __u8 __user *n_modulus;
};
/**
@@ -59,15 +59,15 @@
* - length(u_mult_inv) = inputdatalength/2 + 8
*/
struct ica_rsa_modexpo_crt {
- char __user *inputdata;
- unsigned int inputdatalength;
- char __user *outputdata;
- unsigned int outputdatalength;
- char __user *bp_key;
- char __user *bq_key;
- char __user *np_prime;
- char __user *nq_prime;
- char __user *u_mult_inv;
+ __u8 __user *inputdata;
+ __u32 inputdatalength;
+ __u8 __user *outputdata;
+ __u32 outputdatalength;
+ __u8 __user *bp_key;
+ __u8 __user *bq_key;
+ __u8 __user *np_prime;
+ __u8 __user *nq_prime;
+ __u8 __user *u_mult_inv;
};
/**
@@ -83,67 +83,67 @@
* key block
*/
struct CPRBX {
- unsigned short cprb_len; /* CPRB length 220 */
- unsigned char cprb_ver_id; /* CPRB version id. 0x02 */
- unsigned char pad_000[3]; /* Alignment pad bytes */
- unsigned char func_id[2]; /* function id 0x5432 */
- unsigned char cprb_flags[4]; /* Flags */
- unsigned int req_parml; /* request parameter buffer len */
- unsigned int req_datal; /* request data buffer */
- unsigned int rpl_msgbl; /* reply message block length */
- unsigned int rpld_parml; /* replied parameter block len */
- unsigned int rpl_datal; /* reply data block len */
- unsigned int rpld_datal; /* replied data block len */
- unsigned int req_extbl; /* request extension block len */
- unsigned char pad_001[4]; /* reserved */
- unsigned int rpld_extbl; /* replied extension block len */
- unsigned char padx000[16 - sizeof(char *)];
- unsigned char *req_parmb; /* request parm block 'address' */
- unsigned char padx001[16 - sizeof(char *)];
- unsigned char *req_datab; /* request data block 'address' */
- unsigned char padx002[16 - sizeof(char *)];
- unsigned char *rpl_parmb; /* reply parm block 'address' */
- unsigned char padx003[16 - sizeof(char *)];
- unsigned char *rpl_datab; /* reply data block 'address' */
- unsigned char padx004[16 - sizeof(char *)];
- unsigned char *req_extb; /* request extension block 'addr'*/
- unsigned char padx005[16 - sizeof(char *)];
- unsigned char *rpl_extb; /* reply extension block 'address'*/
- unsigned short ccp_rtcode; /* server return code */
- unsigned short ccp_rscode; /* server reason code */
- unsigned int mac_data_len; /* Mac Data Length */
- unsigned char logon_id[8]; /* Logon Identifier */
- unsigned char mac_value[8]; /* Mac Value */
- unsigned char mac_content_flgs;/* Mac content flag byte */
- unsigned char pad_002; /* Alignment */
- unsigned short domain; /* Domain */
- unsigned char usage_domain[4];/* Usage domain */
- unsigned char cntrl_domain[4];/* Control domain */
- unsigned char S390enf_mask[4];/* S/390 enforcement mask */
- unsigned char pad_004[36]; /* reserved */
+ __u16 cprb_len; /* CPRB length 220 */
+ __u8 cprb_ver_id; /* CPRB version id. 0x02 */
+ __u8 pad_000[3]; /* Alignment pad bytes */
+ __u8 func_id[2]; /* function id 0x5432 */
+ __u8 cprb_flags[4]; /* Flags */
+ __u32 req_parml; /* request parameter buffer len */
+ __u32 req_datal; /* request data buffer */
+ __u32 rpl_msgbl; /* reply message block length */
+ __u32 rpld_parml; /* replied parameter block len */
+ __u32 rpl_datal; /* reply data block len */
+ __u32 rpld_datal; /* replied data block len */
+ __u32 req_extbl; /* request extension block len */
+ __u8 pad_001[4]; /* reserved */
+ __u32 rpld_extbl; /* replied extension block len */
+ __u8 padx000[16 - sizeof(__u8 *)];
+ __u8 __user *req_parmb; /* request parm block 'address' */
+ __u8 padx001[16 - sizeof(__u8 *)];
+ __u8 __user *req_datab; /* request data block 'address' */
+ __u8 padx002[16 - sizeof(__u8 *)];
+ __u8 __user *rpl_parmb; /* reply parm block 'address' */
+ __u8 padx003[16 - sizeof(__u8 *)];
+ __u8 __user *rpl_datab; /* reply data block 'address' */
+ __u8 padx004[16 - sizeof(__u8 *)];
+ __u8 __user *req_extb; /* request extension block 'addr'*/
+ __u8 padx005[16 - sizeof(__u8 *)];
+ __u8 __user *rpl_extb; /* reply extension block 'address'*/
+ __u16 ccp_rtcode; /* server return code */
+ __u16 ccp_rscode; /* server reason code */
+ __u32 mac_data_len; /* Mac Data Length */
+ __u8 logon_id[8]; /* Logon Identifier */
+ __u8 mac_value[8]; /* Mac Value */
+ __u8 mac_content_flgs; /* Mac content flag byte */
+ __u8 pad_002; /* Alignment */
+ __u16 domain; /* Domain */
+ __u8 usage_domain[4]; /* Usage domain */
+ __u8 cntrl_domain[4]; /* Control domain */
+ __u8 S390enf_mask[4]; /* S/390 enforcement mask */
+ __u8 pad_004[36]; /* reserved */
} __attribute__((packed));
/**
* xcRB
*/
struct ica_xcRB {
- unsigned short agent_ID;
- unsigned int user_defined;
- unsigned short request_ID;
- unsigned int request_control_blk_length;
- unsigned char padding1[16 - sizeof(char *)];
- char __user *request_control_blk_addr;
- unsigned int request_data_length;
- char padding2[16 - sizeof(char *)];
- char __user *request_data_address;
- unsigned int reply_control_blk_length;
- char padding3[16 - sizeof(char *)];
- char __user *reply_control_blk_addr;
- unsigned int reply_data_length;
- char padding4[16 - sizeof(char *)];
- char __user *reply_data_addr;
- unsigned short priority_window;
- unsigned int status;
+ __u16 agent_ID;
+ __u32 user_defined;
+ __u16 request_ID;
+ __u32 request_control_blk_length;
+ __u8 _padding1[16 - sizeof(__u8 *)];
+ __u8 __user *request_control_blk_addr;
+ __u32 request_data_length;
+ __u8 _padding2[16 - sizeof(__u8 *)];
+ __u8 __user *request_data_address;
+ __u32 reply_control_blk_length;
+ __u8 _padding3[16 - sizeof(__u8 *)];
+ __u8 __user *reply_control_blk_addr;
+ __u32 reply_data_length;
+ __u8 __padding4[16 - sizeof(__u8 *)];
+ __u8 __user *reply_data_addr;
+ __u16 priority_window;
+ __u32 status;
} __attribute__((packed));
/**
@@ -161,17 +161,17 @@
* @payload_len: Payload length
*/
struct ep11_cprb {
- __u16 cprb_len;
- unsigned char cprb_ver_id;
- unsigned char pad_000[2];
- unsigned char flags;
- unsigned char func_id[2];
- __u32 source_id;
- __u32 target_id;
- __u32 ret_code;
- __u32 reserved1;
- __u32 reserved2;
- __u32 payload_len;
+ __u16 cprb_len;
+ __u8 cprb_ver_id;
+ __u8 pad_000[2];
+ __u8 flags;
+ __u8 func_id[2];
+ __u32 source_id;
+ __u32 target_id;
+ __u32 ret_code;
+ __u32 reserved1;
+ __u32 reserved2;
+ __u32 payload_len;
} __attribute__((packed));
/**
@@ -197,13 +197,13 @@
*/
struct ep11_urb {
__u16 targets_num;
- __u64 targets;
+ __u8 __user *targets;
__u64 weight;
__u64 req_no;
__u64 req_len;
- __u64 req;
+ __u8 __user *req;
__u64 resp_len;
- __u64 resp;
+ __u8 __user *resp;
} __attribute__((packed));
/**
@@ -237,7 +237,9 @@
struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
};
-#define AUTOSELECT 0xFFFFFFFF
+#define AUTOSELECT 0xFFFFFFFF
+#define AUTOSEL_AP ((__u16) 0xFFFF)
+#define AUTOSEL_DOM ((__u16) 0xFFFF)
#define ZCRYPT_IOCTL_MAGIC 'z'
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
index c5f676c..bbb90f9 100644
--- a/arch/s390/kernel/.gitignore
+++ b/arch/s390/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7edbbcd..dd73b7f 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,11 +33,6 @@
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
-#
-# Pass UTS_MACHINE for user_regset definition
-#
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
@@ -54,7 +49,7 @@
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
-obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
+obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
@@ -62,6 +57,7 @@
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_KPROBES) += kprobes_insn_page.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_UPROBES) += uprobes.o
@@ -70,7 +66,7 @@
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
-obj-$(CONFIG_IMA) += ima_arch.o
+obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
@@ -78,7 +74,7 @@
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_diag.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/
-obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a65cb49..483051e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -11,9 +11,9 @@
#include <linux/kvm_host.h>
#include <linux/sched.h>
#include <linux/purgatory.h>
+#include <linux/pgtable.h>
#include <asm/idle.h>
#include <asm/vdso.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/stacktrace.h>
@@ -53,42 +53,13 @@
/* stack_frame offsets */
OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
OFFSET(__SF_GPRS, stack_frame, gprs);
- OFFSET(__SF_EMPTY, stack_frame, empty1);
- OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
- OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
- OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
- OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
+ OFFSET(__SF_EMPTY, stack_frame, empty1[0]);
+ OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]);
+ OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]);
+ OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]);
+ OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);
BLANK();
- /* timeval/timezone offsets for use by vdso */
- OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
- OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
- OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
- OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
- OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
- OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
- OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
- OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
- OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
- OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
- OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
- OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
- OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
- OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
- OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
- OFFSET(__VDSO_TS_END, vdso_data, ts_end);
- OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
- OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
- OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
- OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
- OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
- BLANK();
- /* constants used by the vdso */
- DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
- DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
- DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
+ OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
BLANK();
/* idle data offsets */
OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a585..205b2e2 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -141,7 +141,7 @@
while (count) {
from = __pa(src);
if (!OLDMEM_BASE && from < sclp.hsa_size) {
- /* Copy from zfcpdump HSA area */
+ /* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - from);
rc = memcpy_hsa_kernel(dst, from, len);
if (rc)
@@ -184,7 +184,7 @@
while (count) {
from = __pa(src);
if (!OLDMEM_BASE && from < sclp.hsa_size) {
- /* Copy from zfcpdump HSA area */
+ /* Copy from zfcp/nvme dump HSA area */
len = min(count, sclp.hsa_size - from);
rc = memcpy_hsa_user(dst, from, len);
if (rc)
@@ -258,7 +258,7 @@
}
/*
- * Remap "oldmem" for zfcpdump
+ * Remap "oldmem" for zfcp/nvme dump
*
* We only map available memory above HSA size. Memory below HSA size
* is read on demand using the copy_oldmem_page() function.
@@ -283,7 +283,7 @@
}
/*
- * Remap "oldmem" for kdump or zfcpdump
+ * Remap "oldmem" for kdump or zfcp/nvme dump
*/
int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
unsigned long pfn, unsigned long size, pgprot_t prot)
@@ -549,8 +549,7 @@
int cnt = 0;
u64 idx;
- for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
- MEMBLOCK_NONE, NULL, NULL, NULL)
+ for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
cnt++;
return cnt;
}
@@ -563,8 +562,7 @@
phys_addr_t start, end;
u64 idx;
- for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
- MEMBLOCK_NONE, &start, &end, NULL) {
+ for_each_physmem_range(idx, &oldmem_type, &start, &end) {
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
@@ -634,11 +632,11 @@
u32 alloc_size;
u64 hdr_off;
- /* If we are not in kdump or zfcpdump mode return */
- if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+ /* If we are not in kdump or zfcp/nvme dump mode return */
+ if (!OLDMEM_BASE && !is_ipl_type_dump())
return 0;
- /* If we cannot get HSA size for zfcpdump return error */
- if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
+ /* If we cannot get HSA size for zfcp/nvme dump return error */
+ if (is_ipl_type_dump() && !sclp.hsa_size)
return -ENODEV;
/* For kdump, exclude previous crashkernel memory */
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index b1aadc3..89fbfb3 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -2,7 +2,7 @@
/*
* S/390 debug facility
*
- * Copyright IBM Corp. 1999, 2012
+ * Copyright IBM Corp. 1999, 2020
*
* Author(s): Michael Holzheu (holzheu@de.ibm.com),
* Holger Smolinski (Holger.Smolinski@de.ibm.com)
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/minmax.h>
#include <linux/debugfs.h>
#include <asm/debug.h>
@@ -90,27 +91,13 @@
size_t user_buf_size, loff_t *offset);
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, const char *in_buf);
-static int debug_raw_format_fn(debug_info_t *id,
- struct debug_view *view, char *out_buf,
- const char *in_buf);
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf);
-
static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
char *out_buf, debug_sprintf_entry_t *curr_event);
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
+static void debug_events_append(debug_info_t *dest, debug_info_t *src);
/* globals */
-struct debug_view debug_raw_view = {
- "raw",
- NULL,
- &debug_raw_header_fn,
- &debug_raw_format_fn,
- NULL,
- NULL
-};
-EXPORT_SYMBOL(debug_raw_view);
-
struct debug_view debug_hex_ascii_view = {
"hex_ascii",
NULL,
@@ -414,7 +401,7 @@
act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
[p_info->act_page] + p_info->act_entry);
- if (act_entry->id.stck == 0LL)
+ if (act_entry->clock == 0LL)
goto out; /* empty entry */
if (view->header_proc)
len += view->header_proc(id_snap, view, p_info->act_area,
@@ -752,35 +739,28 @@
*/
static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area)
{
- debug_entry_t ***new_areas;
+ debug_info_t *new_id;
unsigned long flags;
- int rc = 0;
if (!id || (nr_areas <= 0) || (pages_per_area < 0))
return -EINVAL;
- if (pages_per_area > 0) {
- new_areas = debug_areas_alloc(pages_per_area, nr_areas);
- if (!new_areas) {
- pr_info("Allocating memory for %i pages failed\n",
- pages_per_area);
- rc = -ENOMEM;
- goto out;
- }
- } else {
- new_areas = NULL;
+
+ new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+ id->level, ALL_AREAS);
+ if (!new_id) {
+ pr_info("Allocating memory for %i pages failed\n",
+ pages_per_area);
+ return -ENOMEM;
}
+
spin_lock_irqsave(&id->lock, flags);
- debug_areas_free(id);
- id->areas = new_areas;
- id->nr_areas = nr_areas;
- id->pages_per_area = pages_per_area;
- id->active_area = 0;
- memset(id->active_entries, 0, sizeof(int)*id->nr_areas);
- memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
+ debug_events_append(new_id, id);
+ debug_areas_swap(new_id, id);
+ debug_info_free(new_id);
spin_unlock_irqrestore(&id->lock, flags);
pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area);
-out:
- return rc;
+
+ return 0;
}
/**
@@ -847,6 +827,42 @@
id->active_entries[id->active_area]);
}
+/* Swap debug areas of a and b. */
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b)
+{
+ swap(a->nr_areas, b->nr_areas);
+ swap(a->pages_per_area, b->pages_per_area);
+ swap(a->areas, b->areas);
+ swap(a->active_area, b->active_area);
+ swap(a->active_pages, b->active_pages);
+ swap(a->active_entries, b->active_entries);
+}
+
+/* Append all debug events in active area from source to destination log. */
+static void debug_events_append(debug_info_t *dest, debug_info_t *src)
+{
+ debug_entry_t *from, *to, *last;
+
+ if (!src->areas || !dest->areas)
+ return;
+
+ /* Loop over all entries in src, starting with oldest. */
+ from = get_active_entry(src);
+ last = from;
+ do {
+ if (from->clock != 0LL) {
+ to = get_active_entry(dest);
+ memset(to, 0, dest->entry_size);
+ memcpy(to, from, min(src->entry_size,
+ dest->entry_size));
+ proceed_active_entry(dest);
+ }
+
+ proceed_active_entry(src);
+ from = get_active_entry(src);
+ } while (from != last);
+}
+
/*
* debug_finish_entry:
* - set timestamp, caller address, cpu number etc.
@@ -855,12 +871,17 @@
static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
int level, int exception)
{
- active->id.stck = get_tod_clock_fast() -
- *(unsigned long long *) &tod_clock_base[1];
- active->id.fields.cpuid = smp_processor_id();
+ unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ unsigned long timestamp;
+
+ get_tod_clock_ext(clk);
+ timestamp = *(unsigned long *) &clk[0] >> 4;
+ timestamp -= TOD_UNIX_EPOCH >> 12;
+ active->clock = timestamp;
+ active->cpu = smp_processor_id();
active->caller = __builtin_return_address(0);
- active->id.fields.exception = exception;
- active->id.fields.level = level;
+ active->exception = exception;
+ active->level = level;
proceed_active_entry(id);
if (exception)
proceed_active_area(id);
@@ -878,7 +899,7 @@
* if debug_active is already off
*/
static int s390dbf_procactive(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
if (!write || debug_stoppable || !debug_active)
return proc_dointvec(table, write, buffer, lenp, ppos);
@@ -1396,32 +1417,6 @@
}
/*
- * prints debug header in raw format
- */
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
- int area, debug_entry_t *entry, char *out_buf)
-{
- int rc;
-
- rc = sizeof(debug_entry_t);
- memcpy(out_buf, entry, sizeof(debug_entry_t));
- return rc;
-}
-
-/*
- * prints debug data in raw format
- */
-static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view,
- char *out_buf, const char *in_buf)
-{
- int rc;
-
- rc = id->buf_size;
- memcpy(out_buf, in_buf, id->buf_size);
- return rc;
-}
-
-/*
* prints debug data in hex/ascii format
*/
static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
@@ -1450,25 +1445,24 @@
int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
int area, debug_entry_t *entry, char *out_buf)
{
- unsigned long base, sec, usec;
+ unsigned long sec, usec;
unsigned long caller;
unsigned int level;
char *except_str;
int rc = 0;
- level = entry->id.fields.level;
- base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
- sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
+ level = entry->level;
+ sec = entry->clock;
usec = do_div(sec, USEC_PER_SEC);
- if (entry->id.fields.exception)
+ if (entry->exception)
except_str = "*";
else
except_str = "-";
caller = (unsigned long) entry->caller;
- rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ",
+ rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK ",
area, sec, usec, level, except_str,
- entry->id.fields.cpuid, (void *)caller);
+ entry->cpu, (void *)caller);
return rc;
}
EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index ccba63a..b8b0cd7 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -104,18 +104,7 @@
.show = show_diag_stat,
};
-static int show_diag_stat_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &show_diag_stat_sops);
-}
-
-static const struct file_operations show_diag_stat_fops = {
- .open = show_diag_stat_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
+DEFINE_SEQ_ATTRIBUTE(show_diag_stat);
static int __init show_diag_stat_init(void)
{
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 03e35b3..5412efe 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -482,31 +482,37 @@
return (int) (ptr - buffer);
}
+static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len)
+{
+ if (user_mode(regs)) {
+ if (copy_from_user(dst, (char __user *)src, len))
+ return -EFAULT;
+ } else {
+ if (copy_from_kernel_nofault(dst, src, len))
+ return -EFAULT;
+ }
+ return 0;
+}
+
void show_code(struct pt_regs *regs)
{
char *mode = user_mode(regs) ? "User" : "Krnl";
unsigned char code[64];
char buffer[128], *ptr;
- mm_segment_t old_fs;
unsigned long addr;
int start, end, opsize, hops, i;
/* Get a snapshot of the 64 bytes surrounding the fault address. */
- old_fs = get_fs();
- set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
addr = regs->psw.addr - 34 + start;
- if (__copy_from_user(code + start - 2,
- (char __user *) addr, 2))
+ if (copy_from_regs(regs, code + start - 2, (void *)addr, 2))
break;
}
for (end = 32; end < 64; end += 2) {
addr = regs->psw.addr + end - 32;
- if (__copy_from_user(code + end,
- (char __user *) addr, 2))
+ if (copy_from_regs(regs, code + end, (void *)addr, 2))
break;
}
- set_fs(old_fs);
/* Code snapshot useable ? */
if ((regs->psw.addr & 1) || start >= end) {
printk("%s Code: Bad PSW.\n", mode);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 34bdc60..0dc4b25 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,6 +38,7 @@
return "unknown";
}
}
+EXPORT_SYMBOL_GPL(stack_type_name);
static inline bool in_stack(unsigned long sp, struct stack_info *info,
enum stack_type type, unsigned long low,
@@ -93,7 +94,9 @@
if (!sp)
goto unknown;
- task = task ? : current;
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (sp & 0x7)
+ goto unknown;
/* Check per-task stack */
if (in_task_stack(sp, task, info))
@@ -123,17 +126,16 @@
return -EINVAL;
}
-void show_stack(struct task_struct *task, unsigned long *stack)
+void show_stack(struct task_struct *task, unsigned long *stack,
+ const char *loglvl)
{
struct unwind_state state;
- printk("Call Trace:\n");
- if (!task)
- task = current;
+ printk("%sCall Trace:\n", loglvl);
unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
- printk(state.reliable ? " [<%016lx>] %pSR \n" :
- "([<%016lx>] %pSR)\n",
- state.ip, (void *) state.ip);
+ printk(state.reliable ? "%s [<%016lx>] %pSR \n" :
+ "%s([<%016lx>] %pSR)\n",
+ loglvl, state.ip, (void *) state.ip);
debug_show_held_locks(task ? : current);
}
@@ -174,7 +176,7 @@
show_registers(regs);
/* Show stack backtrace if pt_regs is from kernel mode */
if (!user_mode(regs))
- show_stack(NULL, (unsigned long *) regs->gprs[15]);
+ show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT);
show_last_breaking_event(regs);
}
@@ -194,6 +196,8 @@
regs->int_code >> 17, ++die_counter);
#ifdef CONFIG_PREEMPT
pr_cont("PREEMPT ");
+#elif defined(CONFIG_PREEMPT_RT)
+ pr_cont("PREEMPT_RT ");
#endif
pr_cont("SMP ");
if (debug_pagealloc_enabled())
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index eb89cb0..985e1e7 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,6 +30,7 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
+#include <asm/switch_to.h>
#include "entry.h"
static void __init reset_tod_clock(void)
@@ -205,21 +206,6 @@
S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
}
-static __init void detect_diag44(void)
-{
- int rc;
-
- diag_stat_inc(DIAG_STAT_X044);
- asm volatile(
- " diag 0,0,0x44\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-}
-
static __init void detect_machine_facilities(void)
{
if (test_facility(8)) {
@@ -240,7 +226,7 @@
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
- if (test_facility(130)) {
+ if (test_facility(130) && !noexec_disabled) {
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
@@ -266,6 +252,24 @@
#endif
}
+static inline void setup_control_registers(void)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, 0, 0);
+ reg |= CR0_LOW_ADDRESS_PROTECTION;
+ reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
+ reg |= CR0_EXTERNAL_CALL_SUBMASK;
+ __ctl_load(reg, 0, 0);
+}
+
+static inline void setup_access_registers(void)
+{
+ unsigned int acrs[NUM_ACRS] = { 0 };
+
+ restore_access_regs(acrs);
+}
+
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
@@ -274,34 +278,6 @@
}
early_param("novx", disable_vector_extension);
-static int __init noexec_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && !enabled) {
- /* Disable no-execute support */
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
- __ctl_clear_bit(0, 20);
- }
- return rc;
-}
-early_param("noexec", noexec_setup);
-
-static int __init cad_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && enabled && test_facility(128))
- /* Enable problem state CAD. */
- __ctl_set_bit(2, 3);
- return rc;
-}
-early_param("cad", cad_setup);
-
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
static void __init setup_boot_command_line(void)
{
@@ -333,10 +309,11 @@
setup_arch_string();
setup_boot_command_line();
detect_diag9c();
- detect_diag44();
detect_machine_facilities();
save_vector_registers();
setup_topology();
sclp_early_detect();
+ setup_control_registers();
+ setup_access_registers();
lockdep_on();
}
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index 6f24d83..d9d53f4 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -10,7 +10,7 @@
static void sclp_early_write(struct console *con, const char *s, unsigned int len)
{
- __sclp_early_printk(s, len, 0);
+ __sclp_early_printk(s, len);
}
static struct console sclp_early_console = {
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5cba181..963e8cb 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -55,14 +55,11 @@
_TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
- _CIF_ASCE_SECONDARY | _CIF_FPU)
+_CIF_WORK = (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU)
_PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
_LPP_OFFSET = __LC_LPP
-#define BASED(name) name-cleanup_critical(%r13)
-
.macro TRACE_IRQS_ON
#ifdef CONFIG_TRACE_IRQFLAGS
basr %r2,%r0
@@ -113,31 +110,69 @@
#endif
.endm
- .macro SWITCH_ASYNC savearea,timer
+ .macro SWITCH_ASYNC savearea,timer,clock
tmhh %r8,0x0001 # interrupting from user ?
- jnz 2f
+ jnz 4f
+#if IS_ENABLED(CONFIG_KVM)
lgr %r14,%r9
- cghi %r14,__LC_RETURN_LPSWE
- je 0f
- slg %r14,BASED(.Lcritical_start)
- clg %r14,BASED(.Lcritical_length)
- jhe 1f
-0:
+ larl %r13,.Lsie_gmap
+ slgr %r14,%r13
+ lghi %r13,.Lsie_done - .Lsie_gmap
+ clgr %r14,%r13
+ jhe 0f
lghi %r11,\savearea # inside critical section, do cleanup
- brasl %r14,cleanup_critical
- tmhh %r8,0x0001 # retest problem state after cleanup
- jnz 2f
-1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
+ brasl %r14,.Lcleanup_sie
+#endif
+0: larl %r13,.Lpsw_idle_exit
+ cgr %r13,%r9
+ jne 3f
+
+ larl %r1,smp_cpu_mtid
+ llgf %r1,0(%r1)
+ ltgr %r1,%r1
+ jz 2f # no SMT, skip mt_cycles calculation
+ .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
+ larl %r3,mt_cycles
+ ag %r3,__LC_PERCPU_OFFSET
+ la %r4,__SF_EMPTY+16(%r15)
+1: lg %r0,0(%r3)
+ slg %r0,0(%r4)
+ alg %r0,64(%r4)
+ stg %r0,0(%r3)
+ la %r3,8(%r3)
+ la %r4,8(%r4)
+ brct %r1,1b
+
+2: mvc __CLOCK_IDLE_EXIT(8,%r2), \clock
+ mvc __TIMER_IDLE_EXIT(8,%r2), \timer
+ # account system time going idle
+ ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
+
+ lg %r13,__LC_STEAL_TIMER
+ alg %r13,__CLOCK_IDLE_ENTER(%r2)
+ slg %r13,__LC_LAST_UPDATE_CLOCK
+ stg %r13,__LC_STEAL_TIMER
+
+ mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+
+ lg %r13,__LC_SYSTEM_TIMER
+ alg %r13,__LC_LAST_UPDATE_TIMER
+ slg %r13,__TIMER_IDLE_ENTER(%r2)
+ stg %r13,__LC_SYSTEM_TIMER
+ mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+
+ nihh %r8,0xfcfd # clear wait state and irq bits
+3: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
slgr %r14,%r15
srag %r14,%r14,STACK_SHIFT
- jnz 3f
+ jnz 5f
CHECK_STACK \savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 4f
-2: UPDATE_VTIME %r14,%r15,\timer
+ j 6f
+4: UPDATE_VTIME %r14,%r15,\timer
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-3: lg %r15,__LC_ASYNC_STACK # load async stack
-4: la %r11,STACK_FRAME_OVERHEAD(%r15)
+5: lg %r15,__LC_ASYNC_STACK # load async stack
+6: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
.macro UPDATE_VTIME w1,w2,enter_timer
@@ -152,12 +187,30 @@
mvc __LC_LAST_UPDATE_TIMER(8),\enter_timer
.endm
- .macro REENABLE_IRQS
+ .macro RESTORE_SM_CLEAR_PER
stg %r8,__LC_RETURN_PSW
ni __LC_RETURN_PSW,0xbf
ssm __LC_RETURN_PSW
.endm
+ .macro ENABLE_INTS
+ stosm __SF_EMPTY(%r15),3
+ .endm
+
+ .macro ENABLE_INTS_TRACE
+ TRACE_IRQS_ON
+ ENABLE_INTS
+ .endm
+
+ .macro DISABLE_INTS
+ stnsm __SF_EMPTY(%r15),0xfc
+ .endm
+
+ .macro DISABLE_INTS_TRACE
+ DISABLE_INTS
+ TRACE_IRQS_OFF
+ .endm
+
.macro STCK savearea
#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
.insn s,0xb27c0000,\savearea # store clock fast
@@ -254,8 +307,6 @@
BR_EX %r14
ENDPROC(__switch_to)
-.L__critical_start:
-
#if IS_ENABLED(CONFIG_KVM)
/*
* sie64a calling convention:
@@ -288,7 +339,6 @@
BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_entry:
sie 0(%r14)
-.Lsie_exit:
BPOFF
BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
.Lsie_skip:
@@ -336,21 +386,19 @@
/*
* SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
+ * are entered with interrupts disabled.
*/
ENTRY(system_call)
stpt __LC_SYNC_ENTER_TIMER
-.Lsysc_stmg:
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
BPOFF
lg %r12,__LC_CURRENT
- lghi %r13,__TASK_thread
lghi %r14,_PIF_SYSCALL
.Lsysc_per:
+ lghi %r13,__TASK_thread
lg %r15,__LC_KERNEL_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
-.Lsysc_vtime:
UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
stmg %r0,%r7,__PT_R0(%r11)
@@ -358,6 +406,8 @@
mvc __PT_PSW(16,%r11),__LC_SVC_OLD_PSW
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
stg %r14,__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ ENABLE_INTS
.Lsysc_do_svc:
# clear user controlled register to prevent speculative use
xgr %r0,%r0
@@ -373,7 +423,6 @@
jnl .Lsysc_nr_ok
slag %r8,%r1,3
.Lsysc_nr_ok:
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stg %r2,__PT_ORIG_GPR2(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r9,0(%r8,%r10) # get system call add.
@@ -389,30 +438,30 @@
#endif
LOCKDEP_SYS_EXIT
.Lsysc_tif:
+ DISABLE_INTS
TSTMSK __PT_FLAGS(%r11),_PIF_WORK
jnz .Lsysc_work
TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lsysc_work # check for work
- TSTMSK __LC_CPU_FLAGS,_CIF_WORK
+ TSTMSK __LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU)
jnz .Lsysc_work
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
.Lsysc_restore:
- lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
+ DISABLE_INTS
+ TSTMSK __LC_CPU_FLAGS, _CIF_FPU
+ jz .Lsysc_skip_fpu
+ brasl %r14,load_fpu_regs
+.Lsysc_skip_fpu:
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
-.Lsysc_exit_timer:
stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
- lmg %r11,%r15,__PT_R11(%r11)
- b __LC_RETURN_LPSWE(%r0)
-.Lsysc_done:
+ lmg %r0,%r15,__PT_R0(%r11)
+ b __LC_RETURN_LPSWE
#
# One of the work bits is on. Find out which one.
#
.Lsysc_work:
- TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
- jo .Lsysc_mcck_pending
+ ENABLE_INTS
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lsysc_reschedule
TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
@@ -436,11 +485,9 @@
jo .Lsysc_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lsysc_notify_resume
- TSTMSK __LC_CPU_FLAGS,_CIF_FPU
- jo .Lsysc_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lsysc_asce
- j .Lsysc_return # beware of critical section cleanup
+ j .Lsysc_return
#
# _TIF_NEED_RESCHED is set, call schedule
@@ -450,13 +497,6 @@
jg schedule
#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lsysc_mcck_pending:
- larl %r14,.Lsysc_return
- jg s390_handle_mcck # TIF bit will be cleared by handler
-
-#
# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
#
.Lsysc_asce:
@@ -475,12 +515,6 @@
larl %r14,.Lsysc_return
jg set_fs_fixup
-#
-# CIF_FPU is set, restore floating-point controls and floating-point registers.
-#
-.Lsysc_vxrs:
- larl %r14,.Lsysc_return
- jg load_fpu_regs
#
# _TIF_SIGPENDING is set, call do_signal
@@ -564,7 +598,6 @@
jnh .Lsysc_tracenogo
sllg %r8,%r2,3
lg %r9,0(%r8,%r10)
-.Lsysc_tracego:
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r2,__PT_ORIG_GPR2(%r11)
@@ -585,8 +618,6 @@
la %r11,STACK_FRAME_OVERHEAD(%r15)
lg %r12,__LC_CURRENT
brasl %r14,schedule_tail
- TRACE_IRQS_ON
- ssm __LC_SVC_NEW_PSW # reenable interrupts
tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
jne .Lsysc_tracenogo
# it's a kernel thread
@@ -620,15 +651,16 @@
lghi %r10,1
0: lg %r12,__LC_CURRENT
lghi %r11,0
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit
jnz 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
# cleanup critical section for program checks in sie64a
lgr %r14,%r9
- slg %r14,BASED(.Lsie_critical_start)
- clg %r14,BASED(.Lsie_critical_length)
+ larl %r13,.Lsie_gmap
+ slgr %r14,%r13
+ lghi %r13,.Lsie_done - .Lsie_gmap
+ clgr %r14,%r13
jhe 1f
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
@@ -680,8 +712,8 @@
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-6: REENABLE_IRQS
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+6: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ RESTORE_SM_CLEAR_PER
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
nill %r10,0x007f
@@ -702,8 +734,8 @@
# PER event in supervisor state, must be kprobes
#
.Lpgm_kprobe:
- REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ RESTORE_SM_CLEAR_PER
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_per_trap
j .Lpgm_return
@@ -713,11 +745,10 @@
#
.Lpgm_svcper:
mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
- lghi %r13,__TASK_thread
larl %r14,.Lsysc_per
stg %r14,__LC_RETURN_PSW+8
lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
- lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs
+ lpswe __LC_RETURN_PSW # branch to .Lsysc_per
ENDPROC(pgm_check_handler)
/*
@@ -729,9 +760,8 @@
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK
stmg %r0,%r7,__PT_R0(%r11)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
@@ -747,10 +777,10 @@
stmg %r8,%r9,__PT_PSW(%r11)
mvc __PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
jo .Lio_restore
TRACE_IRQS_OFF
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
.Lio_loop:
lgr %r2,%r11 # pass pointer to pt_regs
lghi %r3,IO_INTERRUPT
@@ -767,25 +797,20 @@
j .Lio_loop
.Lio_return:
LOCKDEP_SYS_EXIT
- TRACE_IRQS_ON
-.Lio_tif:
TSTMSK __TI_flags(%r12),_TIF_WORK
jnz .Lio_work # there is work to do (signals etc.)
TSTMSK __LC_CPU_FLAGS,_CIF_WORK
jnz .Lio_work
.Lio_restore:
- lg %r14,__LC_VDSO_PER_CPU
- lmg %r0,%r10,__PT_R0(%r11)
+ TRACE_IRQS_ON
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jno .Lio_exit_kernel
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
-.Lio_exit_timer:
stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
.Lio_exit_kernel:
- lmg %r11,%r15,__PT_R11(%r11)
- b __LC_RETURN_LPSWE(%r0)
+ lmg %r0,%r15,__PT_R0(%r11)
+ b __LC_RETURN_LPSWE
.Lio_done:
#
@@ -800,7 +825,7 @@
.Lio_work:
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jo .Lio_work_user # yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
# check for preemptive scheduling
icm %r0,15,__LC_PREEMPT_COUNT
jnz .Lio_restore # preemption is disabled
@@ -813,9 +838,6 @@
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
- # TRACE_IRQS_ON already done at .Lio_return, call
- # TRACE_IRQS_OFF to keep things symmetrical
- TRACE_IRQS_OFF
brasl %r14,preempt_schedule_irq
j .Lio_return
#else
@@ -835,9 +857,6 @@
#
# One of the work bits is on. Find out which one.
#
-.Lio_work_tif:
- TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
- jo .Lio_mcck_pending
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jo .Lio_reschedule
#ifdef CONFIG_LIVEPATCH
@@ -854,15 +873,6 @@
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
jnz .Lio_asce
- j .Lio_return # beware of critical section cleanup
-
-#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lio_mcck_pending:
- # TRACE_IRQS_ON already done at .Lio_return
- brasl %r14,s390_handle_mcck # TIF bit will be cleared by handler
- TRACE_IRQS_OFF
j .Lio_return
#
@@ -895,23 +905,19 @@
# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
#
.Lio_guarded_storage:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,gs_load_bc_cb
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
brasl %r14,schedule # call scheduler
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
#
@@ -928,24 +934,20 @@
# _TIF_SIGPENDING or is set, call do_signal
#
.Lio_sigpending:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_signal
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
#
# _TIF_NOTIFY_RESUME or is set, call do_notify_resume
#
.Lio_notify_resume:
- # TRACE_IRQS_ON already done at .Lio_return
- ssm __LC_SVC_NEW_PSW # reenable interrupts
+ ENABLE_INTS_TRACE
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,do_notify_resume
- ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- TRACE_IRQS_OFF
+ DISABLE_INTS_TRACE
j .Lio_return
ENDPROC(io_int_handler)
@@ -958,9 +960,8 @@
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r12,__LC_CURRENT
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
- SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
+ SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK
stmg %r0,%r7,__PT_R0(%r11)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
@@ -979,10 +980,10 @@
mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
mvc __PT_INT_PARM_LONG(8,%r11),0(%r1)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+ xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ
jo .Lio_restore
TRACE_IRQS_OFF
- xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
lghi %r3,EXT_INTERRUPT
brasl %r14,do_IRQ
@@ -990,12 +991,12 @@
ENDPROC(ext_int_handler)
/*
- * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
+ * Load idle PSW.
*/
ENTRY(psw_idle)
stg %r14,(__SF_GPRS+8*8)(%r15)
stg %r3,__SF_EMPTY(%r15)
- larl %r1,.Lpsw_idle_lpsw+4
+ larl %r1,.Lpsw_idle_exit
stg %r1,__SF_EMPTY+8(%r15)
larl %r1,smp_cpu_mtid
llgf %r1,0(%r1)
@@ -1007,10 +1008,9 @@
BPON
STCK __CLOCK_IDLE_ENTER(%r2)
stpt __TIMER_IDLE_ENTER(%r2)
-.Lpsw_idle_lpsw:
lpswe __SF_EMPTY(%r15)
+.Lpsw_idle_exit:
BR_EX %r14
-.Lpsw_idle_end:
ENDPROC(psw_idle)
/*
@@ -1021,6 +1021,7 @@
* of the register contents at return from io or a system call.
*/
ENTRY(save_fpu_regs)
+ stnsm __SF_EMPTY(%r15),0xfc
lg %r2,__LC_CURRENT
aghi %r2,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
@@ -1052,6 +1053,7 @@
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
.Lsave_fpu_regs_exit:
+ ssm __SF_EMPTY(%r15)
BR_EX %r14
.Lsave_fpu_regs_end:
ENDPROC(save_fpu_regs)
@@ -1068,6 +1070,7 @@
* %r4
*/
load_fpu_regs:
+ stnsm __SF_EMPTY(%r15),0xfc
lg %r4,__LC_CURRENT
aghi %r4,__TASK_thread
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
@@ -1099,12 +1102,11 @@
.Lload_fpu_regs_done:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
.Lload_fpu_regs_exit:
+ ssm __SF_EMPTY(%r15)
BR_EX %r14
.Lload_fpu_regs_end:
ENDPROC(load_fpu_regs)
-.L__critical_end:
-
/*
* Machine check handler routines
*/
@@ -1117,7 +1119,6 @@
lam %a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
lg %r12,__LC_CURRENT
- larl %r13,cleanup_critical
lmg %r8,%r9,__LC_MCK_OLD_PSW
TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
@@ -1183,7 +1184,7 @@
TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
4: ssm __LC_PGM_NEW_PSW # turn dat on, keep irqs off
- SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
+ SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK
.Lmcck_skip:
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
@@ -1203,27 +1204,23 @@
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,s390_do_machine_check
- tm __PT_PSW+1(%r11),0x01 # returning to user ?
- jno .Lmcck_return
+ cghi %r2,0
+ je .Lmcck_return
lg %r1,__LC_KERNEL_STACK # switch to kernel stack
mvc STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
xc __SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
la %r11,STACK_FRAME_OVERHEAD(%r1)
lgr %r15,%r1
- TSTMSK __LC_CPU_FLAGS,_CIF_MCCK_PENDING
- jno .Lmcck_return
TRACE_IRQS_OFF
brasl %r14,s390_handle_mcck
TRACE_IRQS_ON
.Lmcck_return:
- lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
jno 0f
BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
stpt __LC_EXIT_TIMER
- mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
b __LC_RETURN_MCCK_LPSWE
@@ -1281,265 +1278,23 @@
ENDPROC(stack_overflow)
#endif
-ENTRY(cleanup_critical)
- cghi %r9,__LC_RETURN_LPSWE
- je .Lcleanup_lpswe
#if IS_ENABLED(CONFIG_KVM)
- clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
- jl 0f
- clg %r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
- jl .Lcleanup_sie
-#endif
- clg %r9,BASED(.Lcleanup_table) # system_call
- jl 0f
- clg %r9,BASED(.Lcleanup_table+8) # .Lsysc_do_svc
- jl .Lcleanup_system_call
- clg %r9,BASED(.Lcleanup_table+16) # .Lsysc_tif
- jl 0f
- clg %r9,BASED(.Lcleanup_table+24) # .Lsysc_restore
- jl .Lcleanup_sysc_tif
- clg %r9,BASED(.Lcleanup_table+32) # .Lsysc_done
- jl .Lcleanup_sysc_restore
- clg %r9,BASED(.Lcleanup_table+40) # .Lio_tif
- jl 0f
- clg %r9,BASED(.Lcleanup_table+48) # .Lio_restore
- jl .Lcleanup_io_tif
- clg %r9,BASED(.Lcleanup_table+56) # .Lio_done
- jl .Lcleanup_io_restore
- clg %r9,BASED(.Lcleanup_table+64) # psw_idle
- jl 0f
- clg %r9,BASED(.Lcleanup_table+72) # .Lpsw_idle_end
- jl .Lcleanup_idle
- clg %r9,BASED(.Lcleanup_table+80) # save_fpu_regs
- jl 0f
- clg %r9,BASED(.Lcleanup_table+88) # .Lsave_fpu_regs_end
- jl .Lcleanup_save_fpu_regs
- clg %r9,BASED(.Lcleanup_table+96) # load_fpu_regs
- jl 0f
- clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
- jl .Lcleanup_load_fpu_regs
-0: BR_EX %r14,%r11
-ENDPROC(cleanup_critical)
-
- .align 8
-.Lcleanup_table:
- .quad system_call
- .quad .Lsysc_do_svc
- .quad .Lsysc_tif
- .quad .Lsysc_restore
- .quad .Lsysc_done
- .quad .Lio_tif
- .quad .Lio_restore
- .quad .Lio_done
- .quad psw_idle
- .quad .Lpsw_idle_end
- .quad save_fpu_regs
- .quad .Lsave_fpu_regs_end
- .quad load_fpu_regs
- .quad .Lload_fpu_regs_end
-
-#if IS_ENABLED(CONFIG_KVM)
-.Lcleanup_table_sie:
- .quad .Lsie_gmap
- .quad .Lsie_done
-
.Lcleanup_sie:
- cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
- je 1f
- slg %r9,BASED(.Lsie_crit_mcck_start)
- clg %r9,BASED(.Lsie_crit_mcck_length)
- jh 1f
- oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+ cghi %r11,__LC_SAVE_AREA_ASYNC #Is this in normal interrupt?
+ je 1f
+ larl %r13,.Lsie_entry
+ slgr %r9,%r13
+ lghi %r13,.Lsie_skip - .Lsie_entry
+ clgr %r9,%r13
+ jh 1f
+ oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1: BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
BR_EX %r14,%r11
-#endif
-.Lcleanup_system_call:
- # check if stpt has been executed
- clg %r9,BASED(.Lcleanup_system_call_insn)
- jh 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: # check if stmg has been executed
- clg %r9,BASED(.Lcleanup_system_call_insn+8)
- jh 0f
- mvc __LC_SAVE_AREA_SYNC(64),0(%r11)
-0: # check if base register setup + TIF bit load has been done
- clg %r9,BASED(.Lcleanup_system_call_insn+16)
- jhe 0f
- # set up saved register r12 task struct pointer
- stg %r12,32(%r11)
- # set up saved register r13 __TASK_thread offset
- mvc 40(8,%r11),BASED(.Lcleanup_system_call_const)
-0: # check if the user time update has been done
- clg %r9,BASED(.Lcleanup_system_call_insn+24)
- jh 0f
- lg %r15,__LC_EXIT_TIMER
- slg %r15,__LC_SYNC_ENTER_TIMER
- alg %r15,__LC_USER_TIMER
- stg %r15,__LC_USER_TIMER
-0: # check if the system time update has been done
- clg %r9,BASED(.Lcleanup_system_call_insn+32)
- jh 0f
- lg %r15,__LC_LAST_UPDATE_TIMER
- slg %r15,__LC_EXIT_TIMER
- alg %r15,__LC_SYSTEM_TIMER
- stg %r15,__LC_SYSTEM_TIMER
-0: # update accounting time stamp
- mvc __LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
- BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
- # set up saved register r11
- lg %r15,__LC_KERNEL_STACK
- la %r9,STACK_FRAME_OVERHEAD(%r15)
- stg %r9,24(%r11) # r11 pt_regs pointer
- # fill pt_regs
- mvc __PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
- stmg %r0,%r7,__PT_R0(%r9)
- mvc __PT_PSW(16,%r9),__LC_SVC_OLD_PSW
- mvc __PT_INT_CODE(4,%r9),__LC_SVC_ILC
- xc __PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
- mvi __PT_FLAGS+7(%r9),_PIF_SYSCALL
- # setup saved register r15
- stg %r15,56(%r11) # r15 stack pointer
- # set new psw address and exit
- larl %r9,.Lsysc_do_svc
- BR_EX %r14,%r11
-.Lcleanup_system_call_insn:
- .quad system_call
- .quad .Lsysc_stmg
- .quad .Lsysc_per
- .quad .Lsysc_vtime+36
- .quad .Lsysc_vtime+42
-.Lcleanup_system_call_const:
- .quad __TASK_thread
-
-.Lcleanup_sysc_tif:
- larl %r9,.Lsysc_tif
- BR_EX %r14,%r11
-
-.Lcleanup_sysc_restore:
- # check if stpt has been executed
- clg %r9,BASED(.Lcleanup_sysc_restore_insn)
- jh 0f
- mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: clg %r9,BASED(.Lcleanup_sysc_restore_insn+8)
- je 1f
- lg %r9,24(%r11) # get saved pointer to pt_regs
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- mvc 0(64,%r11),__PT_R8(%r9)
- lmg %r0,%r7,__PT_R0(%r9)
-.Lcleanup_lpswe:
-1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_EX %r14,%r11
-.Lcleanup_sysc_restore_insn:
- .quad .Lsysc_exit_timer
- .quad .Lsysc_done - 4
-
-.Lcleanup_io_tif:
- larl %r9,.Lio_tif
- BR_EX %r14,%r11
-
-.Lcleanup_io_restore:
- # check if stpt has been executed
- clg %r9,BASED(.Lcleanup_io_restore_insn)
- jh 0f
- mvc __LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
-0: clg %r9,BASED(.Lcleanup_io_restore_insn+8)
- je 1f
- lg %r9,24(%r11) # get saved r11 pointer to pt_regs
- mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
- mvc 0(64,%r11),__PT_R8(%r9)
- lmg %r0,%r7,__PT_R0(%r9)
-1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_EX %r14,%r11
-.Lcleanup_io_restore_insn:
- .quad .Lio_exit_timer
- .quad .Lio_done - 4
-
-.Lcleanup_idle:
- ni __LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
- # copy interrupt clock & cpu timer
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
- cghi %r11,__LC_SAVE_AREA_ASYNC
- je 0f
- mvc __CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
- mvc __TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0: # check if stck & stpt have been executed
- clg %r9,BASED(.Lcleanup_idle_insn)
- jhe 1f
- mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
- mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1: # calculate idle cycles
- clg %r9,BASED(.Lcleanup_idle_insn)
- jl 3f
- larl %r1,smp_cpu_mtid
- llgf %r1,0(%r1)
- ltgr %r1,%r1
- jz 3f
- .insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
- larl %r3,mt_cycles
- ag %r3,__LC_PERCPU_OFFSET
- la %r4,__SF_EMPTY+16(%r15)
-2: lg %r0,0(%r3)
- slg %r0,0(%r4)
- alg %r0,64(%r4)
- stg %r0,0(%r3)
- la %r3,8(%r3)
- la %r4,8(%r4)
- brct %r1,2b
-3: # account system time going idle
- lg %r9,__LC_STEAL_TIMER
- alg %r9,__CLOCK_IDLE_ENTER(%r2)
- slg %r9,__LC_LAST_UPDATE_CLOCK
- stg %r9,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
- lg %r9,__LC_SYSTEM_TIMER
- alg %r9,__LC_LAST_UPDATE_TIMER
- slg %r9,__TIMER_IDLE_ENTER(%r2)
- stg %r9,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
- # prepare return psw
- nihh %r8,0xfcfd # clear irq & wait state bits
- lg %r9,48(%r11) # return from psw_idle
- BR_EX %r14,%r11
-.Lcleanup_idle_insn:
- .quad .Lpsw_idle_lpsw
-
-.Lcleanup_save_fpu_regs:
- larl %r9,save_fpu_regs
- BR_EX %r14,%r11
-
-.Lcleanup_load_fpu_regs:
- larl %r9,load_fpu_regs
- BR_EX %r14,%r11
-
-/*
- * Integer constants
- */
- .align 8
-.Lcritical_start:
- .quad .L__critical_start
-.Lcritical_length:
- .quad .L__critical_end - .L__critical_start
-#if IS_ENABLED(CONFIG_KVM)
-.Lsie_critical_start:
- .quad .Lsie_gmap
-.Lsie_critical_length:
- .quad .Lsie_done - .Lsie_gmap
-.Lsie_crit_mcck_start:
- .quad .Lsie_entry
-.Lsie_crit_mcck_length:
- .quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
#define SYSCALL(esame,emu) .quad __s390x_ ## esame
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index b2956d4..d2ca3fe 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -9,7 +9,6 @@
#include <asm/idle.h>
extern void *restart_stack;
-extern unsigned long suspend_zero_pages;
void system_call(void);
void pgm_check_handler(void);
@@ -17,13 +16,15 @@
void io_int_handler(void);
void mcck_int_handler(void);
void restart_int_handler(void);
-void restart_call_handler(void);
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
+void do_secure_storage_violation(struct pt_regs *regs);
void addressing_exception(struct pt_regs *regs);
void data_exception(struct pt_regs *regs);
@@ -45,6 +46,7 @@
void transaction_exception(struct pt_regs *regs);
void translation_exception(struct pt_regs *regs);
void vector_exception(struct pt_regs *regs);
+void monitor_event_exception(struct pt_regs *regs);
void do_per_trap(struct pt_regs *regs);
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
@@ -58,12 +60,10 @@
void __init init_IRQ(void);
void do_IRQ(struct pt_regs *regs, int irq);
void do_restart(void);
-void __init startup_init_nobss(void);
void __init startup_init(void);
void die(struct pt_regs *regs, const char *str);
int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
-void s390_early_resume(void);
unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
@@ -88,4 +88,6 @@
unsigned long stack_alloc(void);
void stack_free(unsigned long stack);
+extern char kprobes_insn_page[];
+
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 7df6b26..923eccc 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -73,31 +73,6 @@
#endif
}
-static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
- if (insn->opc == BREAKPOINT_INSTRUCTION)
- return 1;
-#endif
- return 0;
-}
-
-static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
- insn->opc = BREAKPOINT_INSTRUCTION;
- insn->disp = KPROBE_ON_FTRACE_NOP;
-#endif
-}
-
-static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
- insn->opc = BREAKPOINT_INSTRUCTION;
- insn->disp = KPROBE_ON_FTRACE_CALL;
-#endif
-}
-
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
@@ -109,22 +84,12 @@
{
struct ftrace_insn orig, new, old;
- if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
if (addr == MCOUNT_ADDR) {
/* Initial code replacement */
ftrace_generate_orig_insn(&orig);
ftrace_generate_nop_insn(&new);
- } else if (is_kprobe_on_ftrace(&old)) {
- /*
- * If we find a breakpoint instruction, a kprobe has been
- * placed at the beginning of the function. We write the
- * constant KPROBE_ON_FTRACE_NOP into the remaining four
- * bytes of the original instruction so that the kprobes
- * handler can execute a nop, if it reaches this breakpoint.
- */
- ftrace_generate_kprobe_call_insn(&orig);
- ftrace_generate_kprobe_nop_insn(&new);
} else {
/* Replace ftrace call with a nop. */
ftrace_generate_call_insn(&orig, rec->ip);
@@ -141,23 +106,12 @@
{
struct ftrace_insn orig, new, old;
- if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+ if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
- if (is_kprobe_on_ftrace(&old)) {
- /*
- * If we find a breakpoint instruction, a kprobe has been
- * placed at the beginning of the function. We write the
- * constant KPROBE_ON_FTRACE_CALL into the remaining four
- * bytes of the original instruction so that the kprobes
- * handler can execute a brasl if it reaches this breakpoint.
- */
- ftrace_generate_kprobe_nop_insn(&orig);
- ftrace_generate_kprobe_call_insn(&new);
- } else {
- /* Replace nop with an ftrace call. */
- ftrace_generate_nop_insn(&orig);
- ftrace_generate_call_insn(&new, rec->ip);
- }
+ /* Replace nop with an ftrace call. */
+ ftrace_generate_nop_insn(&orig);
+ ftrace_generate_call_insn(&new, rec->ip);
+
/* Verify that the to be replaced code matches what we expect. */
if (memcmp(&orig, &old, sizeof(old)))
return -EINVAL;
@@ -243,3 +197,45 @@
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb;
+ struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
+
+ if (unlikely(!p) || kprobe_disabled(p))
+ return;
+
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ return;
+ }
+
+ __this_cpu_write(current_kprobe, p);
+
+ kcb = get_kprobe_ctlblk();
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ instruction_pointer_set(regs, ip);
+
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+
+ instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE);
+
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ }
+ __this_cpu_write(current_kprobe, NULL);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ return 0;
+}
+#endif
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 0d9ee19..8b88dbb 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,30 +26,17 @@
0: larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
- larl %r0,boot_vdso_data
- stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
larl %r14,init_task
stg %r14,__LC_CURRENT
- larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
+ larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
#ifdef CONFIG_KASAN
brasl %r14,kasan_early_init
#endif
-#
-# Early machine initialization and detection functions.
-#
- brasl %r14,startup_init
-
-# check control registers
- stctg %c0,%c15,0(%r15)
- oi 6(%r15),0x60 # enable sigp emergency & external call
- oi 4(%r15),0x10 # switch on low address proctection
- lctlg %c0,%c15,0(%r15)
-
- lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess
- brasl %r14,start_kernel # go to C code
+ brasl %r14,startup_init # s390 specific early init
+ brasl %r14,start_kernel # common init code
#
# We returned from start_kernel ?!? PANIK
#
@@ -59,4 +46,3 @@
.align 16
.LPG1:
.Ldw: .quad 0x0002000180000000,0x0000000000000000
-.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 8f84568..2b85096 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/sched/cputime.h>
+#include <trace/events/power.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include "entry.h"
@@ -24,28 +25,27 @@
{
struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
unsigned long long idle_time;
- unsigned long psw_mask;
+ unsigned long psw_mask, flags;
- trace_hardirqs_on();
/* Wait for external, I/O or machine check interrupt. */
psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
+ raw_local_irq_save(flags);
/* Call the assembler magic in entry.S */
psw_idle(idle, psw_mask);
-
- trace_hardirqs_off();
+ raw_local_irq_restore(flags);
/* Account time spent with enabled wait psw loaded as idle time. */
- write_seqcount_begin(&idle->seqcount);
+ raw_write_seqcount_begin(&idle->seqcount);
idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
idle->idle_time += idle_time;
idle->idle_count++;
account_idle_time(cputime_to_nsecs(idle_time));
- write_seqcount_end(&idle->seqcount);
+ raw_write_seqcount_end(&idle->seqcount);
}
NOKPROBE_SYMBOL(enabled_wait);
@@ -118,22 +118,16 @@
void arch_cpu_idle_enter(void)
{
- local_mcck_disable();
}
void arch_cpu_idle(void)
{
- if (!test_cpu_flag(CIF_MCCK_PENDING))
- /* Halt the cpu and keep track of cpu time accounting. */
- enabled_wait();
- local_irq_enable();
+ enabled_wait();
+ raw_local_irq_enable();
}
void arch_cpu_idle_exit(void)
{
- local_mcck_enable();
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
}
void arch_cpu_idle_dead(void)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6837aff..6da0690 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -39,10 +39,13 @@
#define IPL_CCW_STR "ccw"
#define IPL_FCP_STR "fcp"
#define IPL_FCP_DUMP_STR "fcp_dump"
+#define IPL_NVME_STR "nvme"
+#define IPL_NVME_DUMP_STR "nvme_dump"
#define IPL_NSS_STR "nss"
#define DUMP_CCW_STR "ccw"
#define DUMP_FCP_STR "fcp"
+#define DUMP_NVME_STR "nvme"
#define DUMP_NONE_STR "none"
/*
@@ -93,6 +96,10 @@
return IPL_FCP_DUMP_STR;
case IPL_TYPE_NSS:
return IPL_NSS_STR;
+ case IPL_TYPE_NVME:
+ return IPL_NVME_STR;
+ case IPL_TYPE_NVME_DUMP:
+ return IPL_NVME_DUMP_STR;
case IPL_TYPE_UNKNOWN:
default:
return IPL_UNKNOWN_STR;
@@ -103,6 +110,7 @@
DUMP_TYPE_NONE = 1,
DUMP_TYPE_CCW = 2,
DUMP_TYPE_FCP = 4,
+ DUMP_TYPE_NVME = 8,
};
static char *dump_type_str(enum dump_type type)
@@ -114,6 +122,8 @@
return DUMP_CCW_STR;
case DUMP_TYPE_FCP:
return DUMP_FCP_STR;
+ case DUMP_TYPE_NVME:
+ return DUMP_NVME_STR;
default:
return NULL;
}
@@ -133,6 +143,7 @@
static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_nvme;
static struct ipl_parameter_block *reipl_block_ccw;
static struct ipl_parameter_block *reipl_block_nss;
static struct ipl_parameter_block *reipl_block_actual;
@@ -140,10 +151,15 @@
static int dump_capabilities = DUMP_TYPE_NONE;
static enum dump_type dump_type = DUMP_TYPE_NONE;
static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_nvme;
static struct ipl_parameter_block *dump_block_ccw;
static struct sclp_ipl_info sclp_ipl_info;
+static bool reipl_nvme_clear;
+static bool reipl_fcp_clear;
+static bool reipl_ccw_clear;
+
static inline int __diag308(unsigned long subcode, void *addr)
{
register unsigned long _addr asm("0") = (unsigned long) addr;
@@ -174,7 +190,7 @@
struct kobj_attribute *attr, \
char *page) \
{ \
- return snprintf(page, PAGE_SIZE, _format, ##args); \
+ return scnprintf(page, PAGE_SIZE, _format, ##args); \
}
#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \
@@ -258,6 +274,11 @@
return IPL_TYPE_FCP_DUMP;
else
return IPL_TYPE_FCP;
+ case IPL_PBT_NVME:
+ if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+ return IPL_TYPE_NVME_DUMP;
+ else
+ return IPL_TYPE_NVME;
}
return IPL_TYPE_UNKNOWN;
}
@@ -314,6 +335,9 @@
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+ case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
+ return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
default:
return 0;
}
@@ -342,15 +366,35 @@
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
+
+static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf,
+ loff_t off, size_t count)
+{
+ unsigned int size = ipl_block.nvme.scp_data_len;
+ void *scp_data = &ipl_block.nvme.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
static struct bin_attribute ipl_scp_data_attr =
__BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
+static struct bin_attribute ipl_nvme_scp_data_attr =
+ __BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
+
static struct bin_attribute *ipl_fcp_bin_attrs[] = {
&ipl_parameter_attr,
&ipl_scp_data_attr,
NULL,
};
+static struct bin_attribute *ipl_nvme_bin_attrs[] = {
+ &ipl_parameter_attr,
+ &ipl_nvme_scp_data_attr,
+ NULL,
+};
+
/* FCP ipl device attributes */
DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
@@ -362,6 +406,16 @@
DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
(unsigned long long)ipl_block.fcp.br_lba);
+/* NVMe ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n",
+ (unsigned long long)ipl_block.nvme.fid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n",
+ (unsigned long long)ipl_block.nvme.nsid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
+ (unsigned long long)ipl_block.nvme.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
+ (unsigned long long)ipl_block.nvme.br_lba);
+
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -396,6 +450,24 @@
.bin_attrs = ipl_fcp_bin_attrs,
};
+static struct attribute *ipl_nvme_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_nvme_fid_attr.attr,
+ &sys_ipl_nvme_nsid_attr.attr,
+ &sys_ipl_nvme_bootprog_attr.attr,
+ &sys_ipl_nvme_br_lba_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_nvme_attr_group = {
+ .attrs = ipl_nvme_attrs,
+ .bin_attrs = ipl_nvme_bin_attrs,
+};
+
+
/* CCW ipl device attributes */
static struct attribute *ipl_ccw_attrs_vm[] = {
@@ -471,6 +543,10 @@
case IPL_TYPE_FCP_DUMP:
rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
break;
+ case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
+ rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
+ break;
default:
rc = sysfs_create_group(&ipl_kset->kobj,
&ipl_unknown_attr_group);
@@ -691,6 +767,21 @@
__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
reipl_fcp_loadparm_store);
+static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%u\n", reipl_fcp_clear);
+}
+
+static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (strtobool(buf, &reipl_fcp_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
static struct attribute *reipl_fcp_attrs[] = {
&sys_reipl_fcp_device_attr.attr,
&sys_reipl_fcp_wwpn_attr.attr,
@@ -706,6 +797,114 @@
.bin_attrs = reipl_fcp_bin_attrs,
};
+static struct kobj_attribute sys_reipl_fcp_clear_attr =
+ __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+
+/* NVME reipl device attributes */
+
+static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t size = reipl_block_nvme->nvme.scp_data_len;
+ void *scp_data = reipl_block_nvme->nvme.scp_data;
+
+ return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *attr,
+ char *buf, loff_t off, size_t count)
+{
+ size_t scpdata_len = count;
+ size_t padding;
+
+ if (off)
+ return -EINVAL;
+
+ memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
+ if (scpdata_len % 8) {
+ padding = 8 - (scpdata_len % 8);
+ memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
+ 0, padding);
+ scpdata_len += padding;
+ }
+
+ reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+ reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
+ reipl_block_nvme->nvme.scp_data_len = scpdata_len;
+
+ return count;
+}
+
+static struct bin_attribute sys_reipl_nvme_scp_data_attr =
+ __BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read,
+ reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+ &sys_reipl_nvme_scp_data_attr,
+ NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
+ reipl_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
+ reipl_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+ reipl_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
+ reipl_block_nvme->nvme.br_lba);
+
+/* nvme wrapper */
+static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_nvme, page);
+}
+
+static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show,
+ reipl_nvme_loadparm_store);
+
+static struct attribute *reipl_nvme_attrs[] = {
+ &sys_reipl_nvme_fid_attr.attr,
+ &sys_reipl_nvme_nsid_attr.attr,
+ &sys_reipl_nvme_bootprog_attr.attr,
+ &sys_reipl_nvme_br_lba_attr.attr,
+ &sys_reipl_nvme_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_nvme_attr_group = {
+ .attrs = reipl_nvme_attrs,
+ .bin_attrs = reipl_nvme_bin_attrs
+};
+
+static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%u\n", reipl_nvme_clear);
+}
+
+static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (strtobool(buf, &reipl_nvme_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_nvme_clear_attr =
+ __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store);
+
/* CCW reipl device attributes */
DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
@@ -741,16 +940,36 @@
__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
reipl_ccw_loadparm_store);
+static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%u\n", reipl_ccw_clear);
+}
+
+static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ if (strtobool(buf, &reipl_ccw_clear) < 0)
+ return -EINVAL;
+ return len;
+}
+
+static struct kobj_attribute sys_reipl_ccw_clear_attr =
+ __ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store);
+
static struct attribute *reipl_ccw_attrs_vm[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
&sys_reipl_ccw_vmparm_attr.attr,
+ &sys_reipl_ccw_clear_attr.attr,
NULL,
};
static struct attribute *reipl_ccw_attrs_lpar[] = {
&sys_reipl_ccw_device_attr.attr,
&sys_reipl_ccw_loadparm_attr.attr,
+ &sys_reipl_ccw_clear_attr.attr,
NULL,
};
@@ -850,6 +1069,9 @@
case IPL_TYPE_FCP:
reipl_block_actual = reipl_block_fcp;
break;
+ case IPL_TYPE_NVME:
+ reipl_block_actual = reipl_block_nvme;
+ break;
case IPL_TYPE_NSS:
reipl_block_actual = reipl_block_nss;
break;
@@ -876,6 +1098,8 @@
rc = reipl_set_type(IPL_TYPE_CCW);
else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_FCP);
+ else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
+ rc = reipl_set_type(IPL_TYPE_NVME);
else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
rc = reipl_set_type(IPL_TYPE_NSS);
return (rc != 0) ? rc : len;
@@ -886,17 +1110,31 @@
static struct kset *reipl_kset;
static struct kset *reipl_fcp_kset;
+static struct kset *reipl_nvme_kset;
static void __reipl_run(void *unused)
{
switch (reipl_type) {
case IPL_TYPE_CCW:
diag308(DIAG308_SET, reipl_block_ccw);
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ if (reipl_ccw_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
break;
case IPL_TYPE_FCP:
diag308(DIAG308_SET, reipl_block_fcp);
- diag308(DIAG308_LOAD_CLEAR, NULL);
+ if (reipl_fcp_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
+ break;
+ case IPL_TYPE_NVME:
+ diag308(DIAG308_SET, reipl_block_nvme);
+ if (reipl_nvme_clear)
+ diag308(DIAG308_LOAD_CLEAR, NULL);
+ else
+ diag308(DIAG308_LOAD_NORMAL, NULL);
break;
case IPL_TYPE_NSS:
diag308(DIAG308_SET, reipl_block_nss);
@@ -906,6 +1144,7 @@
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_FCP_DUMP:
+ case IPL_TYPE_NVME_DUMP:
break;
}
disabled_wait();
@@ -1008,10 +1247,16 @@
}
rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
- if (rc) {
- kset_unregister(reipl_fcp_kset);
- free_page((unsigned long) reipl_block_fcp);
- return rc;
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_fcp_kset->kobj,
+ &sys_reipl_fcp_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_fcp_clear = true;
}
if (ipl_info.type == IPL_TYPE_FCP) {
@@ -1032,6 +1277,69 @@
}
reipl_capabilities |= IPL_TYPE_FCP;
return 0;
+
+out2:
+ sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+out1:
+ kset_unregister(reipl_fcp_kset);
+ free_page((unsigned long) reipl_block_fcp);
+ return rc;
+}
+
+static int __init reipl_nvme_init(void)
+{
+ int rc;
+
+ reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!reipl_block_nvme)
+ return -ENOMEM;
+
+ /* sysfs: create kset for mixing attr group and bin attrs */
+ reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL,
+ &reipl_kset->kobj);
+ if (!reipl_nvme_kset) {
+ free_page((unsigned long) reipl_block_nvme);
+ return -ENOMEM;
+ }
+
+ rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+ if (rc)
+ goto out1;
+
+ if (test_facility(141)) {
+ rc = sysfs_create_file(&reipl_nvme_kset->kobj,
+ &sys_reipl_nvme_clear_attr.attr);
+ if (rc)
+ goto out2;
+ } else {
+ reipl_nvme_clear = true;
+ }
+
+ if (ipl_info.type == IPL_TYPE_NVME) {
+ memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block));
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the IPL parameter block, so take it
+ * always from sclp_ipl_info.
+ */
+ memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
+ reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+ reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+ reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+ reipl_block_nvme->nvme.pbt = IPL_PBT_NVME;
+ reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL;
+ }
+ reipl_capabilities |= IPL_TYPE_NVME;
+ return 0;
+
+out2:
+ sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+out1:
+ kset_unregister(reipl_nvme_kset);
+ free_page((unsigned long) reipl_block_nvme);
+ return rc;
}
static int __init reipl_type_init(void)
@@ -1049,6 +1357,9 @@
if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
memcpy(reipl_block_fcp, reipl_block, size);
reipl_type = IPL_TYPE_FCP;
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) {
+ memcpy(reipl_block_nvme, reipl_block, size);
+ reipl_type = IPL_TYPE_NVME;
} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
memcpy(reipl_block_ccw, reipl_block, size);
reipl_type = IPL_TYPE_CCW;
@@ -1075,6 +1386,9 @@
rc = reipl_fcp_init();
if (rc)
return rc;
+ rc = reipl_nvme_init();
+ if (rc)
+ return rc;
rc = reipl_nss_init();
if (rc)
return rc;
@@ -1118,6 +1432,29 @@
.attrs = dump_fcp_attrs,
};
+/* NVME dump device attributes */
+DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
+ dump_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
+ dump_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+ dump_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
+ dump_block_nvme->nvme.br_lba);
+
+static struct attribute *dump_nvme_attrs[] = {
+ &sys_dump_nvme_fid_attr.attr,
+ &sys_dump_nvme_nsid_attr.attr,
+ &sys_dump_nvme_bootprog_attr.attr,
+ &sys_dump_nvme_br_lba_attr.attr,
+ NULL,
+};
+
+static struct attribute_group dump_nvme_attr_group = {
+ .name = IPL_NVME_STR,
+ .attrs = dump_nvme_attrs,
+};
+
/* CCW dump device attributes */
DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
@@ -1159,6 +1496,8 @@
rc = dump_set_type(DUMP_TYPE_CCW);
else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
rc = dump_set_type(DUMP_TYPE_FCP);
+ else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
+ rc = dump_set_type(DUMP_TYPE_NVME);
return (rc != 0) ? rc : len;
}
@@ -1186,6 +1525,9 @@
case DUMP_TYPE_FCP:
diag308_dump(dump_block_fcp);
break;
+ case DUMP_TYPE_NVME:
+ diag308_dump(dump_block_nvme);
+ break;
default:
break;
}
@@ -1242,6 +1584,29 @@
return 0;
}
+static int __init dump_nvme_init(void)
+{
+ int rc;
+
+ if (!sclp_ipl_info.has_dump)
+ return 0; /* LDIPL DUMP is not installed */
+ dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+ if (!dump_block_nvme)
+ return -ENOMEM;
+ rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group);
+ if (rc) {
+ free_page((unsigned long)dump_block_nvme);
+ return rc;
+ }
+ dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+ dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+ dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
+ dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
+ dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+ dump_capabilities |= DUMP_TYPE_NVME;
+ return 0;
+}
+
static int __init dump_init(void)
{
int rc;
@@ -1260,6 +1625,9 @@
rc = dump_fcp_init();
if (rc)
return rc;
+ rc = dump_nvme_init();
+ if (rc)
+ return rc;
dump_set_type(DUMP_TYPE_NONE);
return 0;
}
@@ -1691,6 +2059,11 @@
ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
ipl_info.data.fcp.lun = ipl_block.fcp.lun;
break;
+ case IPL_TYPE_NVME:
+ case IPL_TYPE_NVME_DUMP:
+ ipl_info.data.nvme.fid = ipl_block.nvme.fid;
+ ipl_info.data.nvme.nsid = ipl_block.nvme.nsid;
+ break;
case IPL_TYPE_NSS:
case IPL_TYPE_UNKNOWN:
/* We have no info to copy */
@@ -1783,7 +2156,7 @@
buf = vzalloc(report->size);
if (!buf)
- return ERR_PTR(-ENOMEM);
+ goto out;
ptr = buf;
memcpy(ptr, report->ipib, report->ipib->hdr.len);
@@ -1822,6 +2195,7 @@
}
BUG_ON(ptr > buf + report->size);
+out:
return buf;
}
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index da550cb..3514420 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -95,14 +95,6 @@
{.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
-void __init init_IRQ(void)
-{
- BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
- init_cio_interrupts();
- init_airq_interrupts();
- init_ext_interrupts();
-}
-
void do_IRQ(struct pt_regs *regs, int irq)
{
struct pt_regs *old_regs;
@@ -294,7 +286,7 @@
return IRQ_HANDLED;
}
-void __init init_ext_interrupts(void)
+static void __init init_ext_interrupts(void)
{
int idx;
@@ -307,6 +299,14 @@
panic("Failed to register EXT interrupt\n");
}
+void __init init_IRQ(void)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+ init_cio_interrupts();
+ init_airq_interrupts();
+ init_ext_interrupts();
+}
+
static DEFINE_SPINLOCK(irq_subclass_lock);
static unsigned char irq_subclass_refcount[64];
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 6f13883..aae24dc 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -7,6 +7,7 @@
* s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
*/
+#include <linux/moduleloader.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
@@ -21,6 +22,7 @@
#include <asm/set_memory.h>
#include <asm/sections.h>
#include <asm/dis.h>
+#include "entry.h"
DEFINE_PER_CPU(struct kprobe *, current_kprobe);
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -30,19 +32,32 @@
DEFINE_INSN_CACHE_OPS(s390_insn);
static int insn_page_in_use;
-static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+void *alloc_insn_page(void)
+{
+ void *page;
+
+ page = module_alloc(PAGE_SIZE);
+ if (!page)
+ return NULL;
+ __set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X);
+ return page;
+}
+
+void free_insn_page(void *page)
+{
+ module_memfree(page);
+}
static void *alloc_s390_insn_page(void)
{
if (xchg(&insn_page_in_use, 1) == 1)
return NULL;
- set_memory_x((unsigned long) &insn_page, 1);
- return &insn_page;
+ return &kprobes_insn_page;
}
static void free_s390_insn_page(void *page)
{
- set_memory_nx((unsigned long) page, 1);
xchg(&insn_page_in_use, 0);
}
@@ -56,36 +71,29 @@
static void copy_instruction(struct kprobe *p)
{
- unsigned long ip = (unsigned long) p->addr;
+ kprobe_opcode_t insn[MAX_INSN_SIZE];
s64 disp, new_disp;
u64 addr, new_addr;
+ unsigned int len;
- if (ftrace_location(ip) == ip) {
+ len = insn_length(*p->addr >> 8);
+ memcpy(&insn, p->addr, len);
+ p->opcode = insn[0];
+ if (probe_is_insn_relative_long(&insn[0])) {
/*
- * If kprobes patches the instruction that is morphed by
- * ftrace make sure that kprobes always sees the branch
- * "jg .+24" that skips the mcount block or the "brcl 0,0"
- * in case of hotpatch.
+ * For pc-relative instructions in RIL-b or RIL-c format patch
+ * the RI2 displacement field. We have already made sure that
+ * the insn slot for the patched instruction is within the same
+ * 2GB area as the original instruction (either kernel image or
+ * module area). Therefore the new displacement will always fit.
*/
- ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
- p->ainsn.is_ftrace_insn = 1;
- } else
- memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
- p->opcode = p->ainsn.insn[0];
- if (!probe_is_insn_relative_long(p->ainsn.insn))
- return;
- /*
- * For pc-relative instructions in RIL-b or RIL-c format patch the
- * RI2 displacement field. We have already made sure that the insn
- * slot for the patched instruction is within the same 2GB area
- * as the original instruction (either kernel image or module area).
- * Therefore the new displacement will always fit.
- */
- disp = *(s32 *)&p->ainsn.insn[1];
- addr = (u64)(unsigned long)p->addr;
- new_addr = (u64)(unsigned long)p->ainsn.insn;
- new_disp = ((addr + (disp * 2)) - new_addr) / 2;
- *(s32 *)&p->ainsn.insn[1] = new_disp;
+ disp = *(s32 *)&insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&insn[1] = new_disp;
+ }
+ s390_kernel_write(p->ainsn.insn, &insn, len);
}
NOKPROBE_SYMBOL(copy_instruction);
@@ -136,11 +144,6 @@
}
NOKPROBE_SYMBOL(arch_prepare_kprobe);
-int arch_check_ftrace_location(struct kprobe *p)
-{
- return 0;
-}
-
struct swap_insn_args {
struct kprobe *p;
unsigned int arm_kprobe : 1;
@@ -149,28 +152,11 @@
static int swap_instruction(void *data)
{
struct swap_insn_args *args = data;
- struct ftrace_insn new_insn, *insn;
struct kprobe *p = args->p;
- size_t len;
+ u16 opc;
- new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
- len = sizeof(new_insn.opc);
- if (!p->ainsn.is_ftrace_insn)
- goto skip_ftrace;
- len = sizeof(new_insn);
- insn = (struct ftrace_insn *) p->addr;
- if (args->arm_kprobe) {
- if (is_ftrace_nop(insn))
- new_insn.disp = KPROBE_ON_FTRACE_NOP;
- else
- new_insn.disp = KPROBE_ON_FTRACE_CALL;
- } else {
- ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
- if (insn->disp == KPROBE_ON_FTRACE_NOP)
- ftrace_generate_nop_insn(&new_insn);
- }
-skip_ftrace:
- s390_kernel_write(p->addr, &new_insn, len);
+ opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+ s390_kernel_write(p->addr, &opc, sizeof(opc));
return 0;
}
NOKPROBE_SYMBOL(swap_instruction);
@@ -261,6 +247,7 @@
void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
{
ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
+ ri->fp = NULL;
/* Replace the return addr with trampoline addr */
regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
@@ -364,83 +351,7 @@
*/
static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
- struct kretprobe_instance *ri;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address;
- unsigned long trampoline_address;
- kprobe_opcode_t *correct_ret_addr;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more than one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- ri = NULL;
- orig_ret_address = 0;
- correct_ret_addr = NULL;
- trampoline_address = (unsigned long) &kretprobe_trampoline;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long) ri->ret_addr;
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
- correct_ret_addr = ri->ret_addr;
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- orig_ret_address = (unsigned long) ri->ret_addr;
-
- if (ri->rp && ri->rp->handler) {
- ri->ret_addr = correct_ret_addr;
- ri->rp->handler(ri, regs);
- }
-
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- regs->psw.addr = orig_ret_address;
-
- kretprobe_hash_unlock(current, &flags);
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
+ regs->psw.addr = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
@@ -464,24 +375,6 @@
unsigned long ip = regs->psw.addr;
int fixup = probe_get_fixup_type(p->ainsn.insn);
- /* Check if the kprobes location is an enabled ftrace caller */
- if (p->ainsn.is_ftrace_insn) {
- struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
- struct ftrace_insn call_insn;
-
- ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
- /*
- * A kprobe on an enabled ftrace call site actually single
- * stepped an unconditional branch (ftrace nop equivalent).
- * Now we need to fixup things and pretend that a brasl r0,...
- * was executed instead.
- */
- if (insn->disp == KPROBE_ON_FTRACE_CALL) {
- ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
- regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
- }
- }
-
if (fixup & FIXUP_PSW_NORMAL)
ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
@@ -574,10 +467,8 @@
* zero, try to fix up.
*/
entry = s390_search_extables(regs->psw.addr);
- if (entry) {
- regs->psw.addr = extable_fixup(entry);
+ if (entry && ex_handle(entry, regs))
return 1;
- }
/*
* fixup_exception() could not handle it,
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
new file mode 100644
index 0000000..f6cb022
--- /dev/null
+++ b/arch/s390/kernel/kprobes_insn_page.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+/*
+ * insn_page is a special 4k aligned dummy function for kprobes.
+ * It will contain all kprobed instructions that are out-of-line executed.
+ * The page must be within the kernel image to guarantee that the
+ * out-of-line instructions are within 2GB distance of their original
+ * location. Using a dummy function ensures that the insn_page is within
+ * the text section of the kernel and mapped read-only/executable from
+ * the beginning on, thus avoiding to split large mappings if the page
+ * would be in the data section instead.
+ */
+ .section .kprobes.text, "ax"
+ .align 4096
+ENTRY(kprobes_insn_page)
+ .rept 2048
+ .word 0x07fe
+ .endr
+ENDPROC(kprobes_insn_page)
+ .previous
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 452502f..3b89597 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -167,7 +167,7 @@
*/
static void lgr_timer_set(void)
{
- mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+ mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
}
/*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index cb8b1cc..d91989c 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -14,11 +14,8 @@
#include <linux/reboot.h>
#include <linux/ftrace.h>
#include <linux/debug_locks.h>
-#include <linux/suspend.h>
#include <asm/cio.h>
#include <asm/setup.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/ipl.h>
#include <asm/diag.h>
@@ -39,36 +36,6 @@
#ifdef CONFIG_CRASH_DUMP
/*
- * PM notifier callback for kdump
- */
-static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
- void *ptr)
-{
- switch (action) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- if (kexec_crash_image)
- arch_kexec_unprotect_crashkres();
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- if (kexec_crash_image)
- arch_kexec_protect_crashkres();
- break;
- default:
- return NOTIFY_DONE;
- }
- return NOTIFY_OK;
-}
-
-static int __init machine_kdump_pm_init(void)
-{
- pm_notifier(machine_kdump_pm_cb, 0);
- return 0;
-}
-arch_initcall(machine_kdump_pm_init);
-
-/*
* Reset the system, copy boot CPU registers to absolute zero,
* and jump to the kdump image
*/
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index f9e4baa..76cd098 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -12,6 +12,7 @@
#include <linux/kexec.h>
#include <linux/module_signature.h>
#include <linux/verification.h>
+#include <linux/vmalloc.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/setup.h>
@@ -170,6 +171,7 @@
struct kexec_buf buf;
unsigned long addr;
void *ptr, *end;
+ int ret;
buf.image = image;
@@ -199,9 +201,13 @@
ptr += len;
}
+ ret = -ENOMEM;
buf.buffer = ipl_report_finish(data->report);
+ if (!buf.buffer)
+ goto out;
buf.bufsz = data->report->size;
buf.memsz = buf.bufsz;
+ image->arch.ipl_buf = buf.buffer;
data->memsz += buf.memsz;
@@ -209,7 +215,9 @@
data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
*lc_ipl_parmblock_ptr = (__u32)buf.mem;
- return kexec_add_buffer(&buf);
+ ret = kexec_add_buffer(&buf);
+out:
+ return ret;
}
void *kexec_file_add_components(struct kimage *image,
@@ -269,6 +277,7 @@
{
Elf_Rela *relas;
int i, r_type;
+ int ret;
relas = (void *)pi->ehdr + relsec->sh_offset;
@@ -303,7 +312,11 @@
addr = section->sh_addr + relas[i].r_offset;
r_type = ELF64_R_TYPE(relas[i].r_info);
- arch_kexec_do_relocs(r_type, loc, val, addr);
+ ret = arch_kexec_do_relocs(r_type, loc, val, addr);
+ if (ret) {
+ pr_err("Unknown rela relocation: %d\n", r_type);
+ return -ENOEXEC;
+ }
}
return 0;
}
@@ -321,3 +334,11 @@
return kexec_image_probe_default(image, buf, buf_len);
}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ vfree(image->arch.ipl_buf);
+ image->arch.ipl_buf = NULL;
+
+ return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 14bad57..be9e85e 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -42,6 +42,9 @@
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
+ lghi %r14,0 # save condition code
+ ipm %r14 # don't put any instructions
+ sllg %r14,%r14,16 # clobbering CC before this point
lgr %r1,%r15
#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
aghi %r0,MCOUNT_RETURN_FIXUP
@@ -54,6 +57,9 @@
# allocate pt_regs and stack frame for ftrace_trace_function
aghi %r15,-STACK_FRAME_SIZE
stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+ stg %r14,(STACK_PTREGS_PSW)(%r15)
+ lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address
+ stosm (STACK_PTREGS_PSW)(%r15),0
aghi %r1,-TRACED_FUNC_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
stg %r0,(STACK_PTREGS_PSW+8)(%r15)
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index ba8f19b..b81bc96 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -19,6 +19,7 @@
#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/bug.h>
+#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -29,7 +30,7 @@
#define DEBUGP(fmt , ...)
#endif
-#define PLT_ENTRY_SIZE 20
+#define PLT_ENTRY_SIZE 22
void *module_alloc(unsigned long size)
{
@@ -174,10 +175,12 @@
}
static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
- int sign, int bits, int shift)
+ int sign, int bits, int shift,
+ void *(*write)(void *dest, const void *src, size_t len))
{
unsigned long umax;
long min, max;
+ void *dest = (void *)loc;
if (val & ((1UL << shift) - 1))
return -ENOEXEC;
@@ -194,26 +197,33 @@
return -ENOEXEC;
}
- if (bits == 8)
- *(unsigned char *) loc = val;
- else if (bits == 12)
- *(unsigned short *) loc = (val & 0xfff) |
+ if (bits == 8) {
+ unsigned char tmp = val;
+ write(dest, &tmp, 1);
+ } else if (bits == 12) {
+ unsigned short tmp = (val & 0xfff) |
(*(unsigned short *) loc & 0xf000);
- else if (bits == 16)
- *(unsigned short *) loc = val;
- else if (bits == 20)
- *(unsigned int *) loc = (val & 0xfff) << 16 |
- (val & 0xff000) >> 4 |
- (*(unsigned int *) loc & 0xf00000ff);
- else if (bits == 32)
- *(unsigned int *) loc = val;
- else if (bits == 64)
- *(unsigned long *) loc = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 16) {
+ unsigned short tmp = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 20) {
+ unsigned int tmp = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff);
+ write(dest, &tmp, 4);
+ } else if (bits == 32) {
+ unsigned int tmp = val;
+ write(dest, &tmp, 4);
+ } else if (bits == 64) {
+ unsigned long tmp = val;
+ write(dest, &tmp, 8);
+ }
return 0;
}
static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
- const char *strtab, struct module *me)
+ const char *strtab, struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
struct mod_arch_syminfo *info;
Elf_Addr loc, val;
@@ -241,17 +251,17 @@
case R_390_64: /* Direct 64 bit. */
val += rela->r_addend;
if (r_type == R_390_8)
- rc = apply_rela_bits(loc, val, 0, 8, 0);
+ rc = apply_rela_bits(loc, val, 0, 8, 0, write);
else if (r_type == R_390_12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_PC16: /* PC relative 16 bit. */
case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
@@ -260,15 +270,15 @@
case R_390_PC64: /* PC relative 64 bit. */
val += rela->r_addend - loc;
if (r_type == R_390_PC16)
- rc = apply_rela_bits(loc, val, 1, 16, 0);
+ rc = apply_rela_bits(loc, val, 1, 16, 0, write);
else if (r_type == R_390_PC16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PC32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PC32)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_PC64)
- rc = apply_rela_bits(loc, val, 1, 64, 0);
+ rc = apply_rela_bits(loc, val, 1, 64, 0, write);
break;
case R_390_GOT12: /* 12 bit GOT offset. */
case R_390_GOT16: /* 16 bit GOT offset. */
@@ -283,33 +293,33 @@
case R_390_GOTPLT64: /* 64 bit offset to jump slot. */
case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */
if (info->got_initialized == 0) {
- Elf_Addr *gotent;
+ Elf_Addr *gotent = me->core_layout.base +
+ me->arch.got_offset +
+ info->got_offset;
- gotent = me->core_layout.base + me->arch.got_offset +
- info->got_offset;
- *gotent = val;
+ write(gotent, &val, sizeof(*gotent));
info->got_initialized = 1;
}
val = info->got_offset + rela->r_addend;
if (r_type == R_390_GOT12 ||
r_type == R_390_GOTPLT12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_GOT16 ||
r_type == R_390_GOTPLT16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOT20 ||
r_type == R_390_GOTPLT20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_GOT32 ||
r_type == R_390_GOTPLT32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOT64 ||
r_type == R_390_GOTPLT64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
val += (Elf_Addr) me->core_layout.base - loc;
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
@@ -320,25 +330,28 @@
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int *ip;
- ip = me->core_layout.base + me->arch.plt_offset +
- info->plt_offset;
- ip[0] = 0x0d10e310; /* basr 1,0 */
- ip[1] = 0x100a0004; /* lg 1,10(1) */
+ unsigned char insn[PLT_ENTRY_SIZE];
+ char *plt_base;
+ char *ip;
+
+ plt_base = me->core_layout.base + me->arch.plt_offset;
+ ip = plt_base + info->plt_offset;
+ *(int *)insn = 0x0d10e310; /* basr 1,0 */
+ *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
- unsigned int *ij;
- ij = me->core_layout.base +
- me->arch.plt_offset +
- me->arch.plt_size - PLT_ENTRY_SIZE;
- ip[2] = 0xa7f40000 + /* j __jump_r1 */
- (unsigned int)(u16)
- (((unsigned long) ij - 8 -
- (unsigned long) ip) / 2);
+ char *jump_r1;
+
+ jump_r1 = plt_base + me->arch.plt_size -
+ PLT_ENTRY_SIZE;
+ /* brcl 0xf,__jump_r1 */
+ *(short *)&insn[8] = 0xc0f4;
+ *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
} else {
- ip[2] = 0x07f10000; /* br %r1 */
+ *(int *)&insn[8] = 0x07f10000; /* br %r1 */
}
- ip[3] = (unsigned int) (val >> 32);
- ip[4] = (unsigned int) val;
+ *(long *)&insn[14] = val;
+
+ write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
}
if (r_type == R_390_PLTOFF16 ||
@@ -357,17 +370,17 @@
val += rela->r_addend - loc;
}
if (r_type == R_390_PLT16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PLTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_PLT32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PLT32 ||
r_type == R_390_PLTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_PLT64 ||
r_type == R_390_PLTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTOFF16: /* 16 bit offset to GOT. */
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
@@ -375,20 +388,20 @@
val = val + rela->r_addend -
((Elf_Addr) me->core_layout.base + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_GOTPCDBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
break;
case R_390_COPY:
case R_390_GLOB_DAT: /* Create GOT entry. */
@@ -412,9 +425,10 @@
return 0;
}
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
unsigned int symindex, unsigned int relsec,
- struct module *me)
+ struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
Elf_Addr base;
Elf_Sym *symtab;
@@ -430,13 +444,27 @@
n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
for (i = 0; i < n; i++, rela++) {
- rc = apply_rela(rela, base, symtab, strtab, me);
+ rc = apply_rela(rela, base, symtab, strtab, me, write);
if (rc)
return rc;
}
return 0;
}
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
+{
+ bool early = me->state == MODULE_STATE_UNFORMED;
+ void *(*write)(void *, const void *, size_t) = memcpy;
+
+ if (!early)
+ write = s390_kernel_write;
+
+ return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+ write);
+}
+
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0a487fa..86c8d53 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -148,7 +148,6 @@
local_mcck_disable();
mcck = *this_cpu_ptr(&cpu_mcck);
memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
- clear_cpu_flag(CIF_MCCK_PENDING);
local_mcck_enable();
local_irq_restore(flags);
@@ -333,7 +332,7 @@
/*
* machine check handler.
*/
-void notrace s390_do_machine_check(struct pt_regs *regs)
+int notrace s390_do_machine_check(struct pt_regs *regs)
{
static int ipd_count;
static DEFINE_SPINLOCK(ipd_lock);
@@ -342,6 +341,7 @@
unsigned long long tmp;
union mci mci;
unsigned long mcck_dam_code;
+ int mcck_pending = 0;
nmi_enter();
inc_irq_stat(NMI_NMI);
@@ -400,7 +400,7 @@
*/
mcck->kill_task = 1;
mcck->mcck_code = mci.val;
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
/*
@@ -420,8 +420,7 @@
mcck->stp_queue |= stp_sync_check();
if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
mcck->stp_queue |= stp_island_check();
- if (mcck->stp_queue)
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
/*
@@ -442,12 +441,12 @@
if (mci.cp) {
/* Channel report word pending */
mcck->channel_report = 1;
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
if (mci.w) {
/* Warning pending */
mcck->warning = 1;
- set_cpu_flag(CIF_MCCK_PENDING);
+ mcck_pending = 1;
}
/*
@@ -462,7 +461,17 @@
*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
}
clear_cpu_flag(CIF_MCCK_GUEST);
+
+ if (user_mode(regs) && mcck_pending) {
+ nmi_exit();
+ return 1;
+ }
+
+ if (mcck_pending)
+ schedule_mcck_handler();
+
nmi_exit();
+ return 0;
}
NOKPROBE_SYMBOL(s390_do_machine_check);
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
new file mode 100644
index 0000000..51c5a9f
--- /dev/null
+++ b/arch/s390/kernel/numa.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <asm/numa.h>
+
+struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+void __init numa_setup(void)
+{
+ int nid;
+
+ nodes_clear(node_possible_map);
+ node_set(0, node_possible_map);
+ node_set_online(0);
+ for (nid = 0; nid < MAX_NUMNODES; nid++) {
+ NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
+ if (!NODE_DATA(nid))
+ panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+ __func__, sizeof(pg_data_t), 8);
+ }
+ NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ NODE_DATA(0)->node_id = 0;
+}
+
+static int __init numa_init_late(void)
+{
+ register_one_node(0);
+ return 0;
+}
+arch_initcall(numa_init_late);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 8b33e03..37265f5 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -238,6 +238,64 @@
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
@@ -516,6 +574,67 @@
NULL,
};
+static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TEND),
+ CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND),
+ CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+ CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+ CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS),
+ CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS),
+ CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS),
+ CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
+ CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH),
+ CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+ CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+ NULL,
+};
+
/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
static struct attribute_group cpumcf_pmu_events_group = {
@@ -624,9 +743,11 @@
break;
case 0x3906:
case 0x3907:
+ model = cpumcf_z14_pmu_event_attr;
+ break;
case 0x8561:
case 0x8562:
- model = cpumcf_z14_pmu_event_attr;
+ model = cpumcf_z15_pmu_event_attr;
break;
default:
model = none;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b83bddf..19cd7b9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -156,8 +156,8 @@
}
}
- debug_sprintf_event(sfdbg, 5,
- "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
+ (unsigned long)sfb->sdbt);
memset(sfb, 0, sizeof(*sfb));
}
@@ -212,10 +212,11 @@
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
- "sampling buffer is not linked: origin=%p"
- "tail=%p\n",
- (void *) sfb->sdbt, (void *) tail);
+ debug_sprintf_event(sfdbg, 3, "%s: "
+ "sampling buffer is not linked: origin %#lx"
+ " tail %#lx\n", __func__,
+ (unsigned long)sfb->sdbt,
+ (unsigned long)tail);
return -EINVAL;
}
@@ -264,8 +265,8 @@
*tail = (unsigned long) sfb->sdbt + 1;
sfb->tail = tail;
- debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
- " settings: sdbt=%lu sdb=%lu\n",
+ debug_sprintf_event(sfdbg, 4, "%s: new buffer"
+ " settings: sdbt %lu sdb %lu\n", __func__,
sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -305,12 +306,13 @@
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
if (rc) {
free_sampling_buffer(sfb);
- debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
- "realloc_sampling_buffer failed with rc=%i\n", rc);
+ debug_sprintf_event(sfdbg, 4, "%s: "
+ "realloc_sampling_buffer failed with rc %i\n",
+ __func__, rc);
} else
debug_sprintf_event(sfdbg, 4,
- "alloc_sampling_buffer: tear=%p dear=%p\n",
- sfb->sdbt, (void *) *sfb->sdbt);
+ "%s: tear %#lx dear %#lx\n", __func__,
+ (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
return rc;
}
@@ -370,28 +372,33 @@
static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
{
- unsigned long n_sdb, freq, factor;
+ unsigned long n_sdb, freq;
size_t sample_size;
/* Calculate sampling buffers using 4K pages
*
- * 1. Determine the sample data size which depends on the used
- * sampling functions, for example, basic-sampling or
- * basic-sampling with diagnostic-sampling.
+ * 1. The sampling size is 32 bytes for basic sampling. This size
+ * is the same for all machine types. Diagnostic
+ * sampling uses auxlilary data buffer setup which provides the
+ * memory for SDBs using linux common code auxiliary trace
+ * setup.
*
- * 2. Use the sampling frequency as input. The sampling buffer is
- * designed for almost one second. This can be adjusted through
- * the "factor" variable.
- * In any case, alloc_sampling_buffer() sets the Alert Request
+ * 2. Function alloc_sampling_buffer() sets the Alert Request
* Control indicator to trigger a measurement-alert to harvest
- * sample-data-blocks (sdb).
+ * sample-data-blocks (SDB). This is done per SDB. This
+ * measurement alert interrupt fires quick enough to handle
+ * one SDB, on very high frequency and work loads there might
+ * be 2 to 3 SBDs available for sample processing.
+ * Currently there is no need for setup alert request on every
+ * n-th page. This is counterproductive as one IRQ triggers
+ * a very high number of samples to be processed at one IRQ.
*
- * 3. Compute the number of sample-data-blocks and ensure a minimum
- * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not
- * exceed a "calculated" maximum. The symbolic maximum is
- * designed for basic-sampling only and needs to be increased if
- * diagnostic-sampling is active.
- * See also the remarks for these symbolic constants.
+ * 3. Use the sampling frequency as input.
+ * Compute the number of SDBs and ensure a minimum
+ * of CPUM_SF_MIN_SDB. Depending on frequency add some more
+ * SDBs to handle a higher sampling rate.
+ * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
+ * (one SDB) for every 10000 HZ frequency increment.
*
* 4. Compute the number of sample-data-block-tables (SDBT) and
* ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
@@ -399,10 +406,7 @@
*/
sample_size = sizeof(struct hws_basic_entry);
freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
- factor = 1;
- n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
- if (n_sdb < CPUM_SF_MIN_SDB)
- n_sdb = CPUM_SF_MIN_SDB;
+ n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
/* If there is already a sampling buffer allocated, it is very likely
* that the sampling facility is enabled too. If the event to be
@@ -417,8 +421,8 @@
return 0;
debug_sprintf_event(sfdbg, 3,
- "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
- " sample_size=%lu cpuhw=%p\n",
+ "%s: rate %lu f %lu sdb %lu/%lu"
+ " sample_size %lu cpuhw %p\n", __func__,
SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
sample_size, cpuhw);
@@ -478,8 +482,8 @@
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
- " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
+ __func__, OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -517,13 +521,13 @@
*/
rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
if (rc)
- debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
- "failed with rc=%i\n", rc);
+ debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
+ __func__, rc);
if (sfb_has_pending_allocs(sfb, hwc))
- debug_sprintf_event(sfdbg, 5, "sfb: extend: "
- "req=%lu alloc=%lu remaining=%lu\n",
- num, sfb->num_sdb - num_old,
+ debug_sprintf_event(sfdbg, 5, "%s: "
+ "req %lu alloc %lu remaining %lu\n",
+ __func__, num, sfb->num_sdb - num_old,
sfb_pending_allocs(sfb, hwc));
}
@@ -551,20 +555,22 @@
err = sf_disable();
if (err)
pr_err("Switching off the sampling facility failed "
- "with rc=%i\n", err);
+ "with rc %i\n", err);
debug_sprintf_event(sfdbg, 5,
- "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ "%s: initialized: cpuhw %p\n", __func__,
+ cpusf);
break;
case PMC_RELEASE:
cpusf->flags &= ~PMU_F_RESERVED;
err = sf_disable();
if (err) {
pr_err("Switching off the sampling facility failed "
- "with rc=%i\n", err);
+ "with rc %i\n", err);
} else
deallocate_buffers(cpusf);
debug_sprintf_event(sfdbg, 5,
- "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ "%s: released: cpuhw %p\n", __func__,
+ cpusf);
break;
}
if (err)
@@ -611,13 +617,6 @@
local64_set(&hwc->period_left, hwc->sample_period);
}
-static void hw_reset_registers(struct hw_perf_event *hwc,
- unsigned long *sdbt_origin)
-{
- /* (Re)set to first sample-data-block-table */
- TEAR_REG(hwc) = (unsigned long) sdbt_origin;
-}
-
static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
unsigned long rate)
{
@@ -673,7 +672,7 @@
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
- if (perf_output_begin(&handle, event, header.size))
+ if (perf_output_begin(&handle, data, event, header.size))
goto out;
/* Update the process ID (see also kernel/events/core.c) */
@@ -709,9 +708,9 @@
*/
if (sample_rate_to_freq(si, rate) >
sysctl_perf_event_sample_rate) {
- debug_sprintf_event(sfdbg, 1,
+ debug_sprintf_event(sfdbg, 1, "%s: "
"Sampling rate exceeds maximum "
- "perf sample rate\n");
+ "perf sample rate\n", __func__);
rate = 0;
}
}
@@ -756,10 +755,9 @@
attr->sample_period = rate;
SAMPL_RATE(hwc) = rate;
hw_init_period(hwc, SAMPL_RATE(hwc));
- debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
- "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
- event->attr.sample_period, event->attr.freq,
- SAMPLE_FREQ_MODE(hwc));
+ debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
+ __func__, event->cpu, event->attr.sample_period,
+ event->attr.freq, SAMPLE_FREQ_MODE(hwc));
return 0;
}
@@ -883,12 +881,21 @@
return err;
}
+static bool is_callchain_event(struct perf_event *event)
+{
+ u64 sample_type = event->attr.sample_type;
+
+ return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_STACK_USER);
+}
+
static int cpumsf_pmu_event_init(struct perf_event *event)
{
int err;
/* No support for taken branch sampling */
- if (has_branch_stack(event))
+ /* No support for callchain, stacks and registers */
+ if (has_branch_stack(event) || is_callchain_event(event))
return -EOPNOTSUPP;
switch (event->attr.type) {
@@ -962,8 +969,7 @@
* buffer extents
*/
sfb_account_overflows(cpuhw, hwc);
- if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
- extend_sampling_buffer(&cpuhw->sfb, hwc);
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
}
/* Rate may be adjusted with ioctl() */
cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
@@ -976,7 +982,7 @@
err = lsctl(&cpuhw->lsctl);
if (err) {
cpuhw->flags &= ~PMU_F_ENABLED;
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ pr_err("Loading sampling controls failed: op %i err %i\n",
1, err);
return;
}
@@ -984,12 +990,11 @@
/* Load current program parameter */
lpp(&S390_lowcore.lpp);
- debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
- "interval:%lx tear=%p dear=%p\n",
+ debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
+ "interval %#lx tear %#lx dear %#lx\n", __func__,
cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
cpuhw->lsctl.cd, cpuhw->lsctl.interval,
- (void *) cpuhw->lsctl.tear,
- (void *) cpuhw->lsctl.dear);
+ cpuhw->lsctl.tear, cpuhw->lsctl.dear);
}
static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1012,13 +1017,14 @@
err = lsctl(&inactive);
if (err) {
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ pr_err("Loading sampling controls failed: op %i err %i\n",
2, err);
return;
}
/* Save state of TEAR and DEAR register contents */
- if (!qsi(&si)) {
+ err = qsi(&si);
+ if (!err) {
/* TEAR/DEAR values are valid only if the sampling facility is
* enabled. Note that cpumsf_pmu_disable() might be called even
* for a disabled sampling facility because cpumsf_pmu_enable()
@@ -1029,8 +1035,8 @@
cpuhw->lsctl.dear = si.dear;
}
} else
- debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
- "qsi() failed with err=%i\n", err);
+ debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
+ __func__, err);
cpuhw->flags &= ~PMU_F_ENABLED;
}
@@ -1143,15 +1149,6 @@
local64_add(count, &event->count);
}
-static void debug_sample_entry(struct hws_basic_entry *sample,
- struct hws_trailer_entry *te)
-{
- debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
- "sampling data entry: te->f=%i basic.def=%04x "
- "(%p)\n",
- te->f, sample->def, sample);
-}
-
/* hw_collect_samples() - Walk through a sample-data-block and collect samples
* @event: The perf event
* @sdbt: Sample-data-block table
@@ -1205,7 +1202,11 @@
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sample_entry(sample, te);
+ debug_sprintf_event(sfdbg, 4,
+ "%s: Found unknown"
+ " sampling data entry: te->f %i"
+ " basic.def %#4x (%p)\n", __func__,
+ te->f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1280,9 +1281,9 @@
sampl_overflow += te->overflow;
/* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
- "overflow=%llu timestamp=%#llx\n",
- sdbt, te->overflow,
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+ "overflow %llu timestamp %#llx\n",
+ __func__, (unsigned long)sdbt, te->overflow,
(te->f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
@@ -1336,9 +1337,11 @@
}
if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
- "overflow stats: sample=%llu event=%llu\n",
- sampl_overflow, event_overflow);
+ debug_sprintf_event(sfdbg, 4, "%s: "
+ "overflows: sample %llu event %llu"
+ " total %llu num_sdb %llu\n",
+ __func__, sampl_overflow, event_overflow,
+ OVERFLOW_REG(hwc), num_sdb);
}
#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
@@ -1391,7 +1394,8 @@
te = aux_sdb_trailer(aux, aux->alert_mark);
te->flags &= ~SDB_TE_ALERT_REQ_MASK;
- debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+ debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
+ __func__, i, range_scan, aux->head);
}
/*
@@ -1424,6 +1428,10 @@
* SDBs between aux->head and aux->empty_mark are already ready
* for new data. range_scan is num of SDBs not within them.
*/
+ debug_sprintf_event(sfdbg, 6,
+ "%s: range %ld head %ld alert %ld empty %ld\n",
+ __func__, range, aux->head, aux->alert_mark,
+ aux->empty_mark);
if (range > AUX_SDB_NUM_EMPTY(aux)) {
range_scan = range - AUX_SDB_NUM_EMPTY(aux);
idx = aux->empty_mark + 1;
@@ -1449,15 +1457,11 @@
cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
cpuhw->lsctl.dear = aux->sdb_index[head];
- debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
- "head->alert_mark->empty_mark (num_alert, range)"
- "[%lx -> %lx -> %lx] (%lx, %lx) "
- "tear index %lx, tear %lx dear %lx\n",
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
+ "index %ld tear %#lx dear %#lx\n", __func__,
aux->head, aux->alert_mark, aux->empty_mark,
- AUX_SDB_NUM_ALERT(aux), range,
head / CPUM_SF_SDB_PER_TABLE,
- cpuhw->lsctl.tear,
- cpuhw->lsctl.dear);
+ cpuhw->lsctl.tear, cpuhw->lsctl.dear);
return 0;
}
@@ -1519,9 +1523,12 @@
unsigned long long *overflow)
{
unsigned long long orig_overflow, orig_flags, new_flags;
- unsigned long i, range_scan, idx;
+ unsigned long i, range_scan, idx, idx_old;
struct hws_trailer_entry *te;
+ debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
+ "empty %ld\n", __func__, range, aux->head,
+ aux->alert_mark, aux->empty_mark);
if (range <= AUX_SDB_NUM_EMPTY(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
@@ -1544,7 +1551,7 @@
* indicator fall into this range, set it.
*/
range_scan = range - AUX_SDB_NUM_EMPTY(aux);
- idx = aux->empty_mark + 1;
+ idx_old = idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
do {
@@ -1564,6 +1571,9 @@
/* Update empty_mark to new position */
aux->empty_mark = aux->head + range - 1;
+ debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
+ "empty %ld\n", __func__, range_scan, idx_old,
+ idx - 1, aux->empty_mark);
return true;
}
@@ -1585,8 +1595,9 @@
/* Inform user space new data arrived */
size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+ size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
- num_sdb = aux->sfb.num_sdb;
num_sdb = aux->sfb.num_sdb;
while (!done) {
@@ -1596,7 +1607,9 @@
pr_err("The AUX buffer with %lu pages for the "
"diagnostic-sampling mode is full\n",
num_sdb);
- debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+ debug_sprintf_event(sfdbg, 1,
+ "%s: AUX buffer used up\n",
+ __func__);
break;
}
if (WARN_ON_ONCE(!aux))
@@ -1618,24 +1631,24 @@
size = range << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
- "pages to overflow\n", num_sdb);
- debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
- "overflow %llx\n",
+ "pages to overflow\n", aux->sfb.num_sdb);
+ debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
+ "overflow %lld\n", __func__,
aux->head, range, overflow);
} else {
size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
"already full, try another\n",
+ __func__,
aux->head, aux->alert_mark);
}
}
if (done)
- debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
- "[%lx -> %lx -> %lx] (%lx, %lx)\n",
- aux->head, aux->alert_mark, aux->empty_mark,
- AUX_SDB_NUM_ALERT(aux), range);
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
+ "empty %ld\n", __func__, aux->head,
+ aux->alert_mark, aux->empty_mark);
}
/*
@@ -1658,8 +1671,7 @@
kfree(aux->sdb_index);
kfree(aux);
- debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
- "%lu SDBTs\n", num_sdbt);
+ debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
}
static void aux_sdb_init(unsigned long sdb)
@@ -1717,7 +1729,7 @@
sfb = &aux->sfb;
/* Allocate sdbt_index for fast reference */
- n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
+ n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
if (!aux->sdbt_index)
goto no_sdbt_index;
@@ -1767,8 +1779,7 @@
*/
aux->empty_mark = sfb->num_sdb - 1;
- debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
- " and %lu SDBs\n",
+ debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
sfb->num_sdbt, sfb->num_sdb);
return aux;
@@ -1822,9 +1833,9 @@
event->attr.sample_period = rate;
SAMPL_RATE(&event->hw) = rate;
hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
- debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
- "cpu:%d value:%llx period:%llx freq:%d\n",
- event->cpu, value,
+ debug_sprintf_event(sfdbg, 4, "%s:"
+ " cpu %d value %#llx period %#llx freq %d\n",
+ __func__, event->cpu, value,
event->attr.sample_period, do_freq);
return 0;
}
@@ -1900,7 +1911,7 @@
if (!SAMPL_DIAG_MODE(&event->hw)) {
cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
- hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+ TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
}
/* Ensure sampling functions are in the disabled state. If disabled,
@@ -2055,7 +2066,7 @@
/* Report measurement alerts only for non-PRA codes */
if (alert != CPU_MF_INT_SF_PRA)
- debug_sprintf_event(sfdbg, 6, "measurement alert: %#x\n",
+ debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
alert);
/* Sampling authorization change request */
@@ -2134,7 +2145,7 @@
sfb_set_limits(min, max);
pr_info("The sampling buffer limits have changed to: "
- "min=%lu max=%lu (diag=x%lu)\n",
+ "min %lu max %lu (diag %lu)\n",
CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
return 0;
}
@@ -2152,7 +2163,7 @@
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
- "reason=%04x\n", reason);
+ "reason %#x\n", reason);
}
static int __init init_cpum_sampling_pmu(void)
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 4352a50..6e9e5d5 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -53,8 +53,7 @@
}
void perf_get_regs_user(struct perf_regs *regs_user,
- struct pt_regs *regs,
- struct pt_regs *regs_user_copy)
+ struct pt_regs *regs)
{
/*
* Use the regs from the first interruption and let
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 59dee9d..9a92638 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,10 +78,10 @@
PGM_CHECK(do_dat_exception) /* 3a */
PGM_CHECK(do_dat_exception) /* 3b */
PGM_CHECK_DEFAULT /* 3c */
-PGM_CHECK_DEFAULT /* 3d */
-PGM_CHECK_DEFAULT /* 3e */
-PGM_CHECK_DEFAULT /* 3f */
-PGM_CHECK_DEFAULT /* 40 */
+PGM_CHECK(do_secure_storage_access) /* 3d */
+PGM_CHECK(do_non_secure_storage_access) /* 3e */
+PGM_CHECK(do_secure_storage_violation) /* 3f */
+PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
PGM_CHECK_DEFAULT /* 42 */
PGM_CHECK_DEFAULT /* 43 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 4e6299e..ec801d3 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -40,6 +40,7 @@
#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
+#include <asm/unwind.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -79,8 +80,8 @@
return 0;
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
+int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+ unsigned long arg, struct task_struct *p, unsigned long tls)
{
struct fake_frame
{
@@ -159,29 +160,10 @@
asm volatile("sfpc %0" : : "d" (0));
}
-/*
- * fill in the FPU structure for a core dump.
- */
-int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
-{
- save_fpu_regs();
- fpregs->fpc = current->thread.fpu.fpc;
- fpregs->pad = 0;
- if (MACHINE_HAS_VX)
- convert_vx_to_fp((freg_t *)&fpregs->fprs,
- current->thread.fpu.vxrs);
- else
- memcpy(&fpregs->fprs, current->thread.fpu.fprs,
- sizeof(fpregs->fprs));
- return 1;
-}
-EXPORT_SYMBOL(dump_fpu);
-
unsigned long get_wchan(struct task_struct *p)
{
- struct stack_frame *sf, *low, *high;
- unsigned long return_address;
- int count;
+ struct unwind_state state;
+ unsigned long ip = 0;
if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
return 0;
@@ -189,26 +171,22 @@
if (!try_get_task_stack(p))
return 0;
- low = task_stack_page(p);
- high = (struct stack_frame *) task_pt_regs(p);
- sf = (struct stack_frame *) p->thread.ksp;
- if (sf <= low || sf > high) {
- return_address = 0;
- goto out;
- }
- for (count = 0; count < 16; count++) {
- sf = (struct stack_frame *)READ_ONCE_NOCHECK(sf->back_chain);
- if (sf <= low || sf > high) {
- return_address = 0;
- goto out;
+ unwind_for_each_frame(&state, p, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK) {
+ ip = 0;
+ break;
}
- return_address = READ_ONCE_NOCHECK(sf->gprs[8]);
- if (!in_sched_functions(return_address))
- goto out;
+
+ ip = unwind_get_return_address(&state);
+ if (!ip)
+ break;
+
+ if (!in_sched_functions(ip))
+ break;
}
-out:
+
put_task_stack(p);
- return return_address;
+ return ip;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 91b9b3f..c92d04f 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -151,10 +151,35 @@
}
}
+static void show_cpu_topology(struct seq_file *m, unsigned long n)
+{
+#ifdef CONFIG_SCHED_TOPOLOGY
+ seq_printf(m, "physical id : %d\n", topology_physical_package_id(n));
+ seq_printf(m, "core id : %d\n", topology_core_id(n));
+ seq_printf(m, "book id : %d\n", topology_book_id(n));
+ seq_printf(m, "drawer id : %d\n", topology_drawer_id(n));
+ seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n));
+ seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n));
+ seq_printf(m, "siblings : %d\n", cpumask_weight(topology_core_cpumask(n)));
+ seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n));
+#endif /* CONFIG_SCHED_TOPOLOGY */
+}
+
+static void show_cpu_ids(struct seq_file *m, unsigned long n)
+{
+ struct cpuid *id = &per_cpu(cpu_info.cpu_id, n);
+
+ seq_printf(m, "version : %02X\n", id->version);
+ seq_printf(m, "identification : %06X\n", id->ident);
+ seq_printf(m, "machine : %04X\n", id->machine);
+}
+
static void show_cpu_mhz(struct seq_file *m, unsigned long n)
{
struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
+ if (!machine_has_cpu_mhz)
+ return;
seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static);
}
@@ -169,9 +194,9 @@
if (n == first)
show_cpu_summary(m, v);
- if (!machine_has_cpu_mhz)
- return 0;
seq_printf(m, "\ncpu number : %ld\n", n);
+ show_cpu_topology(m, n);
+ show_cpu_ids(m, n);
show_cpu_mhz(m, n);
return 0;
}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ad74472..a76dd27 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -25,8 +25,6 @@
#include <linux/compat.h>
#include <trace/syscall.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/switch_to.h>
@@ -869,6 +867,9 @@
unsigned long mask = -1UL;
long ret = -1;
+ if (is_compat_task())
+ mask = 0xffffffff;
+
/*
* The sysc_tracesys code in entry.S stored the system
* call number to gprs[2].
@@ -882,17 +883,35 @@
goto skip;
}
+#ifdef CONFIG_SECCOMP
/* Do the secure computing check after ptrace. */
- if (secure_computing(NULL)) {
- /* seccomp failures shouldn't expose any additional code. */
- goto skip;
+ if (unlikely(test_thread_flag(TIF_SECCOMP))) {
+ struct seccomp_data sd;
+
+ if (is_compat_task()) {
+ sd.instruction_pointer = regs->psw.addr & 0x7fffffff;
+ sd.arch = AUDIT_ARCH_S390;
+ } else {
+ sd.instruction_pointer = regs->psw.addr;
+ sd.arch = AUDIT_ARCH_S390X;
+ }
+
+ sd.nr = regs->int_code & 0xffff;
+ sd.args[0] = regs->orig_gpr2 & mask;
+ sd.args[1] = regs->gprs[3] & mask;
+ sd.args[2] = regs->gprs[4] & mask;
+ sd.args[3] = regs->gprs[5] & mask;
+ sd.args[4] = regs->gprs[6] & mask;
+ sd.args[5] = regs->gprs[7] & mask;
+
+ if (__secure_computing(&sd) == -1)
+ goto skip;
}
+#endif /* CONFIG_SECCOMP */
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->int_code & 0xffff);
- if (is_compat_task())
- mask = 0xffffffff;
audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask,
regs->gprs[3] &mask, regs->gprs[4] &mask,
@@ -925,28 +944,14 @@
static int s390_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned pos;
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- unsigned long *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- unsigned long __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long))
+ membuf_store(&to, __peek_user(target, pos));
return 0;
}
@@ -987,8 +992,8 @@
}
static int s390_fpregs_get(struct task_struct *target,
- const struct user_regset *regset, unsigned int pos,
- unsigned int count, void *kbuf, void __user *ubuf)
+ const struct user_regset *regset,
+ struct membuf to)
{
_s390_fp_regs fp_regs;
@@ -998,8 +1003,7 @@
fp_regs.fpc = target->thread.fpu.fpc;
fpregs_store(&fp_regs, &target->thread.fpu);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &fp_regs, 0, -1);
+ return membuf_write(&to, &fp_regs, sizeof(fp_regs));
}
static int s390_fpregs_set(struct task_struct *target,
@@ -1046,20 +1050,9 @@
static int s390_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- if (count > 0) {
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = target->thread.last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(target->thread.last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, target->thread.last_break);
}
static int s390_last_break_set(struct task_struct *target,
@@ -1072,16 +1065,13 @@
static int s390_tdb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct pt_regs *regs = task_pt_regs(target);
- unsigned char *data;
if (!(regs->int_code & 0x200))
return -ENODATA;
- data = target->thread.trap_tdb;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+ return membuf_write(&to, target->thread.trap_tdb, 256);
}
static int s390_tdb_set(struct task_struct *target,
@@ -1094,8 +1084,7 @@
static int s390_vxrs_low_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
__u64 vxrs[__NUM_VXRS_LOW];
int i;
@@ -1106,7 +1095,7 @@
save_fpu_regs();
for (i = 0; i < __NUM_VXRS_LOW; i++)
vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ return membuf_write(&to, vxrs, sizeof(vxrs));
}
static int s390_vxrs_low_set(struct task_struct *target,
@@ -1135,18 +1124,14 @@
static int s390_vxrs_high_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- __vector128 vxrs[__NUM_VXRS_HIGH];
-
if (!MACHINE_HAS_VX)
return -ENODEV;
if (target == current)
save_fpu_regs();
- memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+ __NUM_VXRS_HIGH * sizeof(__vector128));
}
static int s390_vxrs_high_set(struct task_struct *target,
@@ -1168,12 +1153,9 @@
static int s390_system_call_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- unsigned int *data = &target->thread.system_call;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(unsigned int));
+ return membuf_store(&to, target->thread.system_call);
}
static int s390_system_call_set(struct task_struct *target,
@@ -1188,8 +1170,7 @@
static int s390_gs_cb_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct gs_cb *data = target->thread.gs_cb;
@@ -1199,8 +1180,7 @@
return -ENODATA;
if (target == current)
save_gs_cb(data);
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ return membuf_write(&to, data, sizeof(struct gs_cb));
}
static int s390_gs_cb_set(struct task_struct *target,
@@ -1244,8 +1224,7 @@
static int s390_gs_bc_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct gs_cb *data = target->thread.gs_bc_cb;
@@ -1253,8 +1232,7 @@
return -ENODEV;
if (!data)
return -ENODATA;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct gs_cb));
+ return membuf_write(&to, data, sizeof(struct gs_cb));
}
static int s390_gs_bc_set(struct task_struct *target,
@@ -1304,8 +1282,7 @@
static int s390_runtime_instr_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
struct runtime_instr_cb *data = target->thread.ri_cb;
@@ -1314,8 +1291,7 @@
if (!data)
return -ENODATA;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- data, 0, sizeof(struct runtime_instr_cb));
+ return membuf_write(&to, data, sizeof(struct runtime_instr_cb));
}
static int s390_runtime_instr_set(struct task_struct *target,
@@ -1375,7 +1351,7 @@
.n = sizeof(s390_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_regs_get,
+ .regset_get = s390_regs_get,
.set = s390_regs_set,
},
{
@@ -1383,7 +1359,7 @@
.n = sizeof(s390_fp_regs) / sizeof(long),
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_fpregs_get,
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
{
@@ -1391,7 +1367,7 @@
.n = 1,
.size = sizeof(unsigned int),
.align = sizeof(unsigned int),
- .get = s390_system_call_get,
+ .regset_get = s390_system_call_get,
.set = s390_system_call_set,
},
{
@@ -1399,7 +1375,7 @@
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_last_break_get,
+ .regset_get = s390_last_break_get,
.set = s390_last_break_set,
},
{
@@ -1407,7 +1383,7 @@
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
{
@@ -1415,7 +1391,7 @@
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_vxrs_low_get,
+ .regset_get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
{
@@ -1423,7 +1399,7 @@
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
- .get = s390_vxrs_high_get,
+ .regset_get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
{
@@ -1431,7 +1407,7 @@
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_cb_get,
+ .regset_get = s390_gs_cb_get,
.set = s390_gs_cb_set,
},
{
@@ -1439,7 +1415,7 @@
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_bc_get,
+ .regset_get = s390_gs_bc_get,
.set = s390_gs_bc_set,
},
{
@@ -1447,13 +1423,13 @@
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_runtime_instr_get,
+ .regset_get = s390_runtime_instr_get,
.set = s390_runtime_instr_set,
},
};
static const struct user_regset_view user_s390_view = {
- .name = UTS_MACHINE,
+ .name = "s390x",
.e_machine = EM_S390,
.regsets = s390_regsets,
.n = ARRAY_SIZE(s390_regsets)
@@ -1462,28 +1438,15 @@
#ifdef CONFIG_COMPAT
static int s390_compat_regs_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
+ unsigned n;
+
if (target == current)
save_access_regs(target->thread.acrs);
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = __peek_user_compat(target, pos);
- count -= sizeof(*k);
- pos += sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(__peek_user_compat(target, pos), u++))
- return -EFAULT;
- count -= sizeof(*u);
- pos += sizeof(*u);
- }
- }
+ for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t))
+ membuf_store(&to, __peek_user_compat(target, n));
return 0;
}
@@ -1525,29 +1488,14 @@
static int s390_compat_regs_high_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
compat_ulong_t *gprs_high;
+ int i;
- gprs_high = (compat_ulong_t *)
- &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
- if (kbuf) {
- compat_ulong_t *k = kbuf;
- while (count > 0) {
- *k++ = *gprs_high;
- gprs_high += 2;
- count -= sizeof(*k);
- }
- } else {
- compat_ulong_t __user *u = ubuf;
- while (count > 0) {
- if (__put_user(*gprs_high, u++))
- return -EFAULT;
- gprs_high += 2;
- count -= sizeof(*u);
- }
- }
+ gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs;
+ for (i = 0; i < NUM_GPRS; i++, gprs_high += 2)
+ membuf_store(&to, *gprs_high);
return 0;
}
@@ -1586,23 +1534,11 @@
static int s390_compat_last_break_get(struct task_struct *target,
const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
+ struct membuf to)
{
- compat_ulong_t last_break;
+ compat_ulong_t last_break = target->thread.last_break;
- if (count > 0) {
- last_break = target->thread.last_break;
- if (kbuf) {
- unsigned long *k = kbuf;
- *k = last_break;
- } else {
- unsigned long __user *u = ubuf;
- if (__put_user(last_break, u))
- return -EFAULT;
- }
- }
- return 0;
+ return membuf_store(&to, (unsigned long)last_break);
}
static int s390_compat_last_break_set(struct task_struct *target,
@@ -1619,7 +1555,7 @@
.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_compat_regs_get,
+ .regset_get = s390_compat_regs_get,
.set = s390_compat_regs_set,
},
{
@@ -1627,7 +1563,7 @@
.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_fpregs_get,
+ .regset_get = s390_fpregs_get,
.set = s390_fpregs_set,
},
{
@@ -1635,7 +1571,7 @@
.n = 1,
.size = sizeof(compat_uint_t),
.align = sizeof(compat_uint_t),
- .get = s390_system_call_get,
+ .regset_get = s390_system_call_get,
.set = s390_system_call_set,
},
{
@@ -1643,7 +1579,7 @@
.n = 1,
.size = sizeof(long),
.align = sizeof(long),
- .get = s390_compat_last_break_get,
+ .regset_get = s390_compat_last_break_get,
.set = s390_compat_last_break_set,
},
{
@@ -1651,7 +1587,7 @@
.n = 1,
.size = 256,
.align = 1,
- .get = s390_tdb_get,
+ .regset_get = s390_tdb_get,
.set = s390_tdb_set,
},
{
@@ -1659,7 +1595,7 @@
.n = __NUM_VXRS_LOW,
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_vxrs_low_get,
+ .regset_get = s390_vxrs_low_get,
.set = s390_vxrs_low_set,
},
{
@@ -1667,7 +1603,7 @@
.n = __NUM_VXRS_HIGH,
.size = sizeof(__vector128),
.align = sizeof(__vector128),
- .get = s390_vxrs_high_get,
+ .regset_get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
{
@@ -1675,7 +1611,7 @@
.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
.align = sizeof(compat_long_t),
- .get = s390_compat_regs_high_get,
+ .regset_get = s390_compat_regs_high_get,
.set = s390_compat_regs_high_set,
},
{
@@ -1683,7 +1619,7 @@
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_cb_get,
+ .regset_get = s390_gs_cb_get,
.set = s390_gs_cb_set,
},
{
@@ -1691,7 +1627,7 @@
.n = sizeof(struct gs_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_gs_bc_get,
+ .regset_get = s390_gs_bc_get,
.set = s390_gs_bc_set,
},
{
@@ -1699,7 +1635,7 @@
.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
.size = sizeof(__u64),
.align = sizeof(__u64),
- .get = s390_runtime_instr_get,
+ .regset_get = s390_runtime_instr_get,
.set = s390_runtime_instr_set,
},
};
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f661f17..f9f8721 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -37,7 +37,7 @@
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
@@ -93,10 +93,6 @@
unsigned long int_hwcap = 0;
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
-int __bootdata_preserved(prot_virt_guest);
-#endif
-
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
@@ -106,12 +102,13 @@
struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
-unsigned long __bootdata_preserved(__swsusp_reset_dma);
unsigned long __bootdata_preserved(__stext_dma);
unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned int __bootdata_preserved(zlib_dfltcc_support);
+EXPORT_SYMBOL(zlib_dfltcc_support);
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -121,6 +118,7 @@
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
+unsigned long vmemmap_size;
unsigned long MODULES_VADDR;
unsigned long MODULES_END;
@@ -130,6 +128,12 @@
EXPORT_SYMBOL(lowcore_ptr);
/*
+ * The Write Back bit position in the physaddr is given by the SLPC PCI.
+ * Leaving the mask zero always uses write through which is safe
+ */
+unsigned long mio_wb_bit_mask __ro_after_init;
+
+/*
* This is set up by the setup-routine at boot-time
* for S390 need to find out, what we have to setup
* using address 0x10400 ...
@@ -242,14 +246,12 @@
SET_CONSOLE_SCLP;
#endif
}
- if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
- conswitchp = &dummy_con;
}
#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
- if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+ if (!is_ipl_type_dump())
return;
if (OLDMEM_BASE)
return;
@@ -304,17 +306,14 @@
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
-void *restart_stack __section(.data);
+void *restart_stack;
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
- return (unsigned long)
- __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP,
- PAGE_KERNEL, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
+ return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
+ THREADINFO_GFP, NUMA_NO_NODE,
+ __builtin_return_address(0));
#else
return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
@@ -373,8 +372,12 @@
static void __init setup_lowcore_dat_off(void)
{
+ unsigned long int_psw_mask = PSW_KERNEL_BITS;
struct lowcore *lc;
+ if (IS_ENABLED(CONFIG_KASAN))
+ int_psw_mask |= PSW_MASK_DAT;
+
/*
* Setup lowcore for boot cpu
*/
@@ -386,16 +389,15 @@
lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = PSW_KERNEL_BITS |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->nodat_stack = ((unsigned long) &init_thread_union)
@@ -410,7 +412,6 @@
memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
sizeof(lc->alt_stfle_fac_list));
nmi_alloc_boot_cpu(lc);
- vdso_alloc_boot_cpu(lc);
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
lc->async_enter_timer = S390_lowcore.async_enter_timer;
lc->exit_timer = S390_lowcore.exit_timer;
@@ -453,6 +454,7 @@
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+ lc->preempt_count = PREEMPT_DISABLED;
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
@@ -492,8 +494,9 @@
static void __init setup_resources(void)
{
struct resource *res, *std_res, *sub_res;
- struct memblock_region *reg;
+ phys_addr_t start, end;
int j;
+ u64 i;
code_resource.start = (unsigned long) _text;
code_resource.end = (unsigned long) _etext - 1;
@@ -502,7 +505,7 @@
bss_resource.start = (unsigned long) __bss_start;
bss_resource.end = (unsigned long) __bss_stop - 1;
- for_each_memblock(memory, reg) {
+ for_each_mem_range(i, &start, &end) {
res = memblock_alloc(sizeof(*res), 8);
if (!res)
panic("%s: Failed to allocate %zu bytes align=0x%x\n",
@@ -510,8 +513,13 @@
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
- res->start = reg->base;
- res->end = reg->base + reg->size - 1;
+ res->start = start;
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in resourses, end points to the last byte in
+ * the range.
+ */
+ res->end = end - 1;
request_resource(&iomem_resource, res);
for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -554,19 +562,17 @@
unsigned long vmax, tmp;
/* Choose kernel address space layout: 3 or 4 levels. */
- if (IS_ENABLED(CONFIG_KASAN)) {
- vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
- ? _REGION1_SIZE
- : _REGION2_SIZE;
- } else {
- tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
- tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
- if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
- vmax = _REGION2_SIZE; /* 3-level kernel page table */
- else
- vmax = _REGION1_SIZE; /* 4-level kernel page table */
- }
-
+ tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+ if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+ vmax = _REGION2_SIZE; /* 3-level kernel page table */
+ else
+ vmax = _REGION1_SIZE; /* 4-level kernel page table */
+ if (is_prot_virt_host())
+ adjust_to_uv_max(&vmax);
+#ifdef CONFIG_KASAN
+ vmax = kasan_vmax;
+#endif
/* module area is at the end of the kernel address space. */
MODULES_END = vmax;
MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -585,9 +591,14 @@
/* Take care that memory_end is set and <= vmemmap */
memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
#ifdef CONFIG_KASAN
- /* fit in kasan shadow memory region between 1:1 and vmemmap */
memory_end = min(memory_end, KASAN_SHADOW_START);
- vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
+#endif
+ vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page);
+#ifdef CONFIG_KASAN
+ /* move vmemmap above kasan shadow only if stands in a way */
+ if (KASAN_SHADOW_END > (unsigned long)vmemmap &&
+ (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START)
+ vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
#endif
max_pfn = max_low_pfn = PFN_DOWN(memory_end);
memblock_remove(memory_end, ULONG_MAX);
@@ -598,9 +609,10 @@
#ifdef CONFIG_CRASH_DUMP
/*
- * When kdump is enabled, we have to ensure that no memory from
- * the area [0 - crashkernel memory size] and
- * [crashk_res.start - crashk_res.end] is set offline.
+ * When kdump is enabled, we have to ensure that no memory from the area
+ * [0 - crashkernel memory size] is set offline - it will be exchanged with
+ * the crashkernel memory region when kdump is triggered. The crashkernel
+ * memory region can never get offlined (pages are unmovable).
*/
static int kdump_mem_notifier(struct notifier_block *nb,
unsigned long action, void *data)
@@ -611,11 +623,7 @@
return NOTIFY_OK;
if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
return NOTIFY_BAD;
- if (arg->start_pfn > PFN_DOWN(crashk_res.end))
- return NOTIFY_OK;
- if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
- return NOTIFY_OK;
- return NOTIFY_BAD;
+ return NOTIFY_OK;
}
static struct notifier_block kdump_mem_nb = {
@@ -764,14 +772,6 @@
memblock_free(start, size);
}
-static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
-{
- memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
- start, start + size - 1);
- memblock_add_range(&memblock.memory, start, size, 0, 0);
- memblock_add_range(&memblock.physmem, start, size, 0, 0);
-}
-
static const char * __init get_mem_info_source(void)
{
switch (mem_detect.info_source) {
@@ -792,13 +792,16 @@
unsigned long start, end;
int i;
- memblock_dbg("physmem info source: %s (%hhd)\n",
- get_mem_info_source(), mem_detect.info_source);
+ pr_debug("physmem info source: %s (%hhd)\n",
+ get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
- for_each_mem_detect_block(i, &start, &end)
+ for_each_mem_detect_block(i, &start, &end) {
+ memblock_add(start, end - start);
memblock_physmem_add(start, end - start);
+ }
memblock_set_bottom_up(false);
+ memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
memblock_dump_all();
}
@@ -832,18 +835,16 @@
static void __init setup_memory(void)
{
- struct memblock_region *reg;
+ phys_addr_t start, end;
+ u64 i;
/*
* Init storage key for present memory
*/
- for_each_memblock(memory, reg) {
- storage_key_init_range(reg->base, reg->base + reg->size);
- }
- psw_set_key(PAGE_DEFAULT_KEY);
+ for_each_mem_range(i, &start, &end)
+ storage_key_init_range(start, end);
- /* Only cosmetics */
- memblock_enforce_memory_limit(memblock_end_of_DRAM());
+ psw_set_key(PAGE_DEFAULT_KEY);
}
/*
@@ -1034,8 +1035,7 @@
{
union diag318_info diag318_info = {
.cpnc = CPNC_LINUX,
- .cpvc_linux = 0,
- .cpvc_distro = {0},
+ .cpvc = 0,
};
if (!sclp.has_diag318)
@@ -1140,14 +1140,7 @@
free_mem_detect_info();
remove_oldmem();
- /*
- * Make sure all chunks are MAX_ORDER aligned so we don't need the
- * extra checks that HOLES_IN_ZONE would require.
- *
- * Is this still required?
- */
- memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
-
+ setup_uv();
setup_memory_end();
setup_memory();
dma_contiguous_reserve(memory_end);
@@ -1191,7 +1184,7 @@
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_init_branches();
- /* Setup zfcpdump support */
+ /* Setup zfcp/nvme dump support */
setup_zfcpdump();
/* Add system specific data to the random pool */
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index e6fca54..9e900a8 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -487,7 +487,7 @@
regs->gprs[2] = -EINTR;
break;
}
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOINTR:
regs->gprs[2] = regs->orig_gpr2;
regs->psw.addr =
@@ -514,7 +514,7 @@
case -ERESTART_RESTARTBLOCK:
/* Restart with sys_restart_syscall */
regs->int_code = __NR_restart_syscall;
- /* fallthrough */
+ fallthrough;
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@@ -535,7 +535,6 @@
void do_notify_resume(struct pt_regs *regs)
{
- clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
rseq_handle_notify_resume(NULL, regs);
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8c51462..5674792 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -61,6 +61,7 @@
ec_schedule = 0,
ec_call_function_single,
ec_stop_cpu,
+ ec_mcck_pending,
};
enum {
@@ -145,7 +146,7 @@
static inline int pcpu_stopped(struct pcpu *pcpu)
{
- u32 uninitialized_var(status);
+ u32 status;
if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
0, &status) != SIGP_CC_STATUS_STORED)
@@ -214,6 +215,7 @@
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+ lc->preempt_count = PREEMPT_DISABLED;
if (nmi_alloc_per_cpu(lc))
goto out_async;
if (vdso_alloc_per_cpu(lc))
@@ -403,6 +405,11 @@
return -1;
}
+void schedule_mcck_handler(void)
+{
+ pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+}
+
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
@@ -415,14 +422,11 @@
void notrace smp_yield_cpu(int cpu)
{
- if (MACHINE_HAS_DIAG9C) {
- diag_stat_inc_norecursion(DIAG_STAT_X09C);
- asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
- } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
- diag_stat_inc_norecursion(DIAG_STAT_X044);
- asm volatile("diag 0,0,0x44");
- }
+ if (!MACHINE_HAS_DIAG9C)
+ return;
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
}
/*
@@ -500,6 +504,8 @@
scheduler_ipi();
if (test_bit(ec_call_function_single, &bits))
generic_smp_call_function_single_interrupt();
+ if (test_bit(ec_mcck_pending, &bits))
+ s390_handle_mcck();
}
static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -601,14 +607,14 @@
/*
* Collect CPU state of the previous, crashed system.
* There are four cases:
- * 1) standard zfcp dump
- * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * 1) standard zfcp/nvme dump
+ * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The boot CPU state is located in
* the absolute lowcore of the memory stored in the HSA. The zcore code
* will copy the boot CPU state from the HSA.
- * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
- * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
+ * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
* The state for all CPUs except the boot CPU needs to be collected
* with sigp stop-and-store-status. The firmware or the boot-loader
* stored the registers of the boot CPU in the absolute lowcore in the
@@ -655,7 +661,7 @@
unsigned long page;
bool is_boot_cpu;
- if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+ if (!(OLDMEM_BASE || is_ipl_type_dump()))
/* No previous system present, normal boot. */
return;
/* Allocate a page as dumping area for the store status sigps */
@@ -681,7 +687,7 @@
/* Get the vector registers */
smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
/*
- * For a zfcp dump OLDMEM_BASE == NULL and the registers
+ * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
* of the boot CPU are stored in the HSA. To retrieve
* these registers an SCLP request is required which is
* done by drivers/s390/char/zcore.c:init_cpu_info()
@@ -706,6 +712,11 @@
return pcpu_devices[cpu].polarization;
}
+int smp_cpu_get_cpu_address(int cpu)
+{
+ return pcpu_devices[cpu].address;
+}
+
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
{
static int use_sigp_detection;
@@ -853,16 +864,16 @@
set_cpu_flag(CIF_ASCE_SECONDARY);
cpu_init();
rcu_cpu_starting(cpu);
- preempt_disable();
init_cpu_timer();
vtime_init();
pfault_init();
- notify_cpu_starting(smp_processor_id());
+ notify_cpu_starting(cpu);
if (topology_cpu_dedicated(cpu))
set_cpu_flag(CIF_DEDICATED_CPU);
else
clear_cpu_flag(CIF_DEDICATED_CPU);
- set_cpu_online(smp_processor_id(), true);
+ set_cpu_online(cpu, true);
+ update_cpu_masks();
inc_irq_stat(CPU_RST);
local_irq_enable();
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -922,6 +933,7 @@
/* Handle possible pending IPIs */
smp_handle_ext_call();
set_cpu_online(smp_processor_id(), false);
+ update_cpu_masks();
/* Disable pseudo page faults on this cpu. */
pfault_fini();
/* Disable interrupt sources via control register. */
@@ -989,10 +1001,6 @@
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
void __init smp_setup_processor_id(void)
{
pcpu_devices[0].address = stap();
@@ -1122,6 +1130,7 @@
return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
}
+
static int smp_cpu_pre_down(unsigned int cpu)
{
struct device *s = &per_cpu(cpu_device, cpu)->dev;
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index f8fc4f8..7f1266c 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -9,6 +9,7 @@
#include <linux/stacktrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
+#include <asm/kprobes.h>
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
@@ -18,7 +19,49 @@
unwind_for_each_frame(&state, task, regs, 0) {
addr = unwind_get_return_address(&state);
- if (!addr || !consume_entry(cookie, addr, false))
+ if (!addr || !consume_entry(cookie, addr))
break;
}
}
+
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
+{
+ struct unwind_state state;
+ unsigned long addr;
+
+ unwind_for_each_frame(&state, task, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK)
+ return -EINVAL;
+
+ if (state.regs)
+ return -EINVAL;
+
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ return -EINVAL;
+
+#ifdef CONFIG_KPROBES
+ /*
+ * Mark stacktraces with kretprobed functions on them
+ * as unreliable.
+ */
+ if (state.ip == (unsigned long)kretprobe_trampoline)
+ return -EINVAL;
+#endif
+
+ if (!consume_entry(cookie, addr))
+ return -EINVAL;
+ }
+
+ /* Check for stack corruption */
+ if (unwind_error(&state))
+ return -EINVAL;
+ return 0;
+}
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
deleted file mode 100644
index 75b7b30..0000000
--- a/arch/s390/kernel/suspend.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Suspend support specific for s390.
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- */
-
-#include <linux/pfn.h>
-#include <linux/suspend.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <asm/ctl_reg.h>
-#include <asm/ipl.h>
-#include <asm/cio.h>
-#include <asm/sections.h>
-#include "entry.h"
-
-/*
- * The restore of the saved pages in an hibernation image will set
- * the change and referenced bits in the storage key for each page.
- * Overindication of the referenced bits after an hibernation cycle
- * does not cause any harm but the overindication of the change bits
- * would cause trouble.
- * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
- * page to the most significant byte of the associated page frame
- * number in the hibernation image.
- */
-
-/*
- * Key storage is allocated as a linked list of pages.
- * The size of the keys array is (PAGE_SIZE - sizeof(long))
- */
-struct page_key_data {
- struct page_key_data *next;
- unsigned char data[];
-};
-
-#define PAGE_KEY_DATA_SIZE (PAGE_SIZE - sizeof(struct page_key_data *))
-
-static struct page_key_data *page_key_data;
-static struct page_key_data *page_key_rp, *page_key_wp;
-static unsigned long page_key_rx, page_key_wx;
-unsigned long suspend_zero_pages;
-
-/*
- * For each page in the hibernation image one additional byte is
- * stored in the most significant byte of the page frame number.
- * On suspend no additional memory is required but on resume the
- * keys need to be memorized until the page data has been restored.
- * Only then can the storage keys be set to their old state.
- */
-unsigned long page_key_additional_pages(unsigned long pages)
-{
- return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-}
-
-/*
- * Free page_key_data list of arrays.
- */
-void page_key_free(void)
-{
- struct page_key_data *pkd;
-
- while (page_key_data) {
- pkd = page_key_data;
- page_key_data = pkd->next;
- free_page((unsigned long) pkd);
- }
-}
-
-/*
- * Allocate page_key_data list of arrays with enough room to store
- * one byte for each page in the hibernation image.
- */
-int page_key_alloc(unsigned long pages)
-{
- struct page_key_data *pk;
- unsigned long size;
-
- size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
- while (size--) {
- pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
- if (!pk) {
- page_key_free();
- return -ENOMEM;
- }
- pk->next = page_key_data;
- page_key_data = pk;
- }
- page_key_rp = page_key_wp = page_key_data;
- page_key_rx = page_key_wx = 0;
- return 0;
-}
-
-/*
- * Save the storage key into the upper 8 bits of the page frame number.
- */
-void page_key_read(unsigned long *pfn)
-{
- struct page *page;
- unsigned long addr;
- unsigned char key;
-
- page = pfn_to_page(*pfn);
- addr = (unsigned long) page_address(page);
- key = (unsigned char) page_get_storage_key(addr) & 0x7f;
- if (arch_test_page_nodat(page))
- key |= 0x80;
- *(unsigned char *) pfn = key;
-}
-
-/*
- * Extract the storage key from the upper 8 bits of the page frame number
- * and store it in the page_key_data list of arrays.
- */
-void page_key_memorize(unsigned long *pfn)
-{
- page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
- *(unsigned char *) pfn = 0;
- if (++page_key_wx < PAGE_KEY_DATA_SIZE)
- return;
- page_key_wp = page_key_wp->next;
- page_key_wx = 0;
-}
-
-/*
- * Get the next key from the page_key_data list of arrays and set the
- * storage key of the page referred by @address. If @address refers to
- * a "safe" page the swsusp_arch_resume code will transfer the storage
- * key from the buffer page to the original page.
- */
-void page_key_write(void *address)
-{
- struct page *page;
- unsigned char key;
-
- key = page_key_rp->data[page_key_rx];
- page_set_storage_key((unsigned long) address, key & 0x7f, 0);
- page = virt_to_page(address);
- if (key & 0x80)
- arch_set_page_nodat(page, 0);
- else
- arch_set_page_dat(page, 0);
- if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
- return;
- page_key_rp = page_key_rp->next;
- page_key_rx = 0;
-}
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
- unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
- unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
- unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
-
- /* Always save lowcore pages (LC protection might be enabled). */
- if (pfn <= LC_PAGES)
- return 0;
- if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
- return 1;
- /* Skip memory holes and read-only pages (DCSS, ...). */
- if (pfn >= stext_pfn && pfn <= end_rodata_pfn)
- return 0;
- if (tprot(PFN_PHYS(pfn)))
- return 1;
- return 0;
-}
-
-/*
- * PM notifier callback for suspend
- */
-static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
- void *ptr)
-{
- switch (action) {
- case PM_SUSPEND_PREPARE:
- case PM_HIBERNATION_PREPARE:
- suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
- if (!suspend_zero_pages)
- return NOTIFY_BAD;
- break;
- case PM_POST_SUSPEND:
- case PM_POST_HIBERNATION:
- free_pages(suspend_zero_pages, LC_ORDER);
- break;
- default:
- return NOTIFY_DONE;
- }
- return NOTIFY_OK;
-}
-
-static int __init suspend_pm_init(void)
-{
- pm_notifier(suspend_pm_cb, 0);
- return 0;
-}
-arch_initcall(suspend_pm_init);
-
-void save_processor_state(void)
-{
- /* swsusp_arch_suspend() actually saves all cpu register contents.
- * Machine checks must be disabled since swsusp_arch_suspend() stores
- * register contents to their lowcore save areas. That's the same
- * place where register contents on machine checks would be saved.
- * To avoid register corruption disable machine checks.
- * We must also disable machine checks in the new psw mask for
- * program checks, since swsusp_arch_suspend() may generate program
- * checks. Disabling machine checks for all other new psw masks is
- * just paranoia.
- */
- local_mcck_disable();
- /* Disable lowcore protection */
- __ctl_clear_bit(0,28);
- S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
- S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
-}
-
-void restore_processor_state(void)
-{
- S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
- /* Enable lowcore protection */
- __ctl_set_bit(0,28);
- local_mcck_enable();
-}
-
-/* Called at the end of swsusp_arch_resume */
-void s390_early_resume(void)
-{
- lgr_info_log();
- channel_subsystem_reinit();
- zpci_rescan();
-}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
deleted file mode 100644
index a7baf0b..0000000
--- a/arch/s390/kernel/swsusp.S
+++ /dev/null
@@ -1,276 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 64-bit swsusp implementation
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- * Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/nospec-insn.h>
-#include <asm/sigp.h>
-
-/*
- * Save register context in absolute 0 lowcore and call swsusp_save() to
- * create in-memory kernel image. The context is saved in the designated
- * "store status" memory locations (see POP).
- * We return from this function twice. The first time during the suspend to
- * disk process. The second time via the swsusp_arch_resume() function
- * (see below) in the resume process.
- * This function runs with disabled interrupts.
- */
- GEN_BR_THUNK %r14
-
- .section .text
-ENTRY(swsusp_arch_suspend)
- lg %r1,__LC_NODAT_STACK
- stmg %r6,%r15,__SF_GPRS(%r1)
- aghi %r1,-STACK_FRAME_OVERHEAD
- stg %r15,__SF_BACKCHAIN(%r1)
- lgr %r15,%r1
-
- /* Store FPU registers */
- brasl %r14,save_fpu_regs
-
- /* Deactivate DAT */
- stnsm __SF_EMPTY(%r15),0xfb
-
- /* Store prefix register on stack */
- stpx __SF_EMPTY(%r15)
-
- /* Save prefix register contents for lowcore copy */
- llgf %r10,__SF_EMPTY(%r15)
-
- /* Get pointer to save area */
- lghi %r1,0x1000
-
- /* Save CPU address */
- stap __LC_EXT_CPU_ADDR(%r0)
-
- /* Store registers */
- mvc 0x318(4,%r1),__SF_EMPTY(%r15) /* move prefix to lowcore */
- stam %a0,%a15,0x340(%r1) /* store access registers */
- stctg %c0,%c15,0x380(%r1) /* store control registers */
- stmg %r0,%r15,0x280(%r1) /* store general registers */
-
- stpt 0x328(%r1) /* store timer */
- stck __SF_EMPTY(%r15) /* store clock */
- stckc 0x330(%r1) /* store clock comparator */
-
- /* Update cputime accounting before going to sleep */
- lg %r0,__LC_LAST_UPDATE_TIMER
- slg %r0,0x328(%r1)
- alg %r0,__LC_SYSTEM_TIMER
- stg %r0,__LC_SYSTEM_TIMER
- mvc __LC_LAST_UPDATE_TIMER(8),0x328(%r1)
- lg %r0,__LC_LAST_UPDATE_CLOCK
- slg %r0,__SF_EMPTY(%r15)
- alg %r0,__LC_STEAL_TIMER
- stg %r0,__LC_STEAL_TIMER
- mvc __LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
-
- /* Activate DAT */
- stosm __SF_EMPTY(%r15),0x04
-
- /* Set prefix page to zero */
- xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
- spx __SF_EMPTY(%r15)
-
- /* Save absolute zero pages */
- larl %r2,suspend_zero_pages
- lg %r2,0(%r2)
- lghi %r4,0
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Copy lowcore to absolute zero lowcore */
- lghi %r2,0
- lgr %r4,%r10
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Save image */
- brasl %r14,swsusp_save
-
- /* Restore prefix register and return */
- lghi %r1,0x1000
- spx 0x318(%r1)
- lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
- lghi %r2,0
- BR_EX %r14
-ENDPROC(swsusp_arch_suspend)
-
-/*
- * Restore saved memory image to correct place and restore register context.
- * Then we return to the function that called swsusp_arch_suspend().
- * swsusp_arch_resume() runs with disabled interrupts.
- */
-ENTRY(swsusp_arch_resume)
- stmg %r6,%r15,__SF_GPRS(%r15)
- lgr %r1,%r15
- aghi %r15,-STACK_FRAME_OVERHEAD
- stg %r1,__SF_BACKCHAIN(%r15)
-
- /* Make all free pages stable */
- lghi %r2,1
- brasl %r14,arch_set_page_states
-
- /* Set prefix page to zero */
- xc __SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
- spx __SF_EMPTY(%r15)
-
- /* Deactivate DAT */
- stnsm __SF_EMPTY(%r15),0xfb
-
- /* Restore saved image */
- larl %r1,restore_pblist
- lg %r1,0(%r1)
- ltgr %r1,%r1
- jz 2f
-0:
- lg %r2,8(%r1)
- lg %r4,0(%r1)
- iske %r0,%r4
- lghi %r3,PAGE_SIZE
- lghi %r5,PAGE_SIZE
-1:
- mvcle %r2,%r4,0
- jo 1b
- lg %r2,8(%r1)
- sske %r0,%r2
- lg %r1,16(%r1)
- ltgr %r1,%r1
- jnz 0b
-2:
- ptlb /* flush tlb */
-
- /* Reset System */
- larl %r1,.Lnew_pgm_check_psw
- epsw %r2,%r3
- stm %r2,%r3,0(%r1)
- mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
- larl %r1,__swsusp_reset_dma
- lg %r1,0(%r1)
- BASR_EX %r14,%r1
- larl %r1,smp_cpu_mt_shift
- icm %r1,15,0(%r1)
- jz smt_done
- llgfr %r1,%r1
-smt_loop:
- sigp %r1,%r0,SIGP_SET_MULTI_THREADING
- brc 8,smt_done /* accepted */
- brc 2,smt_loop /* busy, try again */
-smt_done:
- larl %r1,.Lnew_pgm_check_psw
- lpswe 0(%r1)
-pgm_check_entry:
-
- /* Switch to original suspend CPU */
- larl %r1,.Lresume_cpu /* Resume CPU address: r2 */
- stap 0(%r1)
- llgh %r2,0(%r1)
- llgh %r1,__LC_EXT_CPU_ADDR(%r0) /* Suspend CPU address: r1 */
- cgr %r1,%r2
- je restore_registers /* r1 = r2 -> nothing to do */
- larl %r4,.Lrestart_suspend_psw /* Set new restart PSW */
- mvc __LC_RST_NEW_PSW(16,%r0),0(%r4)
-3:
- sigp %r9,%r1,SIGP_INITIAL_CPU_RESET /* sigp initial cpu reset */
- brc 8,4f /* accepted */
- brc 2,3b /* busy, try again */
-
- /* Suspend CPU not available -> panic */
- larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
- larl %r2,.Lpanic_string
- brasl %r14,sclp_early_printk_force
- larl %r3,.Ldisabled_wait_31
- lpsw 0(%r3)
-4:
- /* Switch to suspend CPU */
- sigp %r9,%r1,SIGP_RESTART /* sigp restart to suspend CPU */
- brc 2,4b /* busy, try again */
-5:
- sigp %r9,%r2,SIGP_STOP /* sigp stop to current resume CPU */
- brc 2,5b /* busy, try again */
-6: j 6b
-
-restart_suspend:
- larl %r1,.Lresume_cpu
- llgh %r2,0(%r1)
-7:
- sigp %r9,%r2,SIGP_SENSE /* sigp sense, wait for resume CPU */
- brc 8,7b /* accepted, status 0, still running */
- brc 2,7b /* busy, try again */
- tmll %r9,0x40 /* Test if resume CPU is stopped */
- jz 7b
-
-restore_registers:
- /* Restore registers */
- lghi %r13,0x1000 /* %r1 = pointer to save area */
-
- /* Ignore time spent in suspended state. */
- llgf %r1,0x318(%r13)
- stck __LC_LAST_UPDATE_CLOCK(%r1)
- spt 0x328(%r13) /* reprogram timer */
- //sckc 0x330(%r13) /* set clock comparator */
-
- lctlg %c0,%c15,0x380(%r13) /* load control registers */
- lam %a0,%a15,0x340(%r13) /* load access registers */
-
- /* Load old stack */
- lg %r15,0x2f8(%r13)
-
- /* Save prefix register */
- mvc __SF_EMPTY(4,%r15),0x318(%r13)
-
- /* Restore absolute zero pages */
- lghi %r2,0
- larl %r4,suspend_zero_pages
- lg %r4,0(%r4)
- lghi %r3,2*PAGE_SIZE
- lghi %r5,2*PAGE_SIZE
-1: mvcle %r2,%r4,0
- jo 1b
-
- /* Restore prefix register */
- spx __SF_EMPTY(%r15)
-
- /* Activate DAT */
- stosm __SF_EMPTY(%r15),0x04
-
- /* Make all free pages unstable */
- lghi %r2,0
- brasl %r14,arch_set_page_states
-
- /* Call arch specific early resume code */
- brasl %r14,s390_early_resume
-
- /* Return 0 */
- lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
- lghi %r2,0
- BR_EX %r14
-ENDPROC(swsusp_arch_resume)
-
- .section .data..nosave,"aw",@progbits
- .align 8
-.Ldisabled_wait_31:
- .long 0x000a0000,0x00000000
-.Lpanic_string:
- .asciz "Resume not possible because suspend CPU is no longer available\n"
- .align 8
-.Lrestart_suspend_psw:
- .quad 0x0000000180000000,restart_suspend
-.Lnew_pgm_check_psw:
- .quad 0,pgm_check_entry
-.Lresume_cpu:
- .byte 0,0
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c..28c1680 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
16 32 lchown - sys_lchown16
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid sys_getpid
-21 common mount sys_mount compat_sys_mount
+21 common mount sys_mount sys_mount
22 common umount sys_oldumount sys_oldumount
23 32 setuid - sys_setuid16
24 32 getuid - sys_getuid16
@@ -134,11 +134,11 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv sys_readv
+146 common writev sys_writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock sys_mlock
151 common munlock sys_munlock sys_munlock
152 common mlockall sys_mlockall sys_mlockall
@@ -316,7 +316,7 @@
306 common splice sys_splice sys_splice
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
308 common tee sys_tee sys_tee
-309 common vmsplice sys_vmsplice compat_sys_vmsplice
+309 common vmsplice sys_vmsplice sys_vmsplice
310 common move_pages sys_move_pages compat_sys_move_pages
311 common getcpu sys_getcpu sys_getcpu
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32
338 common syncfs sys_syncfs sys_syncfs
339 common setns sys_setns sys_setns
-340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev
342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
343 common kcmp sys_kcmp sys_kcmp
344 common finit_module sys_finit_module sys_finit_module
@@ -372,8 +372,8 @@
362 common connect sys_connect sys_connect
363 common listen sys_listen sys_listen
364 common accept4 sys_accept4 sys_accept4
-365 common getsockopt sys_getsockopt compat_sys_getsockopt
-366 common setsockopt sys_setsockopt compat_sys_setsockopt
+365 common getsockopt sys_getsockopt sys_getsockopt
+366 common setsockopt sys_setsockopt sys_setsockopt
367 common getsockname sys_getsockname sys_getsockname
368 common getpeername sys_getpeername sys_getpeername
369 common sendto sys_sendto sys_sendto
@@ -438,3 +438,8 @@
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
435 common clone3 sys_clone3 sys_clone3
+436 common close_range sys_close_range sys_close_range
+437 common openat2 sys_openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise sys_process_madvise
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 11c32b2..b651745 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -41,6 +41,9 @@
#include <linux/gfp.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
+#include <vdso/vsyscall.h>
+#include <vdso/clocksource.h>
+#include <vdso/helpers.h>
#include <asm/facility.h>
#include <asm/delay.h>
#include <asm/div64.h>
@@ -84,7 +87,7 @@
/* Initialize TOD steering parameters */
tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
- vdso_data->ts_end = tod_steering_end;
+ vdso_data->arch_data.tod_steering_end = tod_steering_end;
if (!test_facility(28))
return;
@@ -110,15 +113,6 @@
}
NOKPROBE_SYMBOL(sched_clock);
-/*
- * Monotonic_clock - returns # of nanoseconds passed since time_init()
- */
-unsigned long long monotonic_clock(void)
-{
- return sched_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
{
unsigned long long high, low, rem, sec, nsec;
@@ -246,7 +240,7 @@
preempt_disable(); /* protect from changes to steering parameters */
now = get_tod_clock();
adj = tod_steering_end - now;
- if (unlikely((s64) adj >= 0))
+ if (unlikely((s64) adj > 0))
/*
* manually steer by 1 cycle every 2^16 cycles. This
* corresponds to shifting the tod delta by 15. 1s is
@@ -262,10 +256,11 @@
.name = "tod",
.rating = 400,
.read = read_tod_clock,
- .mask = -1ULL,
+ .mask = CLOCKSOURCE_MASK(64),
.mult = 1000,
.shift = 12,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .vdso_clock_mode = VDSO_CLOCKMODE_TOD,
};
struct clocksource * __init clocksource_default_clock(void)
@@ -273,56 +268,6 @@
return &clocksource_tod;
}
-void update_vsyscall(struct timekeeper *tk)
-{
- u64 nsecps;
-
- if (tk->tkr_mono.clock != &clocksource_tod)
- return;
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_wmb();
- vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
- vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
- vdso_data->wtom_clock_sec =
- tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
- + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
- nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
- while (vdso_data->wtom_clock_nsec >= nsecps) {
- vdso_data->wtom_clock_nsec -= nsecps;
- vdso_data->wtom_clock_sec++;
- }
-
- vdso_data->xtime_coarse_sec = tk->xtime_sec;
- vdso_data->xtime_coarse_nsec =
- (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
- vdso_data->wtom_coarse_sec =
- vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_coarse_nsec =
- vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
- while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
- vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
- vdso_data->wtom_coarse_sec++;
- }
-
- vdso_data->tk_mult = tk->tkr_mono.mult;
- vdso_data->tk_shift = tk->tkr_mono.shift;
- vdso_data->hrtimer_res = hrtimer_resolution;
- smp_wmb();
- ++vdso_data->tb_update_count;
-}
-
-extern struct timezone sys_tz;
-
-void update_vsyscall_tz(void)
-{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
/*
* Initialize the TOD clock and the CPU timer of
* the boot cpu.
@@ -351,7 +296,7 @@
}
static DEFINE_PER_CPU(atomic_t, clock_sync_word);
-static DEFINE_MUTEX(clock_sync_mutex);
+static DEFINE_MUTEX(stp_mutex);
static unsigned long clock_sync_flags;
#define CLOCK_SYNC_HAS_STP 0
@@ -441,7 +386,6 @@
/* Epoch overflow */
tod_clock_base[0]++;
/* Adjust TOD steering parameters. */
- vdso_data->tb_update_count++;
now = get_tod_clock();
adj = tod_steering_end - now;
if (unlikely((s64) adj >= 0))
@@ -453,9 +397,9 @@
panic("TOD clock sync offset %lli is too large to drift\n",
tod_steering_delta);
tod_steering_end = now + (abs(tod_steering_delta) << 15);
- vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1;
- vdso_data->ts_end = tod_steering_end;
- vdso_data->tb_update_count++;
+ vdso_data->arch_data.tod_steering_end = tod_steering_end;
+ vdso_data->arch_data.tod_steering_delta = tod_steering_delta;
+
/* Update LPAR offset. */
if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
lpar_offset = qto.tod_epoch_difference;
@@ -502,7 +446,6 @@
static void *stp_page;
static void stp_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(stp_work_mutex);
static DECLARE_WORK(stp_work, stp_work_fn);
static struct timer_list stp_timer;
@@ -612,7 +555,7 @@
static int stp_sync_clock(void *data)
{
struct clock_sync_data *sync = data;
- unsigned long long clock_delta;
+ unsigned long long clock_delta, flags;
static int first;
int rc;
@@ -625,6 +568,7 @@
if (stp_info.todoff[0] || stp_info.todoff[1] ||
stp_info.todoff[2] || stp_info.todoff[3] ||
stp_info.tmd != 2) {
+ flags = vdso_update_begin();
rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
&clock_delta);
if (rc == 0) {
@@ -634,6 +578,7 @@
if (rc == 0 && stp_info.tmd != 2)
rc = -EAGAIN;
}
+ vdso_update_end(flags);
}
sync->in_sync = rc ? -EAGAIN : 1;
xchg(&first, 0);
@@ -653,6 +598,81 @@
return 0;
}
+static int stp_clear_leap(void)
+{
+ struct __kernel_timex txc;
+ int ret;
+
+ memset(&txc, 0, sizeof(txc));
+
+ ret = do_adjtimex(&txc);
+ if (ret < 0)
+ return ret;
+
+ txc.modes = ADJ_STATUS;
+ txc.status &= ~(STA_INS|STA_DEL);
+ return do_adjtimex(&txc);
+}
+
+static void stp_check_leap(void)
+{
+ struct stp_stzi stzi;
+ struct stp_lsoib *lsoib = &stzi.lsoib;
+ struct __kernel_timex txc;
+ int64_t timediff;
+ int leapdiff, ret;
+
+ if (!stp_info.lu || !check_sync_clock()) {
+ /*
+ * Either a scheduled leap second was removed by the operator,
+ * or STP is out of sync. In both cases, clear the leap second
+ * kernel flags.
+ */
+ if (stp_clear_leap() < 0)
+ pr_err("failed to clear leap second flags\n");
+ return;
+ }
+
+ if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
+ pr_err("stzi failed\n");
+ return;
+ }
+
+ timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
+ leapdiff = lsoib->nlso - lsoib->also;
+
+ if (leapdiff != 1 && leapdiff != -1) {
+ pr_err("Cannot schedule %d leap seconds\n", leapdiff);
+ return;
+ }
+
+ if (timediff < 0) {
+ if (stp_clear_leap() < 0)
+ pr_err("failed to clear leap second flags\n");
+ } else if (timediff < 7200) {
+ memset(&txc, 0, sizeof(txc));
+ ret = do_adjtimex(&txc);
+ if (ret < 0)
+ return;
+
+ txc.modes = ADJ_STATUS;
+ if (leapdiff > 0)
+ txc.status |= STA_INS;
+ else
+ txc.status |= STA_DEL;
+ ret = do_adjtimex(&txc);
+ if (ret < 0)
+ pr_err("failed to set leap second flags\n");
+ /* arm Timer to clear leap second flags */
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+ } else {
+ /* The day the leap second is scheduled for hasn't been reached. Retry
+ * in one hour.
+ */
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+ }
+}
+
/*
* STP work. Check for the STP state and take over the clock
* synchronization if the STP clock source is usable.
@@ -663,7 +683,7 @@
int rc;
/* prevent multiple execution. */
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (!stp_online) {
chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
@@ -671,7 +691,7 @@
goto out_unlock;
}
- rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
+ rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL);
if (rc)
goto out_unlock;
@@ -680,24 +700,25 @@
goto out_unlock;
/* Skip synchronization if the clock is already in sync. */
- if (check_sync_clock())
- goto out_unlock;
-
- memset(&stp_sync, 0, sizeof(stp_sync));
- cpus_read_lock();
- atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
- stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
- cpus_read_unlock();
+ if (!check_sync_clock()) {
+ memset(&stp_sync, 0, sizeof(stp_sync));
+ cpus_read_lock();
+ atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+ stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+ cpus_read_unlock();
+ }
if (!check_sync_clock())
/*
* There is a usable clock but the synchonization failed.
* Retry after a second.
*/
- mod_timer(&stp_timer, jiffies + HZ);
+ mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+ else if (stp_info.lu)
+ stp_check_leap();
out_unlock:
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
}
/*
@@ -708,151 +729,178 @@
.dev_name = "stp",
};
-static ssize_t stp_ctn_id_show(struct device *dev,
+static ssize_t ctn_id_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid())
ret = sprintf(buf, "%016llx\n",
*(unsigned long long *) stp_info.ctnid);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+static DEVICE_ATTR_RO(ctn_id);
-static ssize_t stp_ctn_type_show(struct device *dev,
+static ssize_t ctn_type_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid())
ret = sprintf(buf, "%i\n", stp_info.ctn);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+static DEVICE_ATTR_RO(ctn_type);
-static ssize_t stp_dst_offset_show(struct device *dev,
+static ssize_t dst_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x2000))
ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+static DEVICE_ATTR_RO(dst_offset);
-static ssize_t stp_leap_seconds_show(struct device *dev,
+static ssize_t leap_seconds_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x8000))
ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+static DEVICE_ATTR_RO(leap_seconds);
-static ssize_t stp_stratum_show(struct device *dev,
+static ssize_t leap_seconds_scheduled_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct stp_stzi stzi;
+ ssize_t ret;
+
+ mutex_lock(&stp_mutex);
+ if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) {
+ mutex_unlock(&stp_mutex);
+ return -ENODATA;
+ }
+
+ ret = chsc_stzi(stp_page, &stzi, sizeof(stzi));
+ mutex_unlock(&stp_mutex);
+ if (ret < 0)
+ return ret;
+
+ if (!stzi.lsoib.p)
+ return sprintf(buf, "0,0\n");
+
+ return sprintf(buf, "%llu,%d\n",
+ tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+ stzi.lsoib.nlso - stzi.lsoib.also);
+}
+
+static DEVICE_ATTR_RO(leap_seconds_scheduled);
+
+static ssize_t stratum_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid())
ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+static DEVICE_ATTR_RO(stratum);
-static ssize_t stp_time_offset_show(struct device *dev,
+static ssize_t time_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x0800))
ret = sprintf(buf, "%i\n", (int) stp_info.tto);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+static DEVICE_ATTR_RO(time_offset);
-static ssize_t stp_time_zone_offset_show(struct device *dev,
+static ssize_t time_zone_offset_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid() && (stp_info.vbits & 0x4000))
ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(time_zone_offset, 0400,
- stp_time_zone_offset_show, NULL);
+static DEVICE_ATTR_RO(time_zone_offset);
-static ssize_t stp_timing_mode_show(struct device *dev,
+static ssize_t timing_mode_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid())
ret = sprintf(buf, "%i\n", stp_info.tmd);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+static DEVICE_ATTR_RO(timing_mode);
-static ssize_t stp_timing_state_show(struct device *dev,
+static ssize_t timing_state_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t ret = -ENODATA;
- mutex_lock(&stp_work_mutex);
+ mutex_lock(&stp_mutex);
if (stpinfo_valid())
ret = sprintf(buf, "%i\n", stp_info.tst);
- mutex_unlock(&stp_work_mutex);
+ mutex_unlock(&stp_mutex);
return ret;
}
-static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+static DEVICE_ATTR_RO(timing_state);
-static ssize_t stp_online_show(struct device *dev,
+static ssize_t online_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sprintf(buf, "%i\n", stp_online);
}
-static ssize_t stp_online_store(struct device *dev,
+static ssize_t online_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -863,14 +911,14 @@
return -EINVAL;
if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
return -EOPNOTSUPP;
- mutex_lock(&clock_sync_mutex);
+ mutex_lock(&stp_mutex);
stp_online = value;
if (stp_online)
set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
else
clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
queue_work(time_sync_wq, &stp_work);
- mutex_unlock(&clock_sync_mutex);
+ mutex_unlock(&stp_mutex);
return count;
}
@@ -878,18 +926,15 @@
* Can't use DEVICE_ATTR because the attribute should be named
* stp/online but dev_attr_online already exists in this file ..
*/
-static struct device_attribute dev_attr_stp_online = {
- .attr = { .name = "online", .mode = 0600 },
- .show = stp_online_show,
- .store = stp_online_store,
-};
+static DEVICE_ATTR_RW(online);
static struct device_attribute *stp_attributes[] = {
&dev_attr_ctn_id,
&dev_attr_ctn_type,
&dev_attr_dst_offset,
&dev_attr_leap_seconds,
- &dev_attr_stp_online,
+ &dev_attr_online,
+ &dev_attr_leap_seconds_scheduled,
&dev_attr_stratum,
&dev_attr_time_offset,
&dev_attr_time_zone_offset,
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3627953..ca47141 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -26,7 +26,6 @@
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
-#include <asm/numa.h>
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
@@ -63,8 +62,6 @@
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
-cpumask_t cpus_with_topology;
-
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
cpumask_t mask;
@@ -86,11 +83,12 @@
cpumask_copy(&mask, cpu_present_mask);
break;
default:
- /* fallthrough */
+ fallthrough;
case TOPOLOGY_MODE_SINGLE:
cpumask_copy(&mask, cpumask_of(cpu));
break;
}
+ cpumask_and(&mask, &mask, cpu_online_mask);
return mask;
}
@@ -106,6 +104,7 @@
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_present(cpu + i))
cpumask_set_cpu(cpu + i, &mask);
+ cpumask_and(&mask, &mask, cpu_online_mask);
return mask;
}
@@ -138,7 +137,6 @@
cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
- cpumask_set_cpu(lcpu + i, &cpus_with_topology);
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
}
@@ -245,10 +243,10 @@
return rc;
}
-static void update_cpu_masks(void)
+void update_cpu_masks(void)
{
- struct cpu_topology_s390 *topo;
- int cpu, id;
+ struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
+ int cpu, sibling, pkg_first, smt_first, id;
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
@@ -256,6 +254,7 @@
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+ topo->booted_cores = 0;
if (topology_mode != TOPOLOGY_MODE_HW) {
id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
@@ -263,11 +262,23 @@
topo->socket_id = id;
topo->book_id = id;
topo->drawer_id = id;
- if (cpu_present(cpu))
- cpumask_set_cpu(cpu, &cpus_with_topology);
}
}
- numa_update_cpu_topology();
+ for_each_online_cpu(cpu) {
+ topo = &cpu_topology[cpu];
+ pkg_first = cpumask_first(&topo->core_mask);
+ topo_package = &cpu_topology[pkg_first];
+ if (cpu == pkg_first) {
+ for_each_cpu(sibling, &topo->core_mask) {
+ topo_sibling = &cpu_topology[sibling];
+ smt_first = cpumask_first(&topo_sibling->thread_mask);
+ if (sibling == smt_first)
+ topo_package->booted_cores++;
+ }
+ } else {
+ topo->booted_cores = topo_package->booted_cores;
+ }
+ }
}
void store_topology(struct sysinfo_15_1_x *info)
@@ -289,7 +300,6 @@
int rc = 0;
mutex_lock(&smp_cpu_state_mutex);
- cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
store_topology(info);
@@ -346,9 +356,9 @@
static void set_topology_timer(void)
{
if (atomic_add_unless(&topology_poll, -1, 0))
- mod_timer(&topology_timer, jiffies + HZ / 10);
+ mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
else
- mod_timer(&topology_timer, jiffies + HZ * 60);
+ mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}
void topology_expect_change(void)
@@ -584,7 +594,7 @@
early_param("topology", topology_setup);
static int topology_ctl_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
int enabled = topology_is_enabled();
int new_mode;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 164c028..16934fa 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -50,16 +50,8 @@
} else {
const struct exception_table_entry *fixup;
fixup = s390_search_extables(regs->psw.addr);
- if (fixup)
- regs->psw.addr = extable_fixup(fixup);
- else {
- enum bug_trap_type btt;
-
- btt = report_bug(regs->psw.addr, regs);
- if (btt == BUG_TRAP_TYPE_WARN)
- return;
+ if (!fixup || !ex_handle(fixup, regs))
die(regs, str);
- }
}
}
@@ -245,6 +237,27 @@
do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
}
+void monitor_event_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *fixup;
+
+ if (user_mode(regs))
+ return;
+
+ switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
+ case BUG_TRAP_TYPE_NONE:
+ fixup = s390_search_extables(regs->psw.addr);
+ if (fixup)
+ ex_handle(fixup, regs);
+ break;
+ case BUG_TRAP_TYPE_WARN:
+ break;
+ case BUG_TRAP_TYPE_BUG:
+ die(regs, "monitor event");
+ break;
+ }
+}
+
void kernel_stack_overflow(struct pt_regs *regs)
{
bust_spinlocks(1);
@@ -255,8 +268,25 @@
}
NOKPROBE_SYMBOL(kernel_stack_overflow);
+static void __init test_monitor_call(void)
+{
+ int val = 1;
+
+ if (!IS_ENABLED(CONFIG_BUG))
+ return;
+ asm volatile(
+ " mc 0,0\n"
+ "0: xgr %0,%0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "+d" (val));
+ if (!val)
+ panic("Monitor call doesn't work!\n");
+}
+
void __init trap_init(void)
{
sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
local_mcck_enable();
+ test_monitor_call();
}
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 6e609b1..707fd99 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -36,6 +36,19 @@
return true;
}
+static inline bool is_final_pt_regs(struct unwind_state *state,
+ struct pt_regs *regs)
+{
+ /* user mode or kernel thread pt_regs at the bottom of task stack */
+ if (task_pt_regs(state->task) == regs)
+ return true;
+
+ /* user mode pt_regs at the bottom of irq stack */
+ return state->stack_info.type == STACK_TYPE_IRQ &&
+ state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+ READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
@@ -46,16 +59,7 @@
regs = state->regs;
if (unlikely(regs)) {
- if (state->reuse_sp) {
- sp = state->sp;
- state->reuse_sp = false;
- } else {
- sp = READ_ONCE_NOCHECK(regs->gprs[15]);
- if (unlikely(outside_of_stack(state, sp))) {
- if (!update_stack_info(state, sp))
- goto out_err;
- }
- }
+ sp = state->sp;
sf = (struct stack_frame *) sp;
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
@@ -81,21 +85,25 @@
/* No back-chain, look for a pt_regs structure */
sp = state->sp + STACK_FRAME_OVERHEAD;
if (!on_stack(info, sp, sizeof(struct pt_regs)))
- goto out_stop;
+ goto out_err;
regs = (struct pt_regs *) sp;
- if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
+ if (is_final_pt_regs(state, regs))
goto out_stop;
ip = READ_ONCE_NOCHECK(regs->psw.addr);
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
reliable = true;
}
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, (void *) sp);
-#endif
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (sp & 0x7)
+ goto out_err;
+
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
/* Update unwind state */
state->sp = sp;
@@ -113,13 +121,11 @@
EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
- struct pt_regs *regs, unsigned long sp)
+ struct pt_regs *regs, unsigned long first_frame)
{
struct stack_info *info = &state->stack_info;
- unsigned long *mask = &state->stack_mask;
- bool reliable, reuse_sp;
struct stack_frame *sf;
- unsigned long ip;
+ unsigned long ip, sp;
memset(state, 0, sizeof(*state));
state->task = task;
@@ -131,38 +137,46 @@
return;
}
+ /* Get the instruction pointer from pt_regs or the stack frame */
+ if (regs) {
+ ip = regs->psw.addr;
+ sp = regs->gprs[15];
+ } else if (task == current) {
+ sp = current_frame_address();
+ } else {
+ sp = task->thread.ksp;
+ }
+
/* Get current stack pointer and initialize stack info */
- if (get_stack_info(sp, task, info, mask) != 0 ||
- !on_stack(info, sp, sizeof(struct stack_frame))) {
+ if (!update_stack_info(state, sp)) {
/* Something is wrong with the stack pointer */
info->type = STACK_TYPE_UNKNOWN;
state->error = true;
return;
}
- /* Get the instruction pointer from pt_regs or the stack frame */
- if (regs) {
- ip = READ_ONCE_NOCHECK(regs->psw.addr);
- reliable = true;
- reuse_sp = true;
- } else {
- sf = (struct stack_frame *) sp;
+ if (!regs) {
+ /* Stack frame is within valid stack */
+ sf = (struct stack_frame *)sp;
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
- reliable = false;
- reuse_sp = false;
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, NULL);
-#endif
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
/* Update unwind state */
state->sp = sp;
state->ip = ip;
- state->reliable = reliable;
- state->reuse_sp = reuse_sp;
+ state->reliable = true;
+
+ if (!first_frame)
+ return;
+ /* Skip through the call chain to the specified starting frame */
+ while (!unwind_done(state)) {
+ if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+ if (state->sp >= first_frame)
+ break;
+ }
+ unwind_next_frame(state);
+ }
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 0000000..c811b23
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2020
+ */
+#define KMSG_COMPONENT "prot_virt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
+struct uv_info __bootdata_preserved(uv_info);
+
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+EXPORT_SYMBOL(prot_virt_host);
+EXPORT_SYMBOL(uv_info);
+
+static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+{
+ struct uv_cb_init uvcb = {
+ .header.cmd = UVC_CMD_INIT_UV,
+ .header.len = sizeof(uvcb),
+ .stor_origin = stor_base,
+ .stor_len = stor_len,
+ };
+
+ if (uv_call(0, (uint64_t)&uvcb)) {
+ pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+ uvcb.header.rc, uvcb.header.rrc);
+ return -1;
+ }
+ return 0;
+}
+
+void __init setup_uv(void)
+{
+ unsigned long uv_stor_base;
+
+ /*
+ * keep these conditions in line with kasan init code has_uv_sec_stor_limit()
+ */
+ if (!is_prot_virt_host())
+ return;
+
+ if (is_prot_virt_guest()) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not available in protected guests.");
+ return;
+ }
+
+ if (!test_facility(158)) {
+ prot_virt_host = 0;
+ pr_warn("Protected virtualization not supported by the hardware.");
+ return;
+ }
+
+ uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+ uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+ MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+ if (!uv_stor_base) {
+ pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+ uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+ goto fail;
+ }
+
+ pr_info("Reserving %luMB as ultravisor base storage\n",
+ uv_info.uv_base_stor_len >> 20);
+ return;
+fail:
+ pr_info("Disabling support for protected virtualization");
+ prot_virt_host = 0;
+}
+
+void adjust_to_uv_max(unsigned long *vmax)
+{
+ if (uv_info.max_sec_stor_addr)
+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+static int uv_pin_shared(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr,
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Requests the Ultravisor to destroy a guest page and make it
+ * accessible to the host. The destroy clears the page instead of
+ * exporting.
+ *
+ * @paddr: Absolute host address of page to be destroyed
+ */
+int uv_destroy_page(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_DESTR_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb)) {
+ /*
+ * Older firmware uses 107/d as an indication of a non secure
+ * page. Let us emulate the newer variant (no-op).
+ */
+ if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
+ return 0;
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+ struct uv_cb_cfs uvcb = {
+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .paddr = paddr
+ };
+
+ if (uv_call(0, (u64)&uvcb))
+ return -EINVAL;
+ return 0;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page can not be a huge page for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+ int res;
+
+ res = page_mapcount(page);
+ if (PageSwapCache(page)) {
+ res++;
+ } else if (page_mapping(page)) {
+ res++;
+ if (page_has_private(page))
+ res++;
+ }
+ return res;
+}
+
+static int make_secure_pte(pte_t *ptep, unsigned long addr,
+ struct page *exp_page, struct uv_cb_header *uvcb)
+{
+ pte_t entry = READ_ONCE(*ptep);
+ struct page *page;
+ int expected, rc = 0;
+
+ if (!pte_present(entry))
+ return -ENXIO;
+ if (pte_val(entry) & _PAGE_INVALID)
+ return -ENXIO;
+
+ page = pte_page(entry);
+ if (page != exp_page)
+ return -ENXIO;
+ if (PageWriteback(page))
+ return -EAGAIN;
+ expected = expected_page_refs(page);
+ if (!page_ref_freeze(page, expected))
+ return -EBUSY;
+ set_bit(PG_arch_1, &page->flags);
+ rc = uv_call(0, (u64)uvcb);
+ page_ref_unfreeze(page, expected);
+ /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
+ if (rc)
+ rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+ return rc;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+ struct vm_area_struct *vma;
+ bool local_drain = false;
+ spinlock_t *ptelock;
+ unsigned long uaddr;
+ struct page *page;
+ pte_t *ptep;
+ int rc;
+
+again:
+ rc = -EFAULT;
+ mmap_read_lock(gmap->mm);
+
+ uaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(uaddr))
+ goto out;
+ vma = find_vma(gmap->mm, uaddr);
+ if (!vma)
+ goto out;
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode. If
+ * userspace is playing dirty tricky with mapping huge pages later
+ * on this will result in a segmentation fault.
+ */
+ if (is_vm_hugetlb_page(vma))
+ goto out;
+
+ rc = -ENXIO;
+ page = follow_page(vma, uaddr, FOLL_WRITE);
+ if (IS_ERR_OR_NULL(page))
+ goto out;
+
+ lock_page(page);
+ ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+ rc = make_secure_pte(ptep, uaddr, page, uvcb);
+ pte_unmap_unlock(ptep, ptelock);
+ unlock_page(page);
+out:
+ mmap_read_unlock(gmap->mm);
+
+ if (rc == -EAGAIN) {
+ wait_on_page_writeback(page);
+ } else if (rc == -EBUSY) {
+ /*
+ * If we have tried a local drain and the page refcount
+ * still does not match our expected safe value, try with a
+ * system wide drain. This is needed if the pagevecs holding
+ * the page are on a different CPU.
+ */
+ if (local_drain) {
+ lru_add_drain_all();
+ /* We give up here, and let the caller try again */
+ return -EAGAIN;
+ }
+ /*
+ * We are here if the page refcount does not match the
+ * expected safe value. The main culprits are usually
+ * pagevecs. With lru_add_drain() we drain the pagevecs
+ * on the local CPU so that hopefully the refcount will
+ * reach the expected safe value.
+ */
+ lru_add_drain();
+ local_drain = true;
+ /* And now we try again immediately after draining */
+ goto again;
+ } else if (rc == -ENXIO) {
+ if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+ return -EFAULT;
+ return -EAGAIN;
+ }
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+ .header.len = sizeof(uvcb),
+ .guest_handle = gmap->guest_handle,
+ .gaddr = gaddr,
+ };
+
+ return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+ int rc = 0;
+
+ /* Hugepage cannot be protected, so nothing to do */
+ if (PageHuge(page))
+ return 0;
+
+ /*
+ * PG_arch_1 is used in 3 places:
+ * 1. for kernel page tables during early boot
+ * 2. for storage keys of huge pages and KVM
+ * 3. As an indication that this page might be secure. This can
+ * overindicate, e.g. we set the bit before calling
+ * convert_to_secure.
+ * As secure pages are never huge, all 3 variants can co-exists.
+ */
+ if (!test_bit(PG_arch_1, &page->flags))
+ return 0;
+
+ rc = uv_pin_shared(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ rc = uv_convert_from_secure(page_to_phys(page));
+ if (!rc) {
+ clear_bit(PG_arch_1, &page->flags);
+ return 0;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+ uv_info.inst_calls_list[0],
+ uv_info.inst_calls_list[1],
+ uv_info.inst_calls_list[2],
+ uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+ __ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_feature_indications(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
+}
+
+static struct kobj_attribute uv_query_feature_indications_attr =
+ __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_guest_cpu_id + 1);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+ __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%d\n",
+ uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+ __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return scnprintf(page, PAGE_SIZE, "%lx\n",
+ uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+ __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+ &uv_query_facilities_attr.attr,
+ &uv_query_feature_indications_attr.attr,
+ &uv_query_max_guest_cpus_attr.attr,
+ &uv_query_max_guest_vms_attr.attr,
+ &uv_query_max_guest_addr_attr.attr,
+ NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+ .attrs = uv_query_attrs,
+};
+
+static struct kset *uv_query_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+ int rc = -ENOMEM;
+
+ if (!test_facility(158))
+ return 0;
+
+ uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+ if (!uv_kobj)
+ return -ENOMEM;
+
+ uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+ if (!uv_query_kset)
+ goto out_kobj;
+
+ rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+ if (!rc)
+ return 0;
+
+ kset_unregister(uv_query_kset);
+out_kobj:
+ kobject_del(uv_kobj);
+ kobject_put(uv_kobj);
+ return rc;
+}
+device_initcall(uv_info_init);
+#endif
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ed1fc08..f9da5b1 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -20,8 +20,9 @@
#include <linux/security.h>
#include <linux/memblock.h>
#include <linux/compat.h>
+#include <linux/binfmts.h>
+#include <vdso/datapage.h>
#include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
@@ -29,13 +30,6 @@
#include <asm/vdso.h>
#include <asm/facility.h>
-#ifdef CONFIG_COMPAT_VDSO
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-#endif
-
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
static unsigned int vdso64_pages;
@@ -55,12 +49,6 @@
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
- if (vma->vm_mm->context.compat_mm) {
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- }
-#endif
if (vmf->pgoff >= vdso_pages)
return VM_FAULT_SIGBUS;
@@ -76,10 +64,6 @@
unsigned long vdso_pages;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
- if (vma->vm_mm->context.compat_mm)
- vdso_pages = vdso32_pages;
-#endif
if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
return -EINVAL;
@@ -114,35 +98,12 @@
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
-
-/*
- * Setup vdso data page.
- */
-static void __init vdso_init_data(struct vdso_data *vd)
-{
- vd->ectg_available = test_facility(31);
-}
-
+struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data;
/*
* Allocate/free per cpu vdso data.
*/
#define SEGMENT_ORDER 2
-/*
- * The initial vdso_data structure for the boot CPU. Eventually
- * it is replaced with a properly allocated structure in vdso_init.
- * This is necessary because a valid S390_lowcore.vdso_per_cpu_data
- * pointer is required to be able to return from an interrupt or
- * program check. See the exit paths in entry.S.
- */
-struct vdso_data boot_vdso_data __initdata;
-
-void __init vdso_alloc_boot_cpu(struct lowcore *lowcore)
-{
- lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data;
-}
-
int vdso_alloc_per_cpu(struct lowcore *lowcore)
{
unsigned long segment_table, page_table, page_frame;
@@ -209,12 +170,10 @@
if (!vdso_enabled)
return 0;
+ if (is_compat_task())
+ return 0;
+
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
- mm->context.compat_mm = is_compat_task();
- if (mm->context.compat_mm)
- vdso_pages = vdso32_pages;
-#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for
* the process
@@ -227,7 +186,7 @@
* it at vdso_base which is the "natural" base for it, but we might
* fail and end up putting it elsewhere.
*/
- if (down_write_killable(&mm->mmap_sem))
+ if (mmap_write_lock_killable(mm))
return -EINTR;
vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
if (IS_ERR_VALUE(vdso_base)) {
@@ -258,7 +217,7 @@
rc = 0;
out_up:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return rc;
}
@@ -266,25 +225,6 @@
{
int i;
- vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT_VDSO
- /* Calculate the size of the 32 bit vDSO */
- vdso32_pages = ((&vdso32_end - &vdso32_start
- + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
- /* Make sure pages are in the correct state */
- vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
- BUG_ON(vdso32_pagelist == NULL);
- for (i = 0; i < vdso32_pages - 1; i++) {
- struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- get_page(pg);
- vdso32_pagelist[i] = pg;
- }
- vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
- vdso32_pagelist[vdso32_pages] = NULL;
-#endif
-
/* Calculate the size of the 64 bit vDSO */
vdso64_pages = ((&vdso64_end - &vdso64_start
+ PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
deleted file mode 100644
index e45fba9..0000000
--- a/arch/s390/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index aee9ffb..0000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
-
-KCOV_INSTRUMENT := n
-
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
-
-KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_31 += -m31 -s
-
-KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- -Wl,--hash-style=both
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S FORCE
- $(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
-quiet_cmd_vdso32as = VDSO32A $@
- cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
deleted file mode 100644
index eaf9cf1..0000000
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
- .text
- .align 4
- .globl __kernel_clock_getres
- .type __kernel_clock_getres,@function
-__kernel_clock_getres:
- CFI_STARTPROC
- basr %r1,0
- la %r1,4f-.(%r1)
- chi %r2,__CLOCK_REALTIME
- je 0f
- chi %r2,__CLOCK_MONOTONIC
- je 0f
- la %r1,5f-4f(%r1)
- chi %r2,__CLOCK_REALTIME_COARSE
- je 0f
- chi %r2,__CLOCK_MONOTONIC_COARSE
- jne 3f
-0: ltr %r3,%r3
- jz 2f /* res == NULL */
-1: l %r0,0(%r1)
- xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
- st %r0,4(%r3) /* store tp->tv_usec */
-2: lhi %r2,0
- br %r14
-3: lhi %r1,__NR_clock_getres /* fallback to svc */
- svc 0
- br %r14
- CFI_ENDPROC
-4: .long __CLOCK_REALTIME_RES
-5: .long __CLOCK_COARSE_RES
- .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
deleted file mode 100644
index ada5c11..0000000
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_clock_gettime
- .type __kernel_clock_gettime,@function
-__kernel_clock_gettime:
- CFI_STARTPROC
- ahi %r15,-16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- basr %r5,0
-0: al %r5,21f-0b(%r5) /* get &_vdso_data */
- chi %r2,__CLOCK_REALTIME_COARSE
- je 10f
- chi %r2,__CLOCK_REALTIME
- je 11f
- chi %r2,__CLOCK_MONOTONIC_COARSE
- je 9f
- chi %r2,__CLOCK_MONOTONIC
- jne 19f
-
- /* CLOCK_MONOTONIC */
-1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 1b
- stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,1(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,2f
- ahi %r0,-1
-2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
- lr %r2,%r0
- l %r0,__VDSO_TK_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 3f
- a %r0,__VDSO_TK_MULT(%r5)
-3: alr %r0,%r2
- al %r0,__VDSO_WTOM_NSEC(%r5)
- al %r1,__VDSO_WTOM_NSEC+4(%r5)
- brc 12,5f
- ahi %r0,1
-5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srdl %r0,0(%r2) /* >> tk->shift */
- l %r2,__VDSO_WTOM_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 1b
- basr %r5,0
-6: ltr %r0,%r0
- jnz 7f
- cl %r1,20f-6b(%r5)
- jl 8f
-7: ahi %r2,1
- sl %r1,20f-6b(%r5)
- brc 3,6b
- ahi %r0,-1
- j 6b
-8: st %r2,0(%r3) /* store tp->tv_sec */
- st %r1,4(%r3) /* store tp->tv_nsec */
- lhi %r2,0
- ahi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* CLOCK_MONOTONIC_COARSE */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 9b
- l %r2,__VDSO_WTOM_CRS_SEC+4(%r5)
- l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 9b
- j 8b
-
- /* CLOCK_REALTIME_COARSE */
-10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 10b
- l %r2,__VDSO_XTIME_CRS_SEC+4(%r5)
- l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 10b
- j 17f
-
- /* CLOCK_REALTIME */
-11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 11b
- stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
- s %r0,1(%r15) /* no - ts_steering_end */
- sl %r1,5(%r15)
- brc 3,22f
- ahi %r0,-1
-22: ltr %r0,%r0 /* past end of steering? */
- jm 24f
- srdl %r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 23f
- lcr %r0,%r0 /* negative TOD offset */
- lcr %r1,%r1
- je 23f
- ahi %r0,-1
-23: a %r0,1(%r15) /* add TOD timestamp */
- al %r1,5(%r15)
- brc 12,25f
- ahi %r0,1
- j 25f
-24: lm %r0,%r1,1(%r15) /* load TOD timestamp */
-25: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,12f
- ahi %r0,-1
-12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
- lr %r2,%r0
- l %r0,__VDSO_TK_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 13f
- a %r0,__VDSO_TK_MULT(%r5)
-13: alr %r0,%r2
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,14f
- ahi %r0,1
-14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srdl %r0,0(%r2) /* >> tk->shift */
- l %r2,__VDSO_XTIME_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 11b
- basr %r5,0
-15: ltr %r0,%r0
- jnz 16f
- cl %r1,20f-15b(%r5)
- jl 17f
-16: ahi %r2,1
- sl %r1,20f-15b(%r5)
- brc 3,15b
- ahi %r0,-1
- j 15b
-17: st %r2,0(%r3) /* store tp->tv_sec */
- st %r1,4(%r3) /* store tp->tv_nsec */
- lhi %r2,0
- ahi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* Fallback to system call */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-19: lhi %r1,__NR_clock_gettime
- svc 0
- ahi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-
-20: .long 1000000000
-21: .long _vdso_data - 0b
- .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
deleted file mode 100644
index 25515f3..0000000
--- a/arch/s390/kernel/vdso32/getcpu.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of getcpu() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2016
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/dwarf.h>
-
- .text
- .align 4
- .globl __kernel_getcpu
- .type __kernel_getcpu,@function
-__kernel_getcpu:
- CFI_STARTPROC
- la %r4,0
- sacf 256
- l %r5,__VDSO_CPU_NR(%r4)
- l %r4,__VDSO_NODE_ID(%r4)
- sacf 0
- ltr %r2,%r2
- jz 2f
- st %r5,0(%r2)
-2: ltr %r3,%r3
- jz 3f
- st %r4,0(%r3)
-3: lhi %r2,0
- br %r14
- CFI_ENDPROC
- .size __kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index b23063f..0000000
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_gettimeofday
- .type __kernel_gettimeofday,@function
-__kernel_gettimeofday:
- CFI_STARTPROC
- ahi %r15,-16
- CFI_ADJUST_CFA_OFFSET 16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- basr %r5,0
-0: al %r5,13f-0b(%r5) /* get &_vdso_data */
-1: ltr %r3,%r3 /* check if tz is NULL */
- je 2f
- mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
-2: ltr %r2,%r2 /* check if tv is NULL */
- je 10f
- l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 1b
- stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
- s %r0,1(%r15)
- sl %r1,5(%r15)
- brc 3,14f
- ahi %r0,-1
-14: ltr %r0,%r0 /* past end of steering? */
- jm 16f
- srdl %r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 15f
- lcr %r0,%r0 /* negative TOD offset */
- lcr %r1,%r1
- je 15f
- ahi %r0,-1
-15: a %r0,1(%r15) /* add TOD timestamp */
- al %r1,5(%r15)
- brc 12,17f
- ahi %r0,1
- j 17f
-16: lm %r0,%r1,1(%r15) /* load TOD timestamp */
-17: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,3f
- ahi %r0,-1
-3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
- st %r0,0(%r15)
- l %r0,__VDSO_TK_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 4f
- a %r0,__VDSO_TK_MULT(%r5)
-4: al %r0,0(%r15)
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,5f
- ahi %r0,1
-5: mvc 0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 1b
- l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srdl %r0,0(%r4) /* >> tk->shift */
- l %r4,0(%r15) /* get tv_sec from stack */
- basr %r5,0
-6: ltr %r0,%r0
- jnz 7f
- cl %r1,11f-6b(%r5)
- jl 8f
-7: ahi %r4,1
- sl %r1,11f-6b(%r5)
- brc 3,6b
- ahi %r0,-1
- j 6b
-8: st %r4,0(%r2) /* store tv->tv_sec */
- ltr %r1,%r1
- m %r0,12f-6b(%r5)
- jnm 9f
- al %r0,12f-6b(%r5)
-9: srl %r0,6
- st %r0,4(%r2) /* store tv->tv_usec */
-10: slr %r2,%r2
- ahi %r15,16
- CFI_ADJUST_CFA_OFFSET -16
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-11: .long 1000000000
-12: .long 274877907
-13: .long _vdso_data - 0b
- .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
deleted file mode 100644
index db19d06..0000000
--- a/arch/s390/kernel/vdso32/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
- .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index 721c495..0000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the infamous ld script for the 32 bits vdso
- * library
- */
-
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
-
-SECTIONS
-{
- . = VDSO32_LBASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- .note : { *(.note.*) } :text :note
-
- . = ALIGN(16);
- .text : {
- *(.text .stub .text.* .gnu.linkonce.t.*)
- } :text
- PROVIDE(__etext = .);
- PROVIDE(_etext = .);
- PROVIDE(etext = .);
-
- /*
- * Other stuff is appended to the text segment:
- */
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
- .rodata1 : { *(.rodata1) }
-
- .dynamic : { *(.dynamic) } :text :dynamic
-
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
- .got ALIGN(8) : { *(.got .toc) }
-
- _end = .;
- PROVIDE(end = .);
-
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the
- * beginning of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* DWARF 3 */
- .debug_pubtypes 0 : { *(.debug_pubtypes) }
- .debug_ranges 0 : { *(.debug_ranges) }
- .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
- . = ALIGN(PAGE_SIZE);
- PROVIDE(_vdso_data = .);
-
- /DISCARD/ : {
- *(.note.GNU-stack)
- *(.branch_lt)
- *(.data .data.* .gnu.linkonce.d.* .sdata*)
- *(.bss .sbss .dynbss .dynsbss)
- }
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME 0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- VDSO_VERSION_STRING {
- global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_gettimeofday;
- __kernel_clock_gettime;
- __kernel_clock_getres;
- __kernel_getcpu;
-
- local: *;
- };
-}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
deleted file mode 100644
index de2fb93..0000000
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
- __PAGE_ALIGNED_DATA
-
- .globl vdso32_start, vdso32_end
- .balign PAGE_SIZE
-vdso32_start:
- .incbin "arch/s390/kernel/vdso32/vdso32.so"
- .balign PAGE_SIZE
-vdso32_end:
-
- .previous
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
index 3fd18cf..4ec8068 100644
--- a/arch/s390/kernel/vdso64/.gitignore
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 4a66a1c..13cc5a3 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,17 +1,23 @@
# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
+# List of files in the vdso
KCOV_INSTRUMENT := n
+ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
+ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
-obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
+include $(srctree)/lib/vdso/Makefile
+obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o
+obj-cvdso64 = vdso64_generic.o
+CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
# Build rules
-targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
KBUILD_AFLAGS_64 += -m64 -s
@@ -19,13 +25,13 @@
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
- --hash-style=both --build-id -T
+ --hash-style=both --build-id=sha1 -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
obj-y += vdso64_wrapper.o
-extra-y += vdso64.lds
+targets += vdso64.lds
CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
# Disable gcov profiling, ubsan and kasan for VDSO code
@@ -37,7 +43,7 @@
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
$(call if_changed,ld)
# strip rule for the .so file
@@ -49,9 +55,14 @@
$(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
+$(obj-cvdso64): %.o: %.c FORCE
+ $(call if_changed_dep,vdso64cc)
+
# actual build commands
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso64cc = VDSO64C $@
+ cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
deleted file mode 100644
index 0c79caa..0000000
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
- .text
- .align 4
- .globl __kernel_clock_getres
- .type __kernel_clock_getres,@function
-__kernel_clock_getres:
- CFI_STARTPROC
- larl %r1,3f
- lg %r0,0(%r1)
- cghi %r2,__CLOCK_REALTIME_COARSE
- je 0f
- cghi %r2,__CLOCK_MONOTONIC_COARSE
- je 0f
- larl %r1,_vdso_data
- llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1)
- cghi %r2,__CLOCK_REALTIME
- je 0f
- cghi %r2,__CLOCK_MONOTONIC
- je 0f
- cghi %r2,__CLOCK_THREAD_CPUTIME_ID
- je 0f
- cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
- jne 2f
- larl %r5,_vdso_data
- icm %r0,15,__LC_ECTG_OK(%r5)
- jz 2f
-0: ltgr %r3,%r3
- jz 1f /* res == NULL */
- xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
- stg %r0,8(%r3) /* store tp->tv_usec */
-1: lghi %r2,0
- br %r14
-2: lghi %r1,__NR_clock_getres /* fallback to svc */
- svc 0
- br %r14
- CFI_ENDPROC
-3: .quad __CLOCK_COARSE_RES
- .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
deleted file mode 100644
index 9d2ee79..0000000
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ /dev/null
@@ -1,163 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_clock_gettime
- .type __kernel_clock_gettime,@function
-__kernel_clock_gettime:
- CFI_STARTPROC
- aghi %r15,-16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- larl %r5,_vdso_data
- cghi %r2,__CLOCK_REALTIME_COARSE
- je 4f
- cghi %r2,__CLOCK_REALTIME
- je 5f
- cghi %r2,-3 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
- je 9f
- cghi %r2,__CLOCK_MONOTONIC_COARSE
- je 3f
- cghi %r2,__CLOCK_MONOTONIC
- jne 12f
-
- /* CLOCK_MONOTONIC */
-0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 0b
- stcke 0(%r15) /* Store TOD clock */
- lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- lg %r0,__VDSO_WTOM_SEC(%r5)
- lg %r1,1(%r15)
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_WTOM_NSEC(%r5)
- srlg %r1,%r1,0(%r2) /* >> tk->shift */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 0b
- larl %r5,13f
-1: clg %r1,0(%r5)
- jl 2f
- slg %r1,0(%r5)
- aghi %r0,1
- j 1b
-2: stg %r0,0(%r3) /* store tp->tv_sec */
- stg %r1,8(%r3) /* store tp->tv_nsec */
- lghi %r2,0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* CLOCK_MONOTONIC_COARSE */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 3b
- lg %r0,__VDSO_WTOM_CRS_SEC(%r5)
- lg %r1,__VDSO_WTOM_CRS_NSEC(%r5)
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 3b
- j 2b
-
- /* CLOCK_REALTIME_COARSE */
-4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 4b
- lg %r0,__VDSO_XTIME_CRS_SEC(%r5)
- lg %r1,__VDSO_XTIME_CRS_NSEC(%r5)
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 4b
- j 7f
-
- /* CLOCK_REALTIME */
-5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 5b
- stcke 0(%r15) /* Store TOD clock */
- lg %r1,1(%r15)
- lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
- slgr %r0,%r1 /* now - ts_steering_end */
- ltgr %r0,%r0 /* past end of steering ? */
- jm 17f
- srlg %r0,%r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 18f
- lcgr %r0,%r0 /* negative TOD offset */
-18: algr %r1,%r0 /* add steering offset */
-17: lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- srlg %r1,%r1,0(%r2) /* >> tk->shift */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 5b
- larl %r5,13f
-6: clg %r1,0(%r5)
- jl 7f
- slg %r1,0(%r5)
- aghi %r0,1
- j 6b
-7: stg %r0,0(%r3) /* store tp->tv_sec */
- stg %r1,8(%r3) /* store tp->tv_nsec */
- lghi %r2,0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* CPUCLOCK_VIRT for this thread */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9: lghi %r4,0
- icm %r0,15,__VDSO_ECTG_OK(%r5)
- jz 12f
- sacf 256 /* Magic ectg instruction */
- .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
- sacf 0
- algr %r1,%r0 /* r1 = cputime as TOD value */
- mghi %r1,1000 /* convert to nanoseconds */
- srlg %r1,%r1,12 /* r1 = cputime in nanosec */
- lgr %r4,%r1
- larl %r5,13f
- srlg %r1,%r1,9 /* divide by 1000000000 */
- mlg %r0,8(%r5)
- srlg %r0,%r0,11 /* r0 = tv_sec */
- stg %r0,0(%r3)
- msg %r0,0(%r5) /* calculate tv_nsec */
- slgr %r4,%r0 /* r4 = tv_nsec */
- stg %r4,8(%r3)
- lghi %r2,0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* Fallback to system call */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-12: lghi %r1,__NR_clock_gettime
- svc 0
- aghi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-
-13: .quad 1000000000
-14: .quad 19342813113834067
- .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
index 2446e9d..3c04f73 100644
--- a/arch/s390/kernel/vdso64/getcpu.S
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -16,10 +16,8 @@
.type __kernel_getcpu,@function
__kernel_getcpu:
CFI_STARTPROC
- la %r4,0
sacf 256
- l %r5,__VDSO_CPU_NR(%r4)
- l %r4,__VDSO_NODE_ID(%r4)
+ lm %r4,%r5,__VDSO_GETCPU_VAL(%r0)
sacf 0
ltgr %r2,%r2
jz 2f
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
deleted file mode 100644
index aebe10d..0000000
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_gettimeofday
- .type __kernel_gettimeofday,@function
-__kernel_gettimeofday:
- CFI_STARTPROC
- aghi %r15,-16
- CFI_ADJUST_CFA_OFFSET 16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- larl %r5,_vdso_data
-0: ltgr %r3,%r3 /* check if tz is NULL */
- je 1f
- mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
-1: ltgr %r2,%r2 /* check if tv is NULL */
- je 4f
- lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
- tmll %r4,0x0001 /* pending update ? loop */
- jnz 0b
- stcke 0(%r15) /* Store TOD clock */
- lg %r1,1(%r15)
- lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
- slgr %r0,%r1 /* now - ts_steering_end */
- ltgr %r0,%r0 /* past end of steering ? */
- jm 6f
- srlg %r0,%r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 7f
- lcgr %r0,%r0 /* negative TOD offset */
-7: algr %r1,%r0 /* add steering offset */
-6: sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
- alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
- clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
- jne 0b
- lgf %r5,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srlg %r1,%r1,0(%r5) /* >> tk->shift */
- larl %r5,5f
-2: clg %r1,0(%r5)
- jl 3f
- slg %r1,0(%r5)
- aghi %r0,1
- j 2b
-3: stg %r0,0(%r2) /* store tv->tv_sec */
- slgr %r0,%r0 /* tv_nsec -> tv_usec */
- ml %r0,8(%r5)
- srlg %r0,%r0,6
- stg %r0,8(%r2) /* store tv->tv_usec */
-4: lghi %r2,0
- aghi %r15,16
- CFI_ADJUST_CFA_OFFSET -16
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-5: .quad 1000000000
- .long 274877907
- .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c
new file mode 100644
index 0000000..a8cef7e
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_generic.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/vdso/gettimeofday.c"
+
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_getres(clock, ts);
+}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
new file mode 100644
index 0000000..a775d7e
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/vdso.h>
+#include <asm/unistd.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
+
+/*
+ * Older glibc version called vdso without allocating a stackframe. This wrapper
+ * is just used to allocate a stackframe. See
+ * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e
+ * for details.
+ */
+.macro vdso_func func
+ .globl __kernel_\func
+ .type __kernel_\func,@function
+ .align 8
+__kernel_\func:
+ CFI_STARTPROC
+ aghi %r15,-WRAPPER_FRAME_SIZE
+ CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
+ CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+ stg %r14,STACK_FRAME_OVERHEAD(%r15)
+ brasl %r14,__s390_vdso_\func
+ lg %r14,STACK_FRAME_OVERHEAD(%r15)
+ aghi %r15,WRAPPER_FRAME_SIZE
+ CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+ CFI_RESTORE 15
+ br %r14
+ CFI_ENDPROC
+ .size __kernel_\func,.-__kernel_\func
+.endm
+
+vdso_func gettimeofday
+vdso_func clock_getres
+vdso_func clock_gettime
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 7e0eb40..177ccfb 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -15,6 +15,8 @@
/* Handle ro_after_init data on our own. */
#define RO_AFTER_INIT_DATA
+#define EMITS_PT_NOTE
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/vmlinux.lds.h>
@@ -50,11 +52,7 @@
_etext = .; /* End of text section */
} :text = 0x0700
- NOTES :text :note
-
- .dummy : { *(.dummy) } :data
-
- RO_DATA_SECTION(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -64,12 +62,12 @@
.data..ro_after_init : {
*(.data..ro_after_init)
JUMP_TABLE_DATA
- }
+ } :data
EXCEPTION_TABLE(16)
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
- RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
BOOT_DATA_PRESERVED
_edata = .; /* End of data section */
@@ -183,6 +181,7 @@
/* Debugging sections. */
STABS_DEBUG
DWARF_DEBUG
+ ELF_DETAILS
/* Sections to be discarded */
DISCARDS
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 6e60cc2..579ec3a 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -248,9 +248,9 @@
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
/*
* Sorted add to a list. List is linear searched until first bigger
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index d3db3d7..67a8e77 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -7,7 +7,7 @@
menuconfig VIRTUALIZATION
def_bool y
prompt "KVM"
- ---help---
+ help
Say Y here to get to see options for using your Linux host to run other
operating systems inside virtual machines (guests).
This option alone does not add any kernel code.
@@ -33,7 +33,7 @@
select HAVE_KVM_NO_POLL
select SRCU
select KVM_VFIO
- ---help---
+ help
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
on any 64bit machine.
@@ -49,14 +49,10 @@
config KVM_S390_UCONTROL
bool "Userspace controlled virtual machines"
depends on KVM
- ---help---
+ help
Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
controlled by userspace.
If unsure, say N.
-# OK, it's a little counter-intuitive to do this, but it puts it neatly under
-# the virtualization menu.
-source "drivers/vhost/Kconfig"
-
endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 05ee90a..12decca 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,6 @@
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 45634b3..5b8ec1c 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -2,7 +2,7 @@
/*
* handling diagnose instructions
*
- * Copyright IBM Corp. 2008, 2011
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -10,7 +10,6 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
@@ -158,14 +157,28 @@
tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
vcpu->stat.diagnose_9c++;
- VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
+ /* yield to self */
if (tid == vcpu->vcpu_id)
- return 0;
+ goto no_yield;
+ /* yield to invalid */
tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
- if (tcpu)
- kvm_vcpu_yield_to(tcpu);
+ if (!tcpu)
+ goto no_yield;
+
+ /* target already running */
+ if (READ_ONCE(tcpu->cpu) >= 0)
+ goto no_yield;
+
+ if (kvm_vcpu_yield_to(tcpu) <= 0)
+ goto no_yield;
+
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid);
+ return 0;
+no_yield:
+ VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
+ vcpu->stat.diagnose_9c_ignored++;
return 0;
}
@@ -187,6 +200,10 @@
return -EOPNOTSUPP;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 07d30ff..b9f85b2 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -9,8 +9,8 @@
#include <linux/vmalloc.h>
#include <linux/mm_types.h>
#include <linux/err.h>
+#include <linux/pgtable.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -505,7 +505,7 @@
switch (prot) {
case PROT_TYPE_IEP:
tec->b61 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_LA:
tec->b56 = 1;
break;
@@ -514,12 +514,12 @@
break;
case PROT_TYPE_ALC:
tec->b60 = 1;
- /* FALL THROUGH */
+ fallthrough;
case PROT_TYPE_DAT:
tec->b61 = 1;
break;
}
- /* FALL THROUGH */
+ fallthrough;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
case PGM_REGION_FIRST_TRANS:
@@ -534,7 +534,7 @@
tec->addr = gva >> PAGE_SHIFT;
tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
- /* FALL THROUGH */
+ fallthrough;
case PGM_ALEN_TRANSLATION:
case PGM_ALE_SEQUENCE:
case PGM_ASTE_VALIDITY:
@@ -677,7 +677,7 @@
dat_protection |= rfte.p;
ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -695,7 +695,7 @@
dat_protection |= rste.p;
ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -723,7 +723,7 @@
dat_protection |= rtte.fc0.p;
ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
}
- /* fallthrough */
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
@@ -976,7 +976,9 @@
* kvm_s390_shadow_tables - walk the guest page table and create shadow tables
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
- * @pgt: pointer to the page table address result
+ * @pgt: pointer to the beginning of the page table for the given address if
+ * successful (return value 0), or to the first invalid DAT entry in
+ * case of exceptions (return value > 0)
* @fake: pgt references contiguous guest memory block, not a pgtable
*/
static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
@@ -1034,6 +1036,7 @@
rfte.val = ptr;
goto shadow_r2t;
}
+ *pgt = ptr + vaddr.rfx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
if (rc)
return rc;
@@ -1050,7 +1053,8 @@
rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION2: {
union region2_table_entry rste;
@@ -1059,6 +1063,7 @@
rste.val = ptr;
goto shadow_r3t;
}
+ *pgt = ptr + vaddr.rsx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
if (rc)
return rc;
@@ -1076,7 +1081,8 @@
rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_REGION3: {
union region3_table_entry rtte;
@@ -1085,6 +1091,7 @@
rtte.val = ptr;
goto shadow_sgt;
}
+ *pgt = ptr + vaddr.rtx * 8;
rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
if (rc)
return rc;
@@ -1111,7 +1118,8 @@
rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
if (rc)
return rc;
- } /* fallthrough */
+ }
+ fallthrough;
case ASCE_TYPE_SEGMENT: {
union segment_table_entry ste;
@@ -1120,6 +1128,7 @@
ste.val = ptr;
goto shadow_pgt;
}
+ *pgt = ptr + vaddr.sx * 8;
rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
if (rc)
return rc;
@@ -1154,6 +1163,8 @@
* @vcpu: virtual cpu
* @sg: pointer to the shadow guest address space structure
* @saddr: faulting address in the shadow gmap
+ * @datptr: will contain the address of the faulting DAT table entry, or of
+ * the valid leaf, plus some flags
*
* Returns: - 0 if the shadow fault was successfully resolved
* - > 0 (pgm exception code) on exceptions while faulting
@@ -1162,15 +1173,15 @@
* - -ENOMEM if out of memory
*/
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
- unsigned long saddr)
+ unsigned long saddr, unsigned long *datptr)
{
union vaddress vaddr;
union page_table_entry pte;
- unsigned long pgt;
+ unsigned long pgt = 0;
int dat_protection, fake;
int rc;
- down_read(&sg->mm->mmap_sem);
+ mmap_read_lock(sg->mm);
/*
* We don't want any guest-2 tables to change - so the parent
* tables/pointers we read stay valid - unshadowing is however
@@ -1188,8 +1199,20 @@
pte.val = pgt + vaddr.px * PAGE_SIZE;
goto shadow_page;
}
- if (!rc)
- rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+ switch (rc) {
+ case PGM_SEGMENT_TRANSLATION:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_FIRST_TRANS:
+ pgt |= PEI_NOT_PTE;
+ break;
+ case 0:
+ pgt += vaddr.px * 8;
+ rc = gmap_read_table(sg->parent, pgt, &pte.val);
+ }
+ if (datptr)
+ *datptr = pgt | dat_protection * PEI_DAT_PROT;
if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION;
if (!rc && pte.z)
@@ -1199,6 +1222,6 @@
if (!rc)
rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
ipte_unlock(vcpu);
- up_read(&sg->mm->mmap_sem);
+ mmap_read_unlock(sg->mm);
return rc;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 4c56de5..7c72a5e 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -387,7 +387,11 @@
int ipte_lock_held(struct kvm_vcpu *vcpu);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+/* MVPG PEI indication bits */
+#define PEI_DAT_PROT 2
+#define PEI_NOT_PTE 4
+
int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
- unsigned long saddr);
+ unsigned long saddr, unsigned long *datptr);
#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a389fa8..e7a7c49 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -2,7 +2,7 @@
/*
* in-kernel handling for sie intercepts
*
- * Copyright IBM Corp. 2008, 2014
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -12,10 +12,10 @@
#include <linux/errno.h>
#include <linux/pagemap.h>
-#include <asm/kvm_host.h>
#include <asm/asm-offsets.h>
#include <asm/irq.h>
#include <asm/sysinfo.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -79,6 +79,10 @@
return rc;
}
+ /*
+ * no need to check the return value of vcpu_stop as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
return -EOPNOTSUPP;
@@ -231,6 +235,13 @@
vcpu->stat.exit_program_interruption++;
+ /*
+ * Intercept 8 indicates a loop of specification exceptions
+ * for protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EOPNOTSUPP;
+
if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
@@ -384,7 +395,7 @@
goto out;
}
- if (addr & ~PAGE_MASK)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
sctns = (void *)get_zeroed_page(GFP_KERNEL);
@@ -395,10 +406,15 @@
out:
if (!cc) {
- r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
- if (r) {
- free_page((unsigned long)sctns);
- return kvm_s390_inject_prog_cond(vcpu, r);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
+ sctns, PAGE_SIZE);
+ } else {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
}
}
@@ -444,6 +460,77 @@
return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
}
+static int handle_pv_spx(struct kvm_vcpu *vcpu)
+{
+ u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+
+ kvm_s390_set_prefix(vcpu, pref);
+ trace_kvm_s390_handle_prefix(vcpu, 1, pref);
+ return 0;
+}
+
+static int handle_pv_sclp(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ /*
+ * 2 cases:
+ * a: an sccb answering interrupt was already pending or in flight.
+ * As the sccb value is not known we can simply set some value to
+ * trigger delivery of a saved SCCB. UV will then use its saved
+ * copy of the SCCB value.
+ * b: an error SCCB interrupt needs to be injected so we also inject
+ * a fake SCCB address. Firmware will use the proper one.
+ * This makes sure, that both errors and real sccb returns will only
+ * be delivered after a notification intercept (instruction has
+ * finished) but not after others.
+ */
+ fi->srv_signal.ext_params |= 0x43000;
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+{
+ struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+ struct uv_cb_cts uvcb = {
+ .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(vcpu->kvm),
+ .gaddr = guest_uvcb->paddr,
+ };
+ int rc;
+
+ if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
+ WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
+ guest_uvcb->header.cmd);
+ return 0;
+ }
+ rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+ /*
+ * If the unpin did not succeed, the guest will exit again for the UVC
+ * and we will retry the unpin.
+ */
+ if (rc == -EINVAL)
+ return 0;
+ return rc;
+}
+
+static int handle_pv_notification(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->ipa == 0xb210)
+ return handle_pv_spx(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb220)
+ return handle_pv_sclp(vcpu);
+ if (vcpu->arch.sie_block->ipa == 0xb9a4)
+ return handle_pv_uvc(vcpu);
+
+ return handle_instruction(vcpu);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
@@ -480,6 +567,28 @@
case ICPT_KSS:
rc = kvm_s390_skey_check_enable(vcpu);
break;
+ case ICPT_MCHKREQ:
+ case ICPT_INT_ENABLE:
+ /*
+ * PSW bit 13 or a CR (0, 6, 14) changed and we might
+ * now be able to deliver interrupts. The pre-run code
+ * will take care of this.
+ */
+ rc = 0;
+ break;
+ case ICPT_PV_INSTR:
+ rc = handle_instruction(vcpu);
+ break;
+ case ICPT_PV_NOTIFY:
+ rc = handle_pv_notification(vcpu);
+ break;
+ case ICPT_PV_PREF:
+ rc = 0;
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu));
+ gmap_convert_to_secure(vcpu->arch.gmap,
+ kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa9483a..b51ab19 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2,7 +2,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008, 2015
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
@@ -324,8 +324,11 @@
static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.float_int.pending_irqs |
- vcpu->arch.local_int.pending_irqs;
+ unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs |
+ vcpu->arch.local_int.pending_irqs;
+
+ pending &= ~vcpu->kvm->arch.float_int.masked_irqs;
+ return pending;
}
static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
@@ -383,10 +386,18 @@
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
- if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+ if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+ __clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
+ }
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ /* PV guest cpus can have a single interruption injected at a time. */
+ if (kvm_s390_pv_cpu_get_handle(vcpu) &&
+ vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
+ active_mask &= ~(IRQ_PEND_EXT_II_MASK |
+ IRQ_PEND_IO_MASK |
+ IRQ_PEND_MCHK_MASK);
/*
* Check both floating and local interrupt's cr14 because
* bit IRQ_PEND_MCHK_REP could be set in both cases.
@@ -479,19 +490,23 @@
static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_cputm++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
- (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -499,19 +514,23 @@
static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
vcpu->stat.deliver_ckc++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
0, 0);
-
- rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
- (u16 __user *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP;
+ } else {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+ (u16 __user *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -553,6 +572,20 @@
union mci mci;
int rc;
+ /*
+ * All other possible payload for a machine check (e.g. the register
+ * contents in the save area) will be handled by the ultravisor, as
+ * the hypervisor does not not have the needed information for
+ * protected guests.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
+ vcpu->arch.sie_block->mcic = mchk->mcic;
+ vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
+ vcpu->arch.sie_block->edc = mchk->ext_damage_code;
+ return 0;
+ }
+
mci.val = mchk->mcic;
/* take care of lazy register loading */
save_fpu_regs();
@@ -696,17 +729,21 @@
static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- int rc;
+ int rc = 0;
VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
vcpu->stat.deliver_restart_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
- rc = write_guest_lc(vcpu,
- offsetof(struct lowcore, restart_old_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART;
+ } else {
+ rc = write_guest_lc(vcpu,
+ offsetof(struct lowcore, restart_old_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ }
clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
return rc ? -EFAULT : 0;
}
@@ -748,6 +785,12 @@
vcpu->stat.deliver_emergency_signal++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
cpu_addr, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG;
+ vcpu->arch.sie_block->extcpuaddr = cpu_addr;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
(u16 *)__LC_EXT_INT_CODE);
@@ -776,6 +819,12 @@
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
KVM_S390_INT_EXTERNAL_CALL,
extcall.code, 0);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL;
+ vcpu->arch.sie_block->extcpuaddr = extcall.code;
+ return 0;
+ }
rc = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
(u16 *)__LC_EXT_INT_CODE);
@@ -787,6 +836,21 @@
return rc ? -EFAULT : 0;
}
+static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code)
+{
+ switch (code) {
+ case PGM_SPECIFICATION:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION;
+ break;
+ case PGM_OPERAND:
+ vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -807,6 +871,10 @@
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
pgm_info.code, 0);
+ /* PER is handled by the ultravisor */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
+
switch (pgm_info.code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
case PGM_ASX_TRANSLATION:
@@ -818,7 +886,7 @@
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
nullifying = true;
- /* fall through */
+ fallthrough;
case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
@@ -902,20 +970,49 @@
return rc ? -EFAULT : 0;
}
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int write_sclp(struct kvm_vcpu *vcpu, u32 parm)
+{
+ int rc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+ vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG;
+ vcpu->arch.sie_block->eiparams = parm;
+ return 0;
+ }
+
+ rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, parm,
+ (u32 *)__LC_EXT_PARAMS);
+
+ return rc ? -EFAULT : 0;
+}
+
static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_ext_info ext;
- int rc = 0;
spin_lock(&fi->lock);
- if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) ||
+ !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
spin_unlock(&fi->lock);
return 0;
}
ext = fi->srv_signal;
memset(&fi->srv_signal, 0, sizeof(ext));
clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
@@ -924,16 +1021,31 @@
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
ext.ext_params, 0);
- rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, ext.ext_params,
- (u32 *)__LC_EXT_PARAMS);
+ return write_sclp(vcpu, ext.ext_params);
+}
- return rc ? -EFAULT : 0;
+static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_ext_info ext;
+
+ spin_lock(&fi->lock);
+ if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) {
+ spin_unlock(&fi->lock);
+ return 0;
+ }
+ ext = fi->srv_signal;
+ /* only clear the event bit */
+ fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
+ clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event");
+ vcpu->stat.deliver_service_signal++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+ ext.ext_params, 0);
+
+ return write_sclp(vcpu, SCCB_EVENT_PENDING);
}
static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
@@ -1028,6 +1140,15 @@
{
int rc;
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->iictl = IICTL_CODE_IO;
+ vcpu->arch.sie_block->subchannel_id = io->subchannel_id;
+ vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr;
+ vcpu->arch.sie_block->io_int_parm = io->io_int_parm;
+ vcpu->arch.sie_block->io_int_word = io->io_int_word;
+ return 0;
+ }
+
rc = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
@@ -1329,6 +1450,9 @@
case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu);
break;
+ case IRQ_PEND_EXT_SERVICE_EV:
+ rc = __deliver_service_ev(vcpu);
+ break;
case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu);
break;
@@ -1421,7 +1545,7 @@
if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
return -EINVAL;
- if (sclp.has_sigpif)
+ if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
return sca_inject_ext_call(vcpu, src_id);
if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
@@ -1477,8 +1601,7 @@
return 0;
}
-static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
- struct kvm_s390_irq *irq)
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -1682,9 +1805,6 @@
return inti;
}
-#define SCCB_MASK 0xFFFFFFF8
-#define SCCB_EVENT_PENDING 0x3
-
static int __inject_service(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti)
{
@@ -1693,6 +1813,11 @@
kvm->stat.inject_service_signal++;
spin_lock(&fi->lock);
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+
+ /* We always allow events, track them separately from the sccb ints */
+ if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
+ set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+
/*
* Early versions of the QEMU s390 bios will inject several
* service interrupts after another without handling a
@@ -1774,7 +1899,14 @@
kvm->stat.inject_io++;
isc = int_word_to_isc(inti->io.io_int_word);
- if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+ /*
+ * Do not make use of gisa in protected mode. We do not use the lock
+ * checking variant as this is just a performance optimization and we
+ * do not hold the lock here. This is ok as the code will pick
+ * interrupts from both "lists" for delivery.
+ */
+ if (!kvm_s390_pv_get_handle(kvm) &&
+ gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti);
@@ -1835,7 +1967,8 @@
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
if (!(type & KVM_S390_INT_IO_AI_MASK &&
- kvm->arch.gisa_int.origin))
+ kvm->arch.gisa_int.origin) ||
+ kvm_s390_pv_cpu_get_handle(dst_vcpu))
kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
break;
default:
@@ -1982,6 +2115,13 @@
return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
}
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+ return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+}
+
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -2007,7 +2147,7 @@
rc = __inject_sigp_stop(vcpu, irq);
break;
case KVM_S390_RESTART:
- rc = __inject_sigp_restart(vcpu, irq);
+ rc = __inject_sigp_restart(vcpu);
break;
case KVM_S390_INT_CLOCK_COMP:
rc = __inject_ckc(vcpu);
@@ -2081,6 +2221,10 @@
struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
int i;
+ mutex_lock(&kvm->lock);
+ if (!kvm_s390_pv_is_protected(kvm))
+ fi->masked_irqs = 0;
+ mutex_unlock(&kvm->lock);
spin_lock(&fi->lock);
fi->pending_irqs = 0;
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
@@ -2147,7 +2291,8 @@
n++;
}
}
- if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
+ test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) {
if (n == max_irqs) {
/* signal userspace to try again */
ret = -ENOMEM;
@@ -2328,9 +2473,6 @@
if (!adapter)
return -ENOMEM;
- INIT_LIST_HEAD(&adapter->maps);
- init_rwsem(&adapter->maps_lock);
- atomic_set(&adapter->nr_maps, 0);
adapter->id = adapter_info.id;
adapter->isc = adapter_info.isc;
adapter->maskable = adapter_info.maskable;
@@ -2355,87 +2497,12 @@
return ret;
}
-static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map;
- int ret;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- map = kzalloc(sizeof(*map), GFP_KERNEL);
- if (!map) {
- ret = -ENOMEM;
- goto out;
- }
- INIT_LIST_HEAD(&map->list);
- map->guest_addr = addr;
- map->addr = gmap_translate(kvm->arch.gmap, addr);
- if (map->addr == -EFAULT) {
- ret = -EFAULT;
- goto out;
- }
- ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
- if (ret < 0)
- goto out;
- BUG_ON(ret != 1);
- down_write(&adapter->maps_lock);
- if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
- list_add_tail(&map->list, &adapter->maps);
- ret = 0;
- } else {
- put_page(map->page);
- ret = -EINVAL;
- }
- up_write(&adapter->maps_lock);
-out:
- if (ret)
- kfree(map);
- return ret;
-}
-
-static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
-{
- struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
- struct s390_map_info *map, *tmp;
- int found = 0;
-
- if (!adapter || !addr)
- return -EINVAL;
-
- down_write(&adapter->maps_lock);
- list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
- if (map->guest_addr == addr) {
- found = 1;
- atomic_dec(&adapter->nr_maps);
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- break;
- }
- }
- up_write(&adapter->maps_lock);
-
- return found ? 0 : -EINVAL;
-}
-
void kvm_s390_destroy_adapters(struct kvm *kvm)
{
int i;
- struct s390_map_info *map, *tmp;
- for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
- if (!kvm->arch.adapters[i])
- continue;
- list_for_each_entry_safe(map, tmp,
- &kvm->arch.adapters[i]->maps, list) {
- list_del(&map->list);
- put_page(map->page);
- kfree(map);
- }
+ for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
kfree(kvm->arch.adapters[i]);
- }
}
static int modify_io_adapter(struct kvm_device *dev,
@@ -2457,11 +2524,14 @@
if (ret > 0)
ret = 0;
break;
+ /*
+ * The following operations are no longer needed and therefore no-ops.
+ * The gpa to hva translation is done when an IRQ route is set up. The
+ * set_irq code uses get_user_pages_remote() to do the actual write.
+ */
case KVM_S390_IO_ADAPTER_MAP:
- ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
- break;
case KVM_S390_IO_ADAPTER_UNMAP:
- ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+ ret = 0;
break;
default:
ret = -EINVAL;
@@ -2700,19 +2770,15 @@
return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
}
-static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
- u64 addr)
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
{
- struct s390_map_info *map;
+ struct page *page = NULL;
- if (!adapter)
- return NULL;
-
- list_for_each_entry(map, &adapter->maps, list) {
- if (map->guest_addr == addr)
- return map;
- }
- return NULL;
+ mmap_read_lock(kvm->mm);
+ get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
+ &page, NULL, NULL);
+ mmap_read_unlock(kvm->mm);
+ return page;
}
static int adapter_indicators_set(struct kvm *kvm,
@@ -2721,30 +2787,35 @@
{
unsigned long bit;
int summary_set, idx;
- struct s390_map_info *info;
+ struct page *ind_page, *summary_page;
void *map;
- info = get_map_info(adapter, adapter_int->ind_addr);
- if (!info)
+ ind_page = get_map_page(kvm, adapter_int->ind_addr);
+ if (!ind_page)
return -1;
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
- set_bit(bit, map);
- idx = srcu_read_lock(&kvm->srcu);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
- info = get_map_info(adapter, adapter_int->summary_addr);
- if (!info) {
- srcu_read_unlock(&kvm->srcu, idx);
+ summary_page = get_map_page(kvm, adapter_int->summary_addr);
+ if (!summary_page) {
+ put_page(ind_page);
return -1;
}
- map = page_address(info->page);
- bit = get_ind_bit(info->addr, adapter_int->summary_offset,
- adapter->swap);
+
+ idx = srcu_read_lock(&kvm->srcu);
+ map = page_address(ind_page);
+ bit = get_ind_bit(adapter_int->ind_addr,
+ adapter_int->ind_offset, adapter->swap);
+ set_bit(bit, map);
+ mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(ind_page);
+ map = page_address(summary_page);
+ bit = get_ind_bit(adapter_int->summary_addr,
+ adapter_int->summary_offset, adapter->swap);
summary_set = test_and_set_bit(bit, map);
- mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
- set_page_dirty_lock(info->page);
+ mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
+ set_page_dirty_lock(summary_page);
srcu_read_unlock(&kvm->srcu, idx);
+
+ put_page(ind_page);
+ put_page(summary_page);
return summary_set ? 0 : 1;
}
@@ -2766,9 +2837,7 @@
adapter = get_io_adapter(kvm, e->adapter.adapter_id);
if (!adapter)
return -1;
- down_read(&adapter->maps_lock);
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
- up_read(&adapter->maps_lock);
if ((ret > 0) && !adapter->masked) {
ret = kvm_s390_inject_airq(kvm, adapter);
if (ret == 0)
@@ -2819,23 +2888,27 @@
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int ret;
+ u64 uaddr;
switch (ue->type) {
+ /* we store the userspace addresses instead of the guest addresses */
case KVM_IRQ_ROUTING_S390_ADAPTER:
e->set = set_adapter_int;
- e->adapter.summary_addr = ue->u.adapter.summary_addr;
- e->adapter.ind_addr = ue->u.adapter.ind_addr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.summary_addr = uaddr;
+ uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
+ if (uaddr == -EFAULT)
+ return -EFAULT;
+ e->adapter.ind_addr = uaddr;
e->adapter.summary_offset = ue->u.adapter.summary_offset;
e->adapter.ind_offset = ue->u.adapter.ind_offset;
e->adapter.adapter_id = ue->u.adapter.adapter_id;
- ret = 0;
- break;
+ return 0;
default:
- ret = -EINVAL;
+ return -EINVAL;
}
-
- return ret;
}
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
@@ -2987,13 +3060,14 @@
int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
struct kvm_vcpu *vcpu;
+ u8 vcpu_isc_mask;
for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
vcpu = kvm_get_vcpu(kvm, vcpu_idx);
if (psw_ioint_disabled(vcpu))
continue;
- deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
- if (deliverable_mask) {
+ vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+ if (deliverable_mask & vcpu_isc_mask) {
/* lately kicked but not yet running */
if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
return;
@@ -3016,7 +3090,7 @@
__airqs_kick_single_vcpu(kvm, pending_mask);
hrtimer_forward_now(timer, ns_to_ktime(gi->expires));
return HRTIMER_RESTART;
- };
+ }
return HRTIMER_NORESTART;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9ed2fee..d8e9239 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
/*
* hosting IBM Z kernel virtual machines (s390x)
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -31,11 +31,11 @@
#include <linux/bitmap.h>
#include <linux/sched/signal.h>
#include <linux/string.h>
+#include <linux/pgtable.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
@@ -44,6 +44,7 @@
#include <asm/cpacf.h>
#include <asm/timex.h>
#include <asm/ap.h>
+#include <asm/uv.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -56,109 +57,109 @@
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
(KVM_MAX_VCPUS + LOCAL_IRQS))
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "userspace_handled", VCPU_STAT(exit_userspace) },
- { "exit_null", VCPU_STAT(exit_null) },
- { "exit_validity", VCPU_STAT(exit_validity) },
- { "exit_stop_request", VCPU_STAT(exit_stop_request) },
- { "exit_external_request", VCPU_STAT(exit_external_request) },
- { "exit_io_request", VCPU_STAT(exit_io_request) },
- { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
- { "exit_instruction", VCPU_STAT(exit_instruction) },
- { "exit_pei", VCPU_STAT(exit_pei) },
- { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
- { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
- { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
- { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
- { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
- { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
- { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
- { "instruction_lctl", VCPU_STAT(instruction_lctl) },
- { "instruction_stctl", VCPU_STAT(instruction_stctl) },
- { "instruction_stctg", VCPU_STAT(instruction_stctg) },
- { "deliver_ckc", VCPU_STAT(deliver_ckc) },
- { "deliver_cputm", VCPU_STAT(deliver_cputm) },
- { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
- { "deliver_external_call", VCPU_STAT(deliver_external_call) },
- { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
- { "deliver_virtio", VCPU_STAT(deliver_virtio) },
- { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
- { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
- { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
- { "deliver_program", VCPU_STAT(deliver_program) },
- { "deliver_io", VCPU_STAT(deliver_io) },
- { "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
- { "exit_wait_state", VCPU_STAT(exit_wait_state) },
- { "inject_ckc", VCPU_STAT(inject_ckc) },
- { "inject_cputm", VCPU_STAT(inject_cputm) },
- { "inject_external_call", VCPU_STAT(inject_external_call) },
- { "inject_float_mchk", VM_STAT(inject_float_mchk) },
- { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
- { "inject_io", VM_STAT(inject_io) },
- { "inject_mchk", VCPU_STAT(inject_mchk) },
- { "inject_pfault_done", VM_STAT(inject_pfault_done) },
- { "inject_program", VCPU_STAT(inject_program) },
- { "inject_restart", VCPU_STAT(inject_restart) },
- { "inject_service_signal", VM_STAT(inject_service_signal) },
- { "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
- { "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
- { "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
- { "inject_virtio", VM_STAT(inject_virtio) },
- { "instruction_epsw", VCPU_STAT(instruction_epsw) },
- { "instruction_gs", VCPU_STAT(instruction_gs) },
- { "instruction_io_other", VCPU_STAT(instruction_io_other) },
- { "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
- { "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
- { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
- { "instruction_ptff", VCPU_STAT(instruction_ptff) },
- { "instruction_stidp", VCPU_STAT(instruction_stidp) },
- { "instruction_sck", VCPU_STAT(instruction_sck) },
- { "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
- { "instruction_spx", VCPU_STAT(instruction_spx) },
- { "instruction_stpx", VCPU_STAT(instruction_stpx) },
- { "instruction_stap", VCPU_STAT(instruction_stap) },
- { "instruction_iske", VCPU_STAT(instruction_iske) },
- { "instruction_ri", VCPU_STAT(instruction_ri) },
- { "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
- { "instruction_sske", VCPU_STAT(instruction_sske) },
- { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
- { "instruction_essa", VCPU_STAT(instruction_essa) },
- { "instruction_stsi", VCPU_STAT(instruction_stsi) },
- { "instruction_stfl", VCPU_STAT(instruction_stfl) },
- { "instruction_tb", VCPU_STAT(instruction_tb) },
- { "instruction_tpi", VCPU_STAT(instruction_tpi) },
- { "instruction_tprot", VCPU_STAT(instruction_tprot) },
- { "instruction_tsch", VCPU_STAT(instruction_tsch) },
- { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
- { "instruction_sie", VCPU_STAT(instruction_sie) },
- { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
- { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
- { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
- { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
- { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
- { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
- { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
- { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
- { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
- { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
- { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
- { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
- { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
- { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
- { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
- { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
- { "instruction_diag_10", VCPU_STAT(diagnose_10) },
- { "instruction_diag_44", VCPU_STAT(diagnose_44) },
- { "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
- { "instruction_diag_258", VCPU_STAT(diagnose_258) },
- { "instruction_diag_308", VCPU_STAT(diagnose_308) },
- { "instruction_diag_500", VCPU_STAT(diagnose_500) },
- { "instruction_diag_other", VCPU_STAT(diagnose_other) },
+ VCPU_STAT("userspace_handled", exit_userspace),
+ VCPU_STAT("exit_null", exit_null),
+ VCPU_STAT("exit_validity", exit_validity),
+ VCPU_STAT("exit_stop_request", exit_stop_request),
+ VCPU_STAT("exit_external_request", exit_external_request),
+ VCPU_STAT("exit_io_request", exit_io_request),
+ VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
+ VCPU_STAT("exit_instruction", exit_instruction),
+ VCPU_STAT("exit_pei", exit_pei),
+ VCPU_STAT("exit_program_interruption", exit_program_interruption),
+ VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
+ VCPU_STAT("exit_operation_exception", exit_operation_exception),
+ VCPU_STAT("halt_successful_poll", halt_successful_poll),
+ VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+ VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+ VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
+ VCPU_STAT("halt_wakeup", halt_wakeup),
+ VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+ VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ VCPU_STAT("instruction_lctlg", instruction_lctlg),
+ VCPU_STAT("instruction_lctl", instruction_lctl),
+ VCPU_STAT("instruction_stctl", instruction_stctl),
+ VCPU_STAT("instruction_stctg", instruction_stctg),
+ VCPU_STAT("deliver_ckc", deliver_ckc),
+ VCPU_STAT("deliver_cputm", deliver_cputm),
+ VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
+ VCPU_STAT("deliver_external_call", deliver_external_call),
+ VCPU_STAT("deliver_service_signal", deliver_service_signal),
+ VCPU_STAT("deliver_virtio", deliver_virtio),
+ VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
+ VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
+ VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
+ VCPU_STAT("deliver_program", deliver_program),
+ VCPU_STAT("deliver_io", deliver_io),
+ VCPU_STAT("deliver_machine_check", deliver_machine_check),
+ VCPU_STAT("exit_wait_state", exit_wait_state),
+ VCPU_STAT("inject_ckc", inject_ckc),
+ VCPU_STAT("inject_cputm", inject_cputm),
+ VCPU_STAT("inject_external_call", inject_external_call),
+ VM_STAT("inject_float_mchk", inject_float_mchk),
+ VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
+ VM_STAT("inject_io", inject_io),
+ VCPU_STAT("inject_mchk", inject_mchk),
+ VM_STAT("inject_pfault_done", inject_pfault_done),
+ VCPU_STAT("inject_program", inject_program),
+ VCPU_STAT("inject_restart", inject_restart),
+ VM_STAT("inject_service_signal", inject_service_signal),
+ VCPU_STAT("inject_set_prefix", inject_set_prefix),
+ VCPU_STAT("inject_stop_signal", inject_stop_signal),
+ VCPU_STAT("inject_pfault_init", inject_pfault_init),
+ VM_STAT("inject_virtio", inject_virtio),
+ VCPU_STAT("instruction_epsw", instruction_epsw),
+ VCPU_STAT("instruction_gs", instruction_gs),
+ VCPU_STAT("instruction_io_other", instruction_io_other),
+ VCPU_STAT("instruction_lpsw", instruction_lpsw),
+ VCPU_STAT("instruction_lpswe", instruction_lpswe),
+ VCPU_STAT("instruction_pfmf", instruction_pfmf),
+ VCPU_STAT("instruction_ptff", instruction_ptff),
+ VCPU_STAT("instruction_stidp", instruction_stidp),
+ VCPU_STAT("instruction_sck", instruction_sck),
+ VCPU_STAT("instruction_sckpf", instruction_sckpf),
+ VCPU_STAT("instruction_spx", instruction_spx),
+ VCPU_STAT("instruction_stpx", instruction_stpx),
+ VCPU_STAT("instruction_stap", instruction_stap),
+ VCPU_STAT("instruction_iske", instruction_iske),
+ VCPU_STAT("instruction_ri", instruction_ri),
+ VCPU_STAT("instruction_rrbe", instruction_rrbe),
+ VCPU_STAT("instruction_sske", instruction_sske),
+ VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
+ VCPU_STAT("instruction_essa", instruction_essa),
+ VCPU_STAT("instruction_stsi", instruction_stsi),
+ VCPU_STAT("instruction_stfl", instruction_stfl),
+ VCPU_STAT("instruction_tb", instruction_tb),
+ VCPU_STAT("instruction_tpi", instruction_tpi),
+ VCPU_STAT("instruction_tprot", instruction_tprot),
+ VCPU_STAT("instruction_tsch", instruction_tsch),
+ VCPU_STAT("instruction_sthyi", instruction_sthyi),
+ VCPU_STAT("instruction_sie", instruction_sie),
+ VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
+ VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
+ VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
+ VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
+ VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
+ VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
+ VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
+ VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
+ VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
+ VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
+ VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
+ VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
+ VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
+ VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
+ VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
+ VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
+ VCPU_STAT("instruction_diag_10", diagnose_10),
+ VCPU_STAT("instruction_diag_44", diagnose_44),
+ VCPU_STAT("instruction_diag_9c", diagnose_9c),
+ VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
+ VCPU_STAT("instruction_diag_258", diagnose_258),
+ VCPU_STAT("instruction_diag_308", diagnose_308),
+ VCPU_STAT("instruction_diag_500", diagnose_500),
+ VCPU_STAT("instruction_diag_other", diagnose_other),
{ NULL }
};
@@ -183,6 +184,11 @@
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
/*
* For now we handle at most 16 double words as this is what the s390 base
* kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -219,6 +225,7 @@
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
/* Section: not file related */
int kvm_arch_hardware_enable(void)
@@ -227,13 +234,15 @@
return 0;
}
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
{
return 0;
}
+/* forward declarations */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
{
@@ -292,7 +301,7 @@
.notifier_call = kvm_clock_sync,
};
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
gmap_register_pte_notifier(&gmap_notifier);
@@ -453,16 +462,19 @@
int kvm_arch_init(void *opaque)
{
- int rc;
+ int rc = -ENOMEM;
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
if (!kvm_s390_dbf)
return -ENOMEM;
- if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
- rc = -ENOMEM;
- goto out_debug_unreg;
- }
+ kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+ if (!kvm_s390_dbf_uv)
+ goto out;
+
+ if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+ debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
+ goto out;
kvm_s390_cpu_feat_init();
@@ -470,19 +482,17 @@
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
if (rc) {
pr_err("A FLIC registration call failed with rc=%d\n", rc);
- goto out_debug_unreg;
+ goto out;
}
rc = kvm_s390_gib_init(GAL_ISC);
if (rc)
- goto out_gib_destroy;
+ goto out;
return 0;
-out_gib_destroy:
- kvm_s390_gib_destroy();
-out_debug_unreg:
- debug_unregister(kvm_s390_dbf);
+out:
+ kvm_arch_exit();
return rc;
}
@@ -490,6 +500,7 @@
{
kvm_s390_gib_destroy();
debug_unregister(kvm_s390_dbf);
+ debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */
@@ -532,6 +543,9 @@
case KVM_CAP_S390_CMMA_MIGRATION:
case KVM_CAP_S390_AIS:
case KVM_CAP_S390_AIS_MIGRATION:
+ case KVM_CAP_S390_VCPU_RESETS:
+ case KVM_CAP_SET_GUEST_DEBUG:
+ case KVM_CAP_S390_DIAG318:
r = 1;
break;
case KVM_CAP_S390_HPAGE_1M:
@@ -566,14 +580,16 @@
case KVM_CAP_S390_BPB:
r = test_facility(82);
break;
+ case KVM_CAP_S390_PROTECTED:
+ r = is_prot_virt_host();
+ break;
default:
r = 0;
}
return r;
}
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
int i;
gfn_t cur_gfn, last_gfn;
@@ -614,9 +630,8 @@
{
int r;
unsigned long n;
- struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int is_dirty = 0;
+ int is_dirty;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
@@ -627,14 +642,7 @@
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- slots = kvm_memslots(kvm);
- memslot = id_to_memslot(slots, log->slot);
- r = -ENOENT;
- if (!memslot->dirty_bitmap)
- goto out;
-
- kvm_s390_sync_dirty_log(kvm, memslot);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
@@ -756,9 +764,9 @@
r = -EINVAL;
else {
r = 0;
- down_write(&kvm->mm->mmap_sem);
+ mmap_write_lock(kvm->mm);
kvm->mm->context.allow_gmap_hpage_1m = 1;
- up_write(&kvm->mm->mmap_sem);
+ mmap_write_unlock(kvm->mm);
/*
* We might have to create fake 4k page
* tables. To avoid that the hardware works on
@@ -1808,7 +1816,7 @@
if (!keys)
return -ENOMEM;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -1822,7 +1830,7 @@
break;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (!r) {
r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
@@ -1866,7 +1874,7 @@
goto out;
i = 0;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
while (i < args->count) {
unlocked = false;
@@ -1884,7 +1892,7 @@
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r) {
- r = fixup_user_fault(current, current->mm, hva,
+ r = fixup_user_fault(current->mm, hva,
FAULT_FLAG_WRITE, &unlocked);
if (r)
break;
@@ -1893,7 +1901,7 @@
i++;
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
out:
kvfree(keys);
return r;
@@ -1998,6 +2006,9 @@
struct kvm_memslots *slots = kvm_memslots(kvm);
struct kvm_memory_slot *ms;
+ if (unlikely(!slots->used_slots))
+ return 0;
+
cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
ms = gfn_to_memslot(kvm, cur_gfn);
args->count = 0;
@@ -2079,14 +2090,14 @@
if (!values)
return -ENOMEM;
- down_read(&kvm->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
if (peek)
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
else
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
if (kvm->arch.migration_mode)
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
@@ -2136,7 +2147,7 @@
goto out;
}
- down_read(&kvm->mm->mmap_sem);
+ mmap_read_lock(kvm->mm);
srcu_idx = srcu_read_lock(&kvm->srcu);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -2151,18 +2162,206 @@
set_pgste_bits(kvm->mm, hva, mask, pgstev);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
- up_read(&kvm->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
if (!kvm->mm->context.uses_cmm) {
- down_write(&kvm->mm->mmap_sem);
+ mmap_write_lock(kvm->mm);
kvm->mm->context.uses_cmm = 1;
- up_write(&kvm->mm->mmap_sem);
+ mmap_write_unlock(kvm->mm);
}
out:
vfree(bits);
return r;
}
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+ struct kvm_vcpu *vcpu;
+ u16 rc, rrc;
+ int ret = 0;
+ int i;
+
+ /*
+ * We ignore failures and try to destroy as many CPUs as possible.
+ * At the same time we must not free the assigned resources when
+ * this fails, as the ultravisor has still access to that memory.
+ * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+ * behind.
+ * We want to return the first failure rc and rrc, though.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+ *rcp = rc;
+ *rrcp = rrc;
+ ret = -EIO;
+ }
+ mutex_unlock(&vcpu->mutex);
+ }
+ return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int i, r = 0;
+ u16 dummy;
+
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_lock(&vcpu->mutex);
+ r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+ mutex_unlock(&vcpu->mutex);
+ if (r)
+ break;
+ }
+ if (r)
+ kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+ return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+ int r = 0;
+ u16 dummy;
+ void __user *argp = (void __user *)cmd->data;
+
+ switch (cmd->cmd) {
+ case KVM_PV_ENABLE: {
+ r = -EINVAL;
+ if (kvm_s390_pv_is_protected(kvm))
+ break;
+
+ /*
+ * FMT 4 SIE needs esca. As we never switch back to bsca from
+ * esca, we need no cleanup in the error cases below
+ */
+ r = sca_switch_to_extended(kvm);
+ if (r)
+ break;
+
+ mmap_write_lock(current->mm);
+ r = gmap_mark_unmergeable();
+ mmap_write_unlock(current->mm);
+ if (r)
+ break;
+
+ r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ break;
+
+ r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+ if (r)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+ /* we need to block service interrupts from now on */
+ set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_DISABLE: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+ /*
+ * If a CPU could not be destroyed, destroy VM will also fail.
+ * There is no point in trying to destroy it. Instead return
+ * the rc and rrc from the first CPU that failed destroying.
+ */
+ if (r)
+ break;
+ r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+ /* no need to block service interrupts any more */
+ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+ break;
+ }
+ case KVM_PV_SET_SEC_PARMS: {
+ struct kvm_s390_pv_sec_parm parms = {};
+ void *hdr;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&parms, argp, sizeof(parms)))
+ break;
+
+ /* Currently restricted to 8KB */
+ r = -EINVAL;
+ if (parms.length > PAGE_SIZE * 2)
+ break;
+
+ r = -ENOMEM;
+ hdr = vmalloc(parms.length);
+ if (!hdr)
+ break;
+
+ r = -EFAULT;
+ if (!copy_from_user(hdr, (void __user *)parms.origin,
+ parms.length))
+ r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+ &cmd->rc, &cmd->rrc);
+
+ vfree(hdr);
+ break;
+ }
+ case KVM_PV_UNPACK: {
+ struct kvm_s390_pv_unp unp = {};
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&unp, argp, sizeof(unp)))
+ break;
+
+ r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_VERIFY: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+ cmd->rrc);
+ break;
+ }
+ case KVM_PV_PREP_RESET: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ case KVM_PV_UNSHARE_ALL: {
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ }
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2260,6 +2459,33 @@
mutex_unlock(&kvm->slots_lock);
break;
}
+ case KVM_S390_PV_COMMAND: {
+ struct kvm_pv_cmd args;
+
+ /* protvirt means user sigp */
+ kvm->arch.user_cpu_state_ctrl = 1;
+ r = 0;
+ if (!is_prot_virt_host()) {
+ r = -EINVAL;
+ break;
+ }
+ if (copy_from_user(&args, argp, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ if (args.flags) {
+ r = -EINVAL;
+ break;
+ }
+ mutex_lock(&kvm->lock);
+ r = kvm_s390_handle_pv(kvm, &args);
+ mutex_unlock(&kvm->lock);
+ if (copy_to_user(argp, &args, sizeof(args))) {
+ r = -EFAULT;
+ break;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -2509,7 +2735,8 @@
kvm->arch.use_skf = sclp.has_skey;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
- kvm_s390_gisa_init(kvm);
+ if (use_gisa)
+ kvm_s390_gisa_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
@@ -2523,6 +2750,8 @@
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
+ u16 rc, rrc;
+
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
kvm_s390_clear_local_irqs(vcpu);
@@ -2535,10 +2764,10 @@
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu);
+ /* We can not hold the vcpu mutex here, we are already dying */
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
free_page((unsigned long)(vcpu->arch.sie_block));
-
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
@@ -2547,7 +2776,7 @@
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_destroy(vcpu);
+ kvm_vcpu_destroy(vcpu);
mutex_lock(&kvm->lock);
for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -2559,10 +2788,20 @@
void kvm_arch_destroy_vm(struct kvm *kvm)
{
+ u16 rc, rrc;
+
kvm_free_vcpus(kvm);
sca_dispose(kvm);
- debug_unregister(kvm->arch.dbf);
kvm_s390_gisa_destroy(kvm);
+ /*
+ * We are already at the end of life and kvm->lock is not taken.
+ * This is ok as the file descriptor is closed by now and nobody
+ * can mess with the pv state. To avoid lockdep_assert_held from
+ * complaining we do not use kvm_s390_pv_is_protected.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+ debug_unregister(kvm->arch.dbf);
free_page((unsigned long)kvm->arch.sie_page2);
if (!kvm_is_ucontrol(kvm))
gmap_remove(kvm->arch.gmap);
@@ -2658,6 +2897,9 @@
unsigned int vcpu_idx;
u32 scaol, scaoh;
+ if (kvm->arch.use_esca)
+ return 0;
+
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
@@ -2709,39 +2951,6 @@
return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
- kvm_clear_async_pf_completion_queue(vcpu);
- vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
- KVM_SYNC_GPRS |
- KVM_SYNC_ACRS |
- KVM_SYNC_CRS |
- KVM_SYNC_ARCH0 |
- KVM_SYNC_PFAULT;
- kvm_s390_set_prefix(vcpu, 0);
- if (test_kvm_facility(vcpu->kvm, 64))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
- if (test_kvm_facility(vcpu->kvm, 82))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
- if (test_kvm_facility(vcpu->kvm, 133))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
- if (test_kvm_facility(vcpu->kvm, 156))
- vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
- /* fprs can be synchronized via vrs, even if the guest has no vx. With
- * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
- */
- if (MACHINE_HAS_VX)
- vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
- else
- vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
-
- if (kvm_is_ucontrol(vcpu->kvm))
- return __kvm_ucontrol_vcpu_init(vcpu);
-
- return 0;
-}
-
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
@@ -2850,33 +3059,6 @@
}
-static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
-{
- /* this equals initial cpu reset in pop, but we don't switch to ESA */
- vcpu->arch.sie_block->gpsw.mask = 0UL;
- vcpu->arch.sie_block->gpsw.addr = 0UL;
- kvm_s390_set_prefix(vcpu, 0);
- kvm_s390_set_cpu_timer(vcpu, 0);
- vcpu->arch.sie_block->ckc = 0UL;
- vcpu->arch.sie_block->todpr = 0;
- memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
- vcpu->arch.sie_block->gcr[0] = CR0_UNUSED_56 |
- CR0_INTERRUPT_KEY_SUBMASK |
- CR0_MEASUREMENT_ALERT_SUBMASK;
- vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
- CR14_UNUSED_33 |
- CR14_EXTERNAL_DAMAGE_SUBMASK;
- vcpu->run->s.regs.fpc = 0;
- vcpu->arch.sie_block->gbea = 1;
- vcpu->arch.sie_block->pp = 0;
- vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
- vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
- kvm_clear_async_pf_completion_queue(vcpu);
- if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
- kvm_s390_vcpu_stop(vcpu);
- kvm_s390_clear_local_irqs(vcpu);
-}
-
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
mutex_lock(&vcpu->kvm->lock);
@@ -2966,9 +3148,10 @@
vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ u16 uvrc, uvrrc;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
@@ -3036,29 +3219,33 @@
kvm_s390_vcpu_crypto_setup(vcpu);
+ mutex_lock(&vcpu->kvm->lock);
+ if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+ rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+ if (rc)
+ kvm_s390_vcpu_unsetup_cmma(vcpu);
+ }
+ mutex_unlock(&vcpu->kvm->lock);
+
return rc;
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
- unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
{
- struct kvm_vcpu *vcpu;
- struct sie_page *sie_page;
- int rc = -EINVAL;
-
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
- goto out;
+ return -EINVAL;
+ return 0;
+}
- rc = -ENOMEM;
-
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+ struct sie_page *sie_page;
+ int rc;
BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
- goto out_free_cpu;
+ return -ENOMEM;
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
@@ -3067,31 +3254,65 @@
vcpu->arch.sie_block->mso = 0;
vcpu->arch.sie_block->msl = sclp.hamax;
- vcpu->arch.sie_block->icpua = id;
+ vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
spin_lock_init(&vcpu->arch.local_int.lock);
- vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
+ vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
vcpu->arch.sie_block->gd |= GISA_FORMAT1;
seqcount_init(&vcpu->arch.cputm_seqcount);
- rc = kvm_vcpu_init(vcpu, kvm, id);
- if (rc)
- goto out_free_sie_block;
- VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
- vcpu->arch.sie_block);
- trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
+ vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+ KVM_SYNC_GPRS |
+ KVM_SYNC_ACRS |
+ KVM_SYNC_CRS |
+ KVM_SYNC_ARCH0 |
+ KVM_SYNC_PFAULT |
+ KVM_SYNC_DIAG318;
+ kvm_s390_set_prefix(vcpu, 0);
+ if (test_kvm_facility(vcpu->kvm, 64))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 82))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
+ if (test_kvm_facility(vcpu->kvm, 133))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+ if (test_kvm_facility(vcpu->kvm, 156))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
+ /* fprs can be synchronized via vrs, even if the guest has no vx. With
+ * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+ */
+ if (MACHINE_HAS_VX)
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+ else
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
- return vcpu;
+ if (kvm_is_ucontrol(vcpu->kvm)) {
+ rc = __kvm_ucontrol_vcpu_init(vcpu);
+ if (rc)
+ goto out_free_sie_block;
+ }
+
+ VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+ vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+ trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+
+ rc = kvm_s390_vcpu_setup(vcpu);
+ if (rc)
+ goto out_ucontrol_uninit;
+ return 0;
+
+out_ucontrol_uninit:
+ if (kvm_is_ucontrol(vcpu->kvm))
+ gmap_remove(vcpu->arch.gmap);
out_free_sie_block:
free_page((unsigned long)(vcpu->arch.sie_block));
-out_free_cpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(rc);
+ return rc;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
+ clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
@@ -3291,10 +3512,76 @@
return r;
}
-static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
{
- kvm_s390_vcpu_initial_reset(vcpu);
- return 0;
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
+
+ kvm_clear_async_pf_completion_queue(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
+ kvm_s390_clear_local_irqs(vcpu);
+}
+
+static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+ /* Initial reset is a superset of the normal reset */
+ kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+
+ /*
+ * This equals initial cpu reset in pop, but we don't switch to ESA.
+ * We do not only reset the internal data, but also ...
+ */
+ vcpu->arch.sie_block->gpsw.mask = 0;
+ vcpu->arch.sie_block->gpsw.addr = 0;
+ kvm_s390_set_prefix(vcpu, 0);
+ kvm_s390_set_cpu_timer(vcpu, 0);
+ vcpu->arch.sie_block->ckc = 0;
+ memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
+ vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
+ vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
+
+ /* ... the data in sync regs */
+ memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
+ vcpu->run->s.regs.ckc = 0;
+ vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
+ vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
+ vcpu->run->psw_addr = 0;
+ vcpu->run->psw_mask = 0;
+ vcpu->run->s.regs.todpr = 0;
+ vcpu->run->s.regs.cputm = 0;
+ vcpu->run->s.regs.ckc = 0;
+ vcpu->run->s.regs.pp = 0;
+ vcpu->run->s.regs.gbea = 1;
+ vcpu->run->s.regs.fpc = 0;
+ /*
+ * Do not reset these registers in the protected case, as some of
+ * them are overlayed and they are not accessible in this case
+ * anyway.
+ */
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->todpr = 0;
+ }
+}
+
+static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
+{
+ struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+ /* Clear reset is a superset of the initial reset */
+ kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+
+ memset(®s->gprs, 0, sizeof(regs->gprs));
+ memset(®s->vrs, 0, sizeof(regs->vrs));
+ memset(®s->acrs, 0, sizeof(regs->acrs));
+ memset(®s->gscb, 0, sizeof(regs->gscb));
+
+ regs->etoken = 0;
+ regs->etoken_extension = 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -3468,14 +3755,20 @@
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
- kvm_s390_vcpu_stop(vcpu);
+ rc = kvm_s390_vcpu_stop(vcpu);
break;
case KVM_MP_STATE_OPERATING:
- kvm_s390_vcpu_start(vcpu);
+ rc = kvm_s390_vcpu_start(vcpu);
break;
case KVM_MP_STATE_LOAD:
+ if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+ rc = -ENXIO;
+ break;
+ }
+ rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+ break;
case KVM_MP_STATE_CHECK_STOP:
- /* fall through - CHECK_STOP and LOAD are not supported yet */
+ fallthrough; /* CHECK_STOP and LOAD are not supported yet */
default:
rc = -ENXIO;
}
@@ -3633,11 +3926,13 @@
}
}
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+
+ return true;
}
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
@@ -3653,7 +3948,7 @@
/* s390 will always inject the page directly */
}
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
/*
* s390 will always inject the page directly,
@@ -3662,33 +3957,31 @@
return true;
}
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
hva_t hva;
struct kvm_arch_async_pf arch;
- int rc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
- return 0;
+ return false;
if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare)
- return 0;
+ return false;
if (psw_extint_disabled(vcpu))
- return 0;
+ return false;
if (kvm_s390_vcpu_has_irq(vcpu, 0))
- return 0;
+ return false;
if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
- return 0;
+ return false;
if (!vcpu->arch.gmap->pfault_enabled)
- return 0;
+ return false;
hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
hva += current->thread.gmap_addr & ~PAGE_MASK;
if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
- return 0;
+ return false;
- rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
- return rc;
+ return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -3708,9 +4001,6 @@
if (need_resched())
schedule();
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
-
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc)
@@ -3825,9 +4115,11 @@
return vcpu_post_run_fault_in_sie(vcpu);
}
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
+ struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
/*
* We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3849,8 +4141,28 @@
guest_enter_irqoff();
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(sie_page->pv_grregs,
+ vcpu->run->s.regs.gprs,
+ sizeof(sie_page->pv_grregs));
+ }
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy(vcpu->run->s.regs.gprs,
+ sie_page->pv_grregs,
+ sizeof(sie_page->pv_grregs));
+ /*
+ * We're not allowed to inject interrupts on intercepts
+ * that leave the guest state in an "in-between" state
+ * where the next SIE entry will do a continuation.
+ * Fence interrupts in our "internal" PSW.
+ */
+ if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+ vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ }
+ }
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
@@ -3864,8 +4176,9 @@
return rc;
}
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
struct runtime_instr_cb *riccb;
struct gs_cb *gscb;
@@ -3873,16 +4186,7 @@
gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- /* some control register changes require a tlb flush */
- kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
- }
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
- kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
- vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3894,6 +4198,10 @@
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
+ vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
+ vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
+ }
/*
* If userspace sets the riccb (e.g. after migration) to a valid state,
* we should enable RI here instead of doing the lazy enablement.
@@ -3923,20 +4231,6 @@
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
- save_access_regs(vcpu->arch.host_acrs);
- restore_access_regs(vcpu->run->s.regs.acrs);
- /* save host (userspace) fprs/vrs */
- save_fpu_regs();
- vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
- vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
- if (MACHINE_HAS_VX)
- current->thread.fpu.regs = vcpu->run->s.regs.vrs;
- else
- current->thread.fpu.regs = vcpu->run->s.regs.fprs;
- current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
- if (test_fp_ctl(current->thread.fpu.fpc))
- /* User space provided an invalid FPC, let's clear it */
- current->thread.fpu.fpc = 0;
if (MACHINE_HAS_GS) {
preempt_disable();
__ctl_set_bit(2, 4);
@@ -3952,33 +4246,68 @@
preempt_enable();
}
/* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *kvm_run = vcpu->run;
+
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ }
+ save_access_regs(vcpu->arch.host_acrs);
+ restore_access_regs(vcpu->run->s.regs.acrs);
+ /* save host (userspace) fprs/vrs */
+ save_fpu_regs();
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+ if (MACHINE_HAS_VX)
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ else
+ current->thread.fpu.regs = vcpu->run->s.regs.fprs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+ if (test_fp_ctl(current->thread.fpu.fpc))
+ /* User space provided an invalid FPC, let's clear it */
+ current->thread.fpu.fpc = 0;
+
+ /* Sync fmt2 only data */
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+ sync_regs_fmt2(vcpu);
+ } else {
+ /*
+ * In several places we have to modify our internal view to
+ * not do things that are disallowed by the ultravisor. For
+ * example we must not inject interrupts after specific exits
+ * (e.g. 112 prefix page not secure). We do this by turning
+ * off the machine check, external and I/O interrupt bits
+ * of our PSW copy. To avoid getting validity intercepts, we
+ * do only accept the condition code from userspace.
+ */
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+ vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+ PSW_MASK_CC;
+ }
kvm_run->kvm_dirty_regs = 0;
}
-static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs_fmt2(struct kvm_vcpu *vcpu)
{
- kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
- kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
- kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
- memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
- kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
- kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+ struct kvm_run *kvm_run = vcpu->run;
+
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
- kvm_run->s.regs.pft = vcpu->arch.pfault_token;
- kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
- kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
- save_access_regs(vcpu->run->s.regs.acrs);
- restore_access_regs(vcpu->arch.host_acrs);
- /* Save guest register state */
- save_fpu_regs();
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- /* Restore will be done lazily at return */
- current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
- current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+ kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
if (MACHINE_HAS_GS) {
preempt_disable();
__ctl_set_bit(2, 4);
@@ -3994,8 +4323,34 @@
/* SIE will save etoken directly into SDNX and therefore kvm_run */
}
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs(struct kvm_vcpu *vcpu)
{
+ struct kvm_run *kvm_run = vcpu->run;
+
+ kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+ memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+ kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
+ kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+ kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+ kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+ kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+ save_access_regs(vcpu->run->s.regs.acrs);
+ restore_access_regs(vcpu->arch.host_acrs);
+ /* Save guest register state */
+ save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+ /* Restore will be done lazily at return */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+ store_regs_fmt2(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *kvm_run = vcpu->run;
int rc;
if (kvm_run->immediate_exit)
@@ -4015,6 +4370,10 @@
kvm_sigset_activate(vcpu);
+ /*
+ * no need to check the return value of vcpu_start as it can only have
+ * an error for protvirt, but protvirt means user cpu state
+ */
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
@@ -4024,7 +4383,7 @@
goto out;
}
- sync_regs(vcpu, kvm_run);
+ sync_regs(vcpu);
enable_cpu_timer_accounting(vcpu);
might_fault();
@@ -4046,7 +4405,7 @@
}
disable_cpu_timer_accounting(vcpu);
- store_regs(vcpu, kvm_run);
+ store_regs(vcpu);
kvm_sigset_deactivate(vcpu);
@@ -4152,18 +4511,27 @@
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Let's tell the UV that we want to change into the operating state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
started_vcpus++;
@@ -4183,31 +4551,52 @@
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
/*
+ * The real PSW might have changed due to a RESTART interpreted by the
+ * ultravisor. We block all interrupts and let the next sie exit
+ * refresh our view.
+ */
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+ /*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
- int i, online_vcpus, started_vcpus = 0;
+ int i, online_vcpus, r = 0, started_vcpus = 0;
struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu))
- return;
+ return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
- /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
+ /* Let's tell the UV that we want to change into the stopped state */
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+ if (r) {
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+ return r;
+ }
+ }
+
+ /*
+ * Set the VCPU to STOPPED and THEN clear the interrupt flag,
+ * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
+ * have been fully processed. This will ensure that the VCPU
+ * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
+ */
+ kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
kvm_s390_clear_stop_irq(vcpu);
- kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -4226,7 +4615,7 @@
}
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
- return;
+ return 0;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4253,12 +4642,42 @@
return r;
}
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ int r = 0;
+
+ if (mop->flags || !mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset < mop->size)
+ return -EINVAL;
+ if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+ return -E2BIG;
+ if (!kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_SIDA_READ:
+ if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), mop->size))
+ r = -EFAULT;
+
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+ mop->sida_offset), uaddr, mop->size))
+ r = -EFAULT;
+ break;
+ }
+ return r;
+}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
- int r, srcu_idx;
+ int r = 0;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
| KVM_S390_MEMOP_F_CHECK_ONLY;
@@ -4268,14 +4687,15 @@
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ return -EINVAL;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4301,12 +4721,8 @@
}
r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
break;
- default:
- r = -EINVAL;
}
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
@@ -4314,6 +4730,31 @@
return r;
}
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ int r, srcu_idx;
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ r = kvm_s390_guest_mem_op(vcpu, mop);
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ /* we are locked against sida going away by the vcpu->mutex */
+ r = kvm_s390_guest_sida_op(vcpu, mop);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ return r;
+}
+
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -4349,6 +4790,7 @@
void __user *argp = (void __user *)arg;
int idx;
long r;
+ u16 rc, rrc;
vcpu_load(vcpu);
@@ -4367,12 +4809,43 @@
r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
break;
}
+ case KVM_S390_CLEAR_RESET:
+ r = 0;
+ kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
+ break;
case KVM_S390_INITIAL_RESET:
- r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ r = 0;
+ kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET_INITIAL,
+ &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
+ break;
+ case KVM_S390_NORMAL_RESET:
+ r = 0;
+ kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+ UVC_CMD_CPU_RESET, &rc, &rrc);
+ VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+ rc, rrc);
+ }
break;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+ r = -EINVAL;
+ if (kvm_s390_pv_cpu_is_protected(vcpu))
+ break;
r = -EFAULT;
if (copy_from_user(®, argp, sizeof(reg)))
break;
@@ -4435,7 +4908,7 @@
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
- r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
@@ -4495,12 +4968,6 @@
return VM_FAULT_SIGBUS;
}
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -4521,12 +4988,15 @@
if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
return -EINVAL;
+ /* When we are protected, we should not change the memory slots */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
@@ -4542,7 +5012,7 @@
old->npages * PAGE_SIZE);
if (rc)
break;
- /* FALLTHROUGH */
+ fallthrough;
case KVM_MR_CREATE:
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 63d94a5..a3e9b71 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -2,7 +2,7 @@
/*
* definition for kvm on s390
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -15,6 +15,7 @@
#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/lockdep.h>
#include <asm/facility.h>
#include <asm/processor.h>
#include <asm/sclp.h>
@@ -25,6 +26,17 @@
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
extern debug_info_t *kvm_s390_dbf;
+extern debug_info_t *kvm_s390_dbf_uv;
+
+#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+ debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+ d_args); \
+ debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \
+ "%d: " d_string "\n", (d_kvm)->userspace_pid, \
+ d_args); \
+} while (0)
+
#define KVM_EVENT(d_loglevel, d_string, d_args...)\
do { \
debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
@@ -196,6 +208,39 @@
return kvm->arch.user_cpu_state_ctrl != 0;
}
+/* implemented in pv.c */
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+ return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.pv.handle;
+}
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+ lockdep_assert_held(&kvm->lock);
+ return !!kvm_s390_pv_get_handle(kvm);
+}
+
+static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+ lockdep_assert_held(&vcpu->mutex);
+ return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+
/* implemented in interrupt.c */
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -286,8 +331,8 @@
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
@@ -373,6 +418,7 @@
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
extern struct kvm_device_ops kvm_flic_ops;
int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
void __user *buf, int len);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 560310e..3b1a498 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
/*
* handling privileged instructions
*
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -13,6 +13,7 @@
#include <linux/errno.h>
#include <linux/compat.h>
#include <linux/mm_types.h>
+#include <linux/pgtable.h>
#include <asm/asm-offsets.h>
#include <asm/facility.h>
@@ -20,9 +21,7 @@
#include <asm/debug.h>
#include <asm/ebcdic.h>
#include <asm/sysinfo.h>
-#include <asm/pgtable.h>
#include <asm/page-states.h>
-#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/io.h>
#include <asm/ptrace.h>
@@ -270,18 +269,18 @@
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = get_guest_storage_key(current->mm, vmaddr, &key);
if (rc) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
goto retry;
}
}
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
@@ -317,17 +316,17 @@
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
retry:
unlocked = false;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = reset_guest_reference_bit(current->mm, vmaddr);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
if (!rc) {
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
goto retry;
}
}
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
@@ -385,19 +384,21 @@
if (kvm_is_error_hva(vmaddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
m3 & SSKE_NQ, m3 & SSKE_MR,
m3 & SSKE_MC);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (rc == -EAGAIN)
+ continue;
if (rc < 0)
return rc;
start += PAGE_SIZE;
@@ -874,7 +875,7 @@
operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
- if (operand2 & 0xfff)
+ if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
switch (fc) {
@@ -895,8 +896,13 @@
handle_stsi_3_2_2(vcpu, (void *) mem);
break;
}
-
- rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+ memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
+ PAGE_SIZE);
+ rc = 0;
+ } else {
+ rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+ }
if (rc) {
rc = kvm_s390_inject_prog_cond(vcpu, rc);
goto out;
@@ -1086,15 +1092,15 @@
if (rc)
return rc;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
rc = cond_set_guest_storage_key(current->mm, vmaddr,
key, NULL, nq, mr, mc);
if (rc < 0) {
- rc = fixup_user_fault(current, current->mm, vmaddr,
+ rc = fixup_user_fault(current->mm, vmaddr,
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == -EAGAIN)
@@ -1117,7 +1123,7 @@
}
/*
- * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu)
*/
static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
{
@@ -1215,9 +1221,9 @@
* already correct, we do nothing and avoid the lock.
*/
if (vcpu->kvm->mm->context.uses_cmm == 0) {
- down_write(&vcpu->kvm->mm->mmap_sem);
+ mmap_write_lock(vcpu->kvm->mm);
vcpu->kvm->mm->context.uses_cmm = 1;
- up_write(&vcpu->kvm->mm->mmap_sem);
+ mmap_write_unlock(vcpu->kvm->mm);
}
/*
* If we are here, we are supposed to have CMMA enabled in
@@ -1234,11 +1240,11 @@
} else {
int srcu_idx;
- down_read(&vcpu->kvm->mm->mmap_sem);
+ mmap_read_lock(vcpu->kvm->mm);
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
i = __do_essa(vcpu, orc);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
- up_read(&vcpu->kvm->mm->mmap_sem);
+ mmap_read_unlock(vcpu->kvm->mm);
if (i < 0)
return i;
/* Account for the possible extra cbrl entry */
@@ -1246,10 +1252,10 @@
}
vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
for (i = 0; i < entries; ++i)
__gmap_zap(gmap, cbrlo[i]);
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return 0;
}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 0000000..8228878
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Protected Virtual Machines
+ *
+ * Copyright IBM Corp. 2019, 2020
+ * Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ if (!kvm_s390_pv_cpu_get_handle(vcpu))
+ return 0;
+
+ cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+ KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+ vcpu->vcpu_id, *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
+
+ /* Intended memory leak for something that should never happen. */
+ if (!cc)
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+
+ free_page(sida_origin(vcpu->arch.sie_block));
+ vcpu->arch.sie_block->pv_handle_cpu = 0;
+ vcpu->arch.sie_block->pv_handle_config = 0;
+ memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+ vcpu->arch.sie_block->sdf = 0;
+ /*
+ * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
+ * Use the reset value of gbea to avoid leaking the kernel pointer of
+ * the just freed sida.
+ */
+ vcpu->arch.sie_block->gbea = 1;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+ return cc ? EIO : 0;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_csc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CPU,
+ .header.len = sizeof(uvcb),
+ };
+ int cc;
+
+ if (kvm_s390_pv_cpu_get_handle(vcpu))
+ return -EINVAL;
+
+ vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+ get_order(uv_info.guest_cpu_stor_len));
+ if (!vcpu->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /* Input */
+ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+ uvcb.num = vcpu->arch.sie_block->icpua;
+ uvcb.state_origin = (u64)vcpu->arch.sie_block;
+ uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+ /* Alloc Secure Instruction Data Area Designation */
+ vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+ if (!vcpu->arch.sie_block->sidad) {
+ free_pages(vcpu->arch.pv.stor_base,
+ get_order(uv_info.guest_cpu_stor_len));
+ return -ENOMEM;
+ }
+
+ cc = uv_call(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(vcpu->kvm, 3,
+ "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+ vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+ uvcb.header.rrc);
+
+ if (cc) {
+ u16 dummy;
+
+ kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+ return -EIO;
+ }
+
+ /* Output */
+ vcpu->arch.pv.handle = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+ vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+ vcpu->arch.sie_block->sdf = 2;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ return 0;
+}
+
+/* only free resources when the destroy was successful */
+static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+ vfree(kvm->arch.pv.stor_var);
+ free_pages(kvm->arch.pv.stor_base,
+ get_order(uv_info.guest_base_stor_len));
+ memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+ unsigned long base = uv_info.guest_base_stor_len;
+ unsigned long virt = uv_info.guest_virt_var_stor_len;
+ unsigned long npages = 0, vlen = 0;
+ struct kvm_memory_slot *memslot;
+
+ kvm->arch.pv.stor_var = NULL;
+ kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+ if (!kvm->arch.pv.stor_base)
+ return -ENOMEM;
+
+ /*
+ * Calculate current guest storage for allocation of the
+ * variable storage, which is based on the length in MB.
+ *
+ * Slots are sorted by GFN
+ */
+ mutex_lock(&kvm->slots_lock);
+ memslot = kvm_memslots(kvm)->memslots;
+ npages = memslot->base_gfn + memslot->npages;
+ mutex_unlock(&kvm->slots_lock);
+
+ kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+ /* Allocate variable storage */
+ vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+ vlen += uv_info.guest_virt_base_stor_len;
+ kvm->arch.pv.stor_var = vzalloc(vlen);
+ if (!kvm->arch.pv.stor_var)
+ goto out_err;
+ return 0;
+
+out_err:
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -ENOMEM;
+}
+
+/* this should not fail, but if it does, we must not free the donated memory */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ int cc;
+
+ /* make all pages accessible before destroying the guest */
+ s390_reset_acc(kvm->mm);
+
+ cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+ WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+ atomic_set(&kvm->mm->context.is_protected, 0);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+ WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+ /* Inteded memory leak on "impossible" error */
+ if (!cc)
+ kvm_s390_pv_dealloc_vm(kvm);
+ return cc ? -EIO : 0;
+}
+
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_cgc uvcb = {
+ .header.cmd = UVC_CMD_CREATE_SEC_CONF,
+ .header.len = sizeof(uvcb)
+ };
+ int cc, ret;
+ u16 dummy;
+
+ ret = kvm_s390_pv_alloc_vm(kvm);
+ if (ret)
+ return ret;
+
+ /* Inputs */
+ uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+ uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+ uvcb.guest_asce = kvm->arch.gmap->asce;
+ uvcb.guest_sca = (unsigned long)kvm->arch.sca;
+ uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+ uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+ cc = uv_call_sched(0, (u64)&uvcb);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+ uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+
+ /* Outputs */
+ kvm->arch.pv.handle = uvcb.guest_handle;
+
+ if (cc) {
+ if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+ kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+ else
+ kvm_s390_pv_dealloc_vm(kvm);
+ return -EIO;
+ }
+ kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+ return 0;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+ u16 *rrc)
+{
+ struct uv_cb_ssc uvcb = {
+ .header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+ .header.len = sizeof(uvcb),
+ .sec_header_origin = (u64)hdr,
+ .sec_header_len = length,
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ int cc = uv_call(0, (u64)&uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+ *rc, *rrc);
+ if (!cc)
+ atomic_set(&kvm->mm->context.is_protected, 1);
+ return cc ? -EINVAL : 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
+ u64 offset, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_unp uvcb = {
+ .header.cmd = UVC_CMD_UNPACK_IMG,
+ .header.len = sizeof(uvcb),
+ .guest_handle = kvm_s390_pv_get_handle(kvm),
+ .gaddr = addr,
+ .tweak[0] = tweak,
+ .tweak[1] = offset,
+ };
+ int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+
+ if (ret && ret != -EAGAIN)
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+ uvcb.gaddr, *rc, *rrc);
+ return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+ unsigned long tweak, u16 *rc, u16 *rrc)
+{
+ u64 offset = 0;
+ int ret = 0;
+
+ if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+ return -EINVAL;
+
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+ addr, size);
+
+ while (offset < size) {
+ ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
+ if (ret == -EAGAIN) {
+ cond_resched();
+ if (fatal_signal_pending(current))
+ break;
+ continue;
+ }
+ if (ret)
+ break;
+ addr += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ }
+ if (!ret)
+ KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+ return ret;
+}
+
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
+{
+ struct uv_cb_cpu_set_state uvcb = {
+ .header.cmd = UVC_CMD_CPU_SET_STATE,
+ .header.len = sizeof(uvcb),
+ .cpu_handle = kvm_s390_pv_cpu_get_handle(vcpu),
+ .state = state,
+ };
+ int cc;
+
+ cc = uv_call(0, (u64)&uvcb);
+ KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
+ vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
+ if (cc)
+ return -EINVAL;
+ return 0;
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 683036c..3dc921e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -288,6 +288,34 @@
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
+ /*
+ * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
+ * are processed asynchronously. Until the affected VCPU finishes
+ * its work and calls back into KVM to clear the (RESTART or STOP)
+ * interrupt, we need to return any new non-reset orders "busy".
+ *
+ * This is important because a single VCPU could issue:
+ * 1) SIGP STOP $DESTINATION
+ * 2) SIGP SENSE $DESTINATION
+ *
+ * If the SIGP SENSE would not be rejected as "busy", it could
+ * return an incorrect answer as to whether the VCPU is STOPPED
+ * or OPERATING.
+ */
+ if (order_code != SIGP_INITIAL_CPU_RESET &&
+ order_code != SIGP_CPU_RESET) {
+ /*
+ * Lockless check. Both SIGP STOP and SIGP (RE)START
+ * properly synchronize everything while processing
+ * their orders, while the guest cannot observe a
+ * difference when issuing other orders from two
+ * different VCPUs.
+ */
+ if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
+ kvm_s390_is_restart_irq_pending(dst_vcpu))
+ return SIGP_CC_BUSY;
+ }
+
switch (order_code) {
case SIGP_SENSE:
vcpu->stat.instruction_sigp_sense++;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4f6c22d..3fbf708 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -416,11 +416,6 @@
memcpy((void *)((u64)scb_o + 0xc0),
(void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
break;
- case ICPT_PARTEXEC:
- /* MVPG only */
- memcpy((void *)((u64)scb_o + 0xc0),
- (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
- break;
}
if (scb_s->ihcpu != 0xffffU)
@@ -548,6 +543,7 @@
scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
scb_s->hpid = HPID_VSIE;
+ scb_s->cpnc = scb_o->cpnc;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
@@ -618,10 +614,10 @@
/* with mso/msl, the prefix lies at offset *mso* */
prefix += scb_s->mso;
- rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+ rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
if (!rc && (scb_s->ecb & ECB_TE))
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- prefix + PAGE_SIZE);
+ prefix + PAGE_SIZE, NULL);
/*
* We don't have to mprotect, we will be called for all unshadows.
* SIE will detect if protection applies and trigger a validity.
@@ -912,7 +908,7 @@
current->thread.gmap_addr, 1);
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- current->thread.gmap_addr);
+ current->thread.gmap_addr, NULL);
if (rc > 0) {
rc = inject_fault(vcpu, rc,
current->thread.gmap_addr,
@@ -934,7 +930,7 @@
{
if (vsie_page->fault_addr)
kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
- vsie_page->fault_addr);
+ vsie_page->fault_addr, NULL);
vsie_page->fault_addr = 0;
}
@@ -982,6 +978,98 @@
}
/*
+ * Get a register for a nested guest.
+ * @vcpu the vcpu of the guest
+ * @vsie_page the vsie_page for the nested guest
+ * @reg the register number, the upper 4 bits are ignored.
+ * returns: the value of the register.
+ */
+static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
+{
+ /* no need to validate the parameter and/or perform error handling */
+ reg &= 0xf;
+ switch (reg) {
+ case 15:
+ return vsie_page->scb_s.gg15;
+ case 14:
+ return vsie_page->scb_s.gg14;
+ default:
+ return vcpu->run->s.regs.gprs[reg];
+ }
+}
+
+static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+ struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+ unsigned long pei_dest, pei_src, src, dest, mask, prefix;
+ u64 *pei_block = &vsie_page->scb_o->mcic;
+ int edat, rc_dest, rc_src;
+ union ctlreg0 cr0;
+
+ cr0.val = vcpu->arch.sie_block->gcr[0];
+ edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+ mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
+ prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+
+ dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
+ dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
+ src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
+ src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
+
+ rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
+ rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
+ /*
+ * Either everything went well, or something non-critical went wrong
+ * e.g. because of a race. In either case, simply retry.
+ */
+ if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
+ retry_vsie_icpt(vsie_page);
+ return -EAGAIN;
+ }
+ /* Something more serious went wrong, propagate the error */
+ if (rc_dest < 0)
+ return rc_dest;
+ if (rc_src < 0)
+ return rc_src;
+
+ /* The only possible suppressing exception: just deliver it */
+ if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
+ clear_vsie_icpt(vsie_page);
+ rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
+ WARN_ON_ONCE(rc_dest);
+ return 1;
+ }
+
+ /*
+ * Forward the PEI intercept to the guest if it was a page fault, or
+ * also for segment and region table faults if EDAT applies.
+ */
+ if (edat) {
+ rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
+ rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
+ } else {
+ rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
+ rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
+ }
+ if (!rc_dest && !rc_src) {
+ pei_block[0] = pei_dest;
+ pei_block[1] = pei_src;
+ return 1;
+ }
+
+ retry_vsie_icpt(vsie_page);
+
+ /*
+ * The host has edat, and the guest does not, or it was an ASCE type
+ * exception. The host needs to inject the appropriate DAT interrupts
+ * into the guest.
+ */
+ if (rc_dest)
+ return inject_fault(vcpu, rc_dest, dest, 1);
+ return inject_fault(vcpu, rc_src, src, 0);
+}
+
+/*
* Run the vsie on a shadow scb and a shadow gmap, without any further
* sanity checks, handling SIE faults.
*
@@ -1000,11 +1088,6 @@
handle_last_fault(vcpu, vsie_page);
- if (need_resched())
- schedule();
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
-
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/* save current guest state of bp isolation override */
@@ -1072,6 +1155,10 @@
if ((scb_s->ipa & 0xf000) != 0xf000)
scb_s->ipa += 0x1000;
break;
+ case ICPT_PARTEXEC:
+ if (scb_s->ipa == 0xb254)
+ rc = vsie_handle_mvpg(vcpu, vsie_page);
+ break;
}
return rc;
}
@@ -1185,6 +1272,7 @@
kvm_s390_vcpu_has_irq(vcpu, 0) ||
kvm_s390_vcpu_sie_inhibited(vcpu))
break;
+ cond_resched();
}
if (rc == -EFAULT) {
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index d7c218e..6783339 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -11,3 +11,8 @@
# Instrumenting memory accesses to __user data (in different address space)
# produce false positives
KASAN_SANITIZE_uaccess.o := n
+
+obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
+CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index d4aa107..8c0c68e 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -46,6 +46,7 @@
set_clock_comparator(end);
set_cpu_flag(CIF_IGNORE_IRQ);
psw_idle(&idle, psw_mask);
+ trace_hardirqs_off();
clear_cpu_flag(CIF_IGNORE_IRQ);
set_clock_comparator(S390_lowcore.clock_comparator);
__ctl_load(cr0, 0, 0);
diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c
new file mode 100644
index 0000000..8c9d4da
--- /dev/null
+++ b/arch/s390/lib/error-inject.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <asm/ptrace.h>
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ /*
+ * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some
+ * kernel function.
+ */
+ regs->psw.addr = regs->gprs[14];
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 30a7c8c..9b2dab5 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -74,7 +74,7 @@
{
int owner;
- asm volatile(
+ asm_inline volatile(
ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
@@ -85,7 +85,7 @@
{
int expected = old;
- asm volatile(
+ asm_inline volatile(
ALTERNATIVE("", ".long 0xb2fa0080", 49) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
@@ -242,7 +242,6 @@
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
- /* Use classic spinlocks + niai if the steal time is >= 10% */
if (test_cpu_flag(CIF_DEDICATED_CPU))
arch_spin_lock_queued(lp);
else
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 0e30e6e..db4e539 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -246,14 +246,13 @@
#ifdef __HAVE_ARCH_STRRCHR
char *strrchr(const char *s, int c)
{
- size_t len = __strend(s) - s;
+ ssize_t len = __strend(s) - s;
- if (len)
- do {
- if (s[len] == (char) c)
- return (char *) s + len;
- } while (--len > 0);
- return NULL;
+ do {
+ if (s[len] == (char)c)
+ return (char *)s + len;
+ } while (--len >= 0);
+ return NULL;
}
EXPORT_SYMBOL(strrchr);
#endif
@@ -333,7 +332,7 @@
* memcmp - Compare two areas of memory
* @s1: One area of memory
* @s2: Another area of memory
- * @count: The size of the area.
+ * @n: The size of the area.
*/
#ifdef __HAVE_ARCH_MEMCMP
int memcmp(const void *s1, const void *s2, size_t n)
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
new file mode 100644
index 0000000..b0b67e6
--- /dev/null
+++ b/arch/s390/lib/test_unwind.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test module for unwind_for_each_frame
+ */
+
+#define pr_fmt(fmt) "test_unwind: " fmt
+#include <asm/unwind.h>
+#include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <linux/wait.h>
+#include <asm/irq.h>
+#include <asm/delay.h>
+
+#define BT_BUF_SIZE (PAGE_SIZE * 4)
+
+/*
+ * To avoid printk line limit split backtrace by lines
+ */
+static void print_backtrace(char *bt)
+{
+ char *p;
+
+ while (true) {
+ p = strsep(&bt, "\n");
+ if (!p)
+ break;
+ pr_err("%s\n", p);
+ }
+}
+
+/*
+ * Calls unwind_for_each_frame(task, regs, sp) and verifies that the result
+ * contains unwindme_func2 followed by unwindme_func1.
+ */
+static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
+ unsigned long sp)
+{
+ int frame_count, prev_is_func2, seen_func2_func1;
+ const int max_frames = 128;
+ struct unwind_state state;
+ size_t bt_pos = 0;
+ int ret = 0;
+ char *bt;
+
+ bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC);
+ if (!bt) {
+ pr_err("failed to allocate backtrace buffer\n");
+ return -ENOMEM;
+ }
+ /* Unwind. */
+ frame_count = 0;
+ prev_is_func2 = 0;
+ seen_func2_func1 = 0;
+ unwind_for_each_frame(&state, task, regs, sp) {
+ unsigned long addr = unwind_get_return_address(&state);
+ char sym[KSYM_SYMBOL_LEN];
+
+ if (frame_count++ == max_frames)
+ break;
+ if (state.reliable && !addr) {
+ pr_err("unwind state reliable but addr is 0\n");
+ kfree(bt);
+ return -EINVAL;
+ }
+ sprint_symbol(sym, addr);
+ if (bt_pos < BT_BUF_SIZE) {
+ bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos,
+ state.reliable ? " [%-7s%px] %pSR\n" :
+ "([%-7s%px] %pSR)\n",
+ stack_type_name(state.stack_info.type),
+ (void *)state.sp, (void *)state.ip);
+ if (bt_pos >= BT_BUF_SIZE)
+ pr_err("backtrace buffer is too small\n");
+ }
+ frame_count += 1;
+ if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
+ seen_func2_func1 = 1;
+ prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
+ }
+
+ /* Check the results. */
+ if (unwind_error(&state)) {
+ pr_err("unwind error\n");
+ ret = -EINVAL;
+ }
+ if (!seen_func2_func1) {
+ pr_err("unwindme_func2 and unwindme_func1 not found\n");
+ ret = -EINVAL;
+ }
+ if (frame_count == max_frames) {
+ pr_err("Maximum number of frames exceeded\n");
+ ret = -EINVAL;
+ }
+ if (ret)
+ print_backtrace(bt);
+ kfree(bt);
+ return ret;
+}
+
+/* State of the task being unwound. */
+struct unwindme {
+ int flags;
+ int ret;
+ struct task_struct *task;
+ struct completion task_ready;
+ wait_queue_head_t task_wq;
+ unsigned long sp;
+};
+
+static struct unwindme *unwindme;
+
+/* Values of unwindme.flags. */
+#define UWM_DEFAULT 0x0
+#define UWM_THREAD 0x1 /* Unwind a separate task. */
+#define UWM_REGS 0x2 /* Pass regs to test_unwind(). */
+#define UWM_SP 0x4 /* Pass sp to test_unwind(). */
+#define UWM_CALLER 0x8 /* Unwind starting from caller. */
+#define UWM_SWITCH_STACK 0x10 /* Use CALL_ON_STACK. */
+#define UWM_IRQ 0x20 /* Unwind from irq context. */
+#define UWM_PGM 0x40 /* Unwind from program check handler. */
+
+static __always_inline unsigned long get_psw_addr(void)
+{
+ unsigned long psw_addr;
+
+ asm volatile(
+ "basr %[psw_addr],0\n"
+ : [psw_addr] "=d" (psw_addr));
+ return psw_addr;
+}
+
+#ifdef CONFIG_KPROBES
+static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ struct unwindme *u = unwindme;
+
+ u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+ (u->flags & UWM_SP) ? u->sp : 0);
+ return 0;
+}
+#endif
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func4(struct unwindme *u)
+{
+ if (!(u->flags & UWM_CALLER))
+ u->sp = current_frame_address();
+ if (u->flags & UWM_THREAD) {
+ complete(&u->task_ready);
+ wait_event(u->task_wq, kthread_should_park());
+ kthread_parkme();
+ return 0;
+#ifdef CONFIG_KPROBES
+ } else if (u->flags & UWM_PGM) {
+ struct kprobe kp;
+ int ret;
+
+ unwindme = u;
+ memset(&kp, 0, sizeof(kp));
+ kp.symbol_name = "do_report_trap";
+ kp.pre_handler = pgm_pre_handler;
+ ret = register_kprobe(&kp);
+ if (ret < 0) {
+ pr_err("register_kprobe failed %d\n", ret);
+ return -EINVAL;
+ }
+
+ /*
+ * Trigger operation exception; use insn notation to bypass
+ * llvm's integrated assembler sanity checks.
+ */
+ asm volatile(
+ " .insn e,0x0000\n" /* illegal opcode */
+ "0: nopr %%r7\n"
+ EX_TABLE(0b, 0b)
+ :);
+
+ unregister_kprobe(&kp);
+ unwindme = NULL;
+ return u->ret;
+#endif
+ } else {
+ struct pt_regs regs;
+
+ memset(®s, 0, sizeof(regs));
+ regs.psw.addr = get_psw_addr();
+ regs.gprs[15] = current_stack_pointer();
+ return test_unwind(NULL,
+ (u->flags & UWM_REGS) ? ®s : NULL,
+ (u->flags & UWM_SP) ? u->sp : 0);
+ }
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func3(struct unwindme *u)
+{
+ u->sp = current_frame_address();
+ return unwindme_func4(u);
+}
+
+/* This function must appear in the backtrace. */
+static noinline int unwindme_func2(struct unwindme *u)
+{
+ unsigned long flags;
+ int rc;
+
+ if (u->flags & UWM_SWITCH_STACK) {
+ local_irq_save(flags);
+ local_mcck_disable();
+ rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u);
+ local_mcck_enable();
+ local_irq_restore(flags);
+ return rc;
+ } else {
+ return unwindme_func3(u);
+ }
+}
+
+/* This function must follow unwindme_func2 in the backtrace. */
+static noinline int unwindme_func1(void *u)
+{
+ return unwindme_func2((struct unwindme *)u);
+}
+
+static void unwindme_irq_handler(struct ext_code ext_code,
+ unsigned int param32,
+ unsigned long param64)
+{
+ struct unwindme *u = READ_ONCE(unwindme);
+
+ if (u && u->task == current) {
+ unwindme = NULL;
+ u->task = NULL;
+ u->ret = unwindme_func1(u);
+ }
+}
+
+static int test_unwind_irq(struct unwindme *u)
+{
+ preempt_disable();
+ if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
+ pr_info("Couldn't register external interrupt handler");
+ return -1;
+ }
+ u->task = current;
+ unwindme = u;
+ udelay(1);
+ unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler);
+ preempt_enable();
+ return u->ret;
+}
+
+/* Spawns a task and passes it to test_unwind(). */
+static int test_unwind_task(struct unwindme *u)
+{
+ struct task_struct *task;
+ int ret;
+
+ /* Initialize thread-related fields. */
+ init_completion(&u->task_ready);
+ init_waitqueue_head(&u->task_wq);
+
+ /*
+ * Start the task and wait until it reaches unwindme_func4() and sleeps
+ * in (task_ready, unwind_done] range.
+ */
+ task = kthread_run(unwindme_func1, u, "%s", __func__);
+ if (IS_ERR(task)) {
+ pr_err("kthread_run() failed\n");
+ return PTR_ERR(task);
+ }
+ /*
+ * Make sure task reaches unwindme_func4 before parking it,
+ * we might park it before kthread function has been executed otherwise
+ */
+ wait_for_completion(&u->task_ready);
+ kthread_park(task);
+ /* Unwind. */
+ ret = test_unwind(task, NULL, (u->flags & UWM_SP) ? u->sp : 0);
+ kthread_stop(task);
+ return ret;
+}
+
+static int test_unwind_flags(int flags)
+{
+ struct unwindme u;
+
+ u.flags = flags;
+ if (u.flags & UWM_THREAD)
+ return test_unwind_task(&u);
+ else if (u.flags & UWM_IRQ)
+ return test_unwind_irq(&u);
+ else
+ return unwindme_func1(&u);
+}
+
+static int test_unwind_init(void)
+{
+ int ret = 0;
+
+#define TEST(flags) \
+do { \
+ pr_info("[ RUN ] " #flags "\n"); \
+ if (!test_unwind_flags((flags))) { \
+ pr_info("[ OK ] " #flags "\n"); \
+ } else { \
+ pr_err("[ FAILED ] " #flags "\n"); \
+ ret = -EINVAL; \
+ } \
+} while (0)
+
+ TEST(UWM_DEFAULT);
+ TEST(UWM_SP);
+ TEST(UWM_REGS);
+ TEST(UWM_SWITCH_STACK);
+ TEST(UWM_SP | UWM_REGS);
+ TEST(UWM_CALLER | UWM_SP);
+ TEST(UWM_CALLER | UWM_SP | UWM_REGS);
+ TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+ TEST(UWM_THREAD);
+ TEST(UWM_THREAD | UWM_SP);
+ TEST(UWM_THREAD | UWM_CALLER | UWM_SP);
+ TEST(UWM_IRQ);
+ TEST(UWM_IRQ | UWM_SWITCH_STACK);
+ TEST(UWM_IRQ | UWM_SP);
+ TEST(UWM_IRQ | UWM_REGS);
+ TEST(UWM_IRQ | UWM_SP | UWM_REGS);
+ TEST(UWM_IRQ | UWM_CALLER | UWM_SP);
+ TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS);
+ TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+#ifdef CONFIG_KPROBES
+ TEST(UWM_PGM);
+ TEST(UWM_PGM | UWM_SP);
+ TEST(UWM_PGM | UWM_REGS);
+ TEST(UWM_PGM | UWM_SP | UWM_REGS);
+#endif
+#undef TEST
+
+ return ret;
+}
+
+static void test_unwind_exit(void)
+{
+}
+
+module_init(test_unwind_init);
+module_exit(test_unwind_exit);
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 3175413..cd67e94 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -8,7 +8,7 @@
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
KASAN_SANITIZE_kasan_init.o := n
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index a51c892..1141c8d 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -19,10 +19,8 @@
#include <linux/swap.h>
#include <linux/kthread.h>
#include <linux/oom.h>
-#include <linux/suspend.h>
#include <linux/uaccess.h>
-#include <asm/pgalloc.h>
#include <asm/diag.h>
#ifdef CONFIG_CMM_IUCV
@@ -49,7 +47,6 @@
static volatile long cmm_timed_pages_target;
static long cmm_timeout_pages;
static long cmm_timeout_seconds;
-static int cmm_suspended;
static struct cmm_page_array *cmm_page_list;
static struct cmm_page_array *cmm_timed_page_list;
@@ -151,9 +148,9 @@
while (1) {
rc = wait_event_interruptible(cmm_thread_wait,
- (!cmm_suspended && (cmm_pages != cmm_pages_target ||
- cmm_timed_pages != cmm_timed_pages_target)) ||
- kthread_should_stop());
+ cmm_pages != cmm_pages_target ||
+ cmm_timed_pages != cmm_timed_pages_target ||
+ kthread_should_stop());
if (kthread_should_stop() || rc == -ERESTARTSYS) {
cmm_pages_target = cmm_pages;
cmm_timed_pages_target = cmm_timed_pages;
@@ -191,7 +188,7 @@
del_timer(&cmm_timer);
return;
}
- mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
+ mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
}
static void cmm_timer_fn(struct timer_list *unused)
@@ -247,7 +244,7 @@
}
static int cmm_pages_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
long nr = cmm_get_pages();
struct ctl_table ctl_entry = {
@@ -266,7 +263,7 @@
}
static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
+ void *buffer, size_t *lenp,
loff_t *ppos)
{
long nr = cmm_get_timed_pages();
@@ -286,7 +283,7 @@
}
static int cmm_timeout_handler(struct ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
+ void *buffer, size_t *lenp, loff_t *ppos)
{
char buf[64], *p;
long nr, seconds;
@@ -299,8 +296,7 @@
if (write) {
len = min(*lenp, sizeof(buf));
- if (copy_from_user(buf, buffer, len))
- return -EFAULT;
+ memcpy(buf, buffer, len);
buf[len - 1] = '\0';
cmm_skip_blanks(buf, &p);
nr = simple_strtoul(p, &p, 0);
@@ -313,8 +309,7 @@
cmm_timeout_pages, cmm_timeout_seconds);
if (len > *lenp)
len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
+ memcpy(buffer, buf, len);
*lenp = len;
*ppos += len;
}
@@ -390,38 +385,6 @@
static struct ctl_table_header *cmm_sysctl_header;
-static int cmm_suspend(void)
-{
- cmm_suspended = 1;
- cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
- cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
- return 0;
-}
-
-static int cmm_resume(void)
-{
- cmm_suspended = 0;
- cmm_kick_thread();
- return 0;
-}
-
-static int cmm_power_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- switch (event) {
- case PM_POST_HIBERNATION:
- return cmm_resume();
- case PM_HIBERNATION_PREPARE:
- return cmm_suspend();
- default:
- return NOTIFY_DONE;
- }
-}
-
-static struct notifier_block cmm_power_notifier = {
- .notifier_call = cmm_power_event,
-};
-
static int __init cmm_init(void)
{
int rc = -ENOMEM;
@@ -446,16 +409,11 @@
rc = register_oom_notifier(&cmm_oom_nb);
if (rc < 0)
goto out_oom_notify;
- rc = register_pm_notifier(&cmm_power_notifier);
- if (rc)
- goto out_pm;
cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
if (!IS_ERR(cmm_thread_ptr))
return 0;
rc = PTR_ERR(cmm_thread_ptr);
- unregister_pm_notifier(&cmm_power_notifier);
-out_pm:
unregister_oom_notifier(&cmm_oom_nb);
out_oom_notify:
#ifdef CONFIG_CMM_IUCV
@@ -475,7 +433,6 @@
#ifdef CONFIG_CMM_IUCV
smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
#endif
- unregister_pm_notifier(&cmm_power_notifier);
unregister_oom_notifier(&cmm_oom_nb);
kthread_stop(cmm_thread_ptr);
del_timer_sync(&cmm_timer);
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 5d67b81..8f9ff7e 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -1,12 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/set_memory.h>
+#include <linux/ptdump.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/kasan.h>
+#include <asm/ptdump.h>
#include <asm/kasan.h>
#include <asm/sections.h>
-#include <asm/pgtable.h>
static unsigned long max_addr;
@@ -16,263 +17,234 @@
};
enum address_markers_idx {
- IDENTITY_NR = 0,
+ IDENTITY_BEFORE_NR = 0,
+ IDENTITY_BEFORE_END_NR,
KERNEL_START_NR,
KERNEL_END_NR,
+ IDENTITY_AFTER_NR,
+ IDENTITY_AFTER_END_NR,
#ifdef CONFIG_KASAN
KASAN_SHADOW_START_NR,
KASAN_SHADOW_END_NR,
#endif
VMEMMAP_NR,
+ VMEMMAP_END_NR,
VMALLOC_NR,
+ VMALLOC_END_NR,
MODULES_NR,
+ MODULES_END_NR,
};
static struct addr_marker address_markers[] = {
- [IDENTITY_NR] = {0, "Identity Mapping"},
+ [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"},
+ [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
[KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+ [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"},
+ [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"},
#ifdef CONFIG_KASAN
[KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
[KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
#endif
- [VMEMMAP_NR] = {0, "vmemmap Area"},
- [VMALLOC_NR] = {0, "vmalloc Area"},
- [MODULES_NR] = {0, "Modules Area"},
+ [VMEMMAP_NR] = {0, "vmemmap Area Start"},
+ [VMEMMAP_END_NR] = {0, "vmemmap Area End"},
+ [VMALLOC_NR] = {0, "vmalloc Area Start"},
+ [VMALLOC_END_NR] = {0, "vmalloc Area End"},
+ [MODULES_NR] = {0, "Modules Area Start"},
+ [MODULES_END_NR] = {0, "Modules Area End"},
{ -1, NULL }
};
struct pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
int level;
unsigned int current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
unsigned long start_address;
- unsigned long current_address;
const struct addr_marker *marker;
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ struct seq_file *__m = (m); \
+ \
+ if (__m) \
+ seq_printf(__m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ struct seq_file *__m = (m); \
+ \
+ if (__m) \
+ seq_printf(__m, fmt); \
+})
+
static void print_prot(struct seq_file *m, unsigned int pr, int level)
{
static const char * const level_name[] =
{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
- seq_printf(m, "%s ", level_name[level]);
+ pt_dump_seq_printf(m, "%s ", level_name[level]);
if (pr & _PAGE_INVALID) {
- seq_printf(m, "I\n");
+ pt_dump_seq_printf(m, "I\n");
return;
}
- seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
- seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
+ pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+ pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
}
-static void note_page(struct seq_file *m, struct pg_state *st,
- unsigned int new_prot, int level)
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
- static const char units[] = "KMGTPE";
+#ifdef CONFIG_DEBUG_WX
+ if (!st->check_wx)
+ return;
+ if (st->current_prot & _PAGE_INVALID)
+ return;
+ if (st->current_prot & _PAGE_PROTECT)
+ return;
+ if (st->current_prot & _PAGE_NOEXEC)
+ return;
+ /* The first lowcore page is currently still W+X. */
+ if (addr == PAGE_SIZE)
+ return;
+ WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+ (void *)st->start_address);
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+#endif /* CONFIG_DEBUG_WX */
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
+{
int width = sizeof(unsigned long) * 2;
+ static const char units[] = "KMGTPE";
const char *unit = units;
- unsigned int prot, cur;
unsigned long delta;
+ struct pg_state *st;
+ struct seq_file *m;
+ unsigned int prot;
- /*
- * If we have a "break" in the series, we need to flush the state
- * that we have now. "break" is either changing perms, levels or
- * address space marker.
- */
- prot = new_prot;
- cur = st->current_prot;
-
- if (!st->level) {
- /* First entry */
- st->current_prot = new_prot;
+ st = container_of(pt_st, struct pg_state, ptdump);
+ m = st->seq;
+ prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC);
+ if (level == 4 && (val & _PAGE_INVALID))
+ prot = _PAGE_INVALID;
+ /* For pmd_none() & friends val gets passed as zero. */
+ if (level != 4 && !val)
+ prot = _PAGE_INVALID;
+ /* Final flush from generic code. */
+ if (level == -1)
+ addr = max_addr;
+ if (st->level == -1) {
+ pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ st->start_address = addr;
+ st->current_prot = prot;
st->level = level;
- st->marker = address_markers;
- seq_printf(m, "---[ %s ]---\n", st->marker->name);
- } else if (prot != cur || level != st->level ||
- st->current_address >= st->marker[1].start_address) {
- /* Print the actual finished series */
- seq_printf(m, "0x%0*lx-0x%0*lx ",
- width, st->start_address,
- width, st->current_address);
- delta = (st->current_address - st->start_address) >> 10;
+ } else if (prot != st->current_prot || level != st->level ||
+ addr >= st->marker[1].start_address) {
+ note_prot_wx(st, addr);
+ pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ",
+ width, st->start_address,
+ width, addr);
+ delta = (addr - st->start_address) >> 10;
while (!(delta & 0x3ff) && unit[1]) {
delta >>= 10;
unit++;
}
- seq_printf(m, "%9lu%c ", delta, *unit);
+ pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
- while (st->current_address >= st->marker[1].start_address) {
+ while (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
}
- st->start_address = st->current_address;
- st->current_prot = new_prot;
+ st->start_address = addr;
+ st->current_prot = prot;
st->level = level;
}
}
-#ifdef CONFIG_KASAN
-static void note_kasan_early_shadow_page(struct seq_file *m,
- struct pg_state *st)
+#ifdef CONFIG_DEBUG_WX
+void ptdump_check_wx(void)
{
- unsigned int prot;
+ struct pg_state st = {
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {.start = 0, .end = max_addr},
+ {.start = 0, .end = 0},
+ }
+ },
+ .seq = NULL,
+ .level = -1,
+ .current_prot = 0,
+ .check_wx = true,
+ .wx_pages = 0,
+ .start_address = 0,
+ .marker = (struct addr_marker[]) {
+ { .start_address = 0, .name = NULL},
+ { .start_address = -1, .name = NULL},
+ },
+ };
- prot = pte_val(*kasan_early_shadow_pte) &
- (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
- note_page(m, st, prot, 4);
-}
-#endif
-
-/*
- * The actual page table walker functions. In order to keep the
- * implementation of print_prot() short, we only check and pass
- * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
- * segment or page table entry is invalid or read-only.
- * After all it's just a hint that the current level being walked
- * contains an invalid or read-only entry.
- */
-static void walk_pte_level(struct seq_file *m, struct pg_state *st,
- pmd_t *pmd, unsigned long addr)
-{
- unsigned int prot;
- pte_t *pte;
- int i;
-
- for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
- st->current_address = addr;
- pte = pte_offset_kernel(pmd, addr);
- prot = pte_val(*pte) &
- (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
- note_page(m, st, prot, 4);
- addr += PAGE_SIZE;
- }
-}
-
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
- pud_t *pud, unsigned long addr)
-{
- unsigned int prot;
- pmd_t *pmd;
- int i;
-
-#ifdef CONFIG_KASAN
- if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_early_shadow_pmd)) {
- note_kasan_early_shadow_page(m, st);
+ if (!MACHINE_HAS_NX)
return;
- }
-#endif
-
- pmd = pmd_offset(pud, addr);
- for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++, pmd++) {
- st->current_address = addr;
- if (!pmd_none(*pmd)) {
- if (pmd_large(*pmd)) {
- prot = pmd_val(*pmd) &
- (_SEGMENT_ENTRY_PROTECT |
- _SEGMENT_ENTRY_NOEXEC);
- note_page(m, st, prot, 3);
- } else
- walk_pte_level(m, st, pmd, addr);
- } else
- note_page(m, st, _PAGE_INVALID, 3);
- addr += PMD_SIZE;
- }
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n");
}
+#endif /* CONFIG_DEBUG_WX */
-static void walk_pud_level(struct seq_file *m, struct pg_state *st,
- p4d_t *p4d, unsigned long addr)
-{
- unsigned int prot;
- pud_t *pud;
- int i;
-
-#ifdef CONFIG_KASAN
- if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_early_shadow_pud)) {
- note_kasan_early_shadow_page(m, st);
- return;
- }
-#endif
-
- pud = pud_offset(p4d, addr);
- for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++, pud++) {
- st->current_address = addr;
- if (!pud_none(*pud))
- if (pud_large(*pud)) {
- prot = pud_val(*pud) &
- (_REGION_ENTRY_PROTECT |
- _REGION_ENTRY_NOEXEC);
- note_page(m, st, prot, 2);
- } else
- walk_pmd_level(m, st, pud, addr);
- else
- note_page(m, st, _PAGE_INVALID, 2);
- addr += PUD_SIZE;
- }
-}
-
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
- pgd_t *pgd, unsigned long addr)
-{
- p4d_t *p4d;
- int i;
-
-#ifdef CONFIG_KASAN
- if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_early_shadow_p4d)) {
- note_kasan_early_shadow_page(m, st);
- return;
- }
-#endif
-
- p4d = p4d_offset(pgd, addr);
- for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++, p4d++) {
- st->current_address = addr;
- if (!p4d_none(*p4d))
- walk_pud_level(m, st, p4d, addr);
- else
- note_page(m, st, _PAGE_INVALID, 2);
- addr += P4D_SIZE;
- }
-}
-
-static void walk_pgd_level(struct seq_file *m)
-{
- unsigned long addr = 0;
- struct pg_state st;
- pgd_t *pgd;
- int i;
-
- memset(&st, 0, sizeof(st));
- for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
- st.current_address = addr;
- pgd = pgd_offset_k(addr);
- if (!pgd_none(*pgd))
- walk_p4d_level(m, &st, pgd, addr);
- else
- note_page(m, &st, _PAGE_INVALID, 1);
- addr += PGDIR_SIZE;
- cond_resched();
- }
- /* Flush out the last page */
- st.current_address = max_addr;
- note_page(m, &st, 0, 0);
-}
-
+#ifdef CONFIG_PTDUMP_DEBUGFS
static int ptdump_show(struct seq_file *m, void *v)
{
- walk_pgd_level(m);
+ struct pg_state st = {
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {.start = 0, .end = max_addr},
+ {.start = 0, .end = 0},
+ }
+ },
+ .seq = m,
+ .level = -1,
+ .current_prot = 0,
+ .check_wx = false,
+ .wx_pages = 0,
+ .start_address = 0,
+ .marker = address_markers,
+ };
+
+ get_online_mems();
+ mutex_lock(&cpa_mutex);
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ mutex_unlock(&cpa_mutex);
+ put_online_mems();
return 0;
}
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
-static int ptdump_open(struct inode *inode, struct file *filp)
+/*
+ * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
+ * insertion sort to preserve the original order of markers with the same
+ * start address.
+ */
+static void sort_address_markers(void)
{
- return single_open(filp, ptdump_show, NULL);
-}
+ struct addr_marker tmp;
+ int i, j;
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+ for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
+ tmp = address_markers[i];
+ for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
+ address_markers[j + 1] = address_markers[j];
+ address_markers[j + 1] = tmp;
+ }
+}
static int pt_dump_init(void)
{
@@ -283,10 +255,17 @@
*/
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
+ address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
+ address_markers[MODULES_END_NR].start_address = MODULES_END;
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+ address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+ address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+ sort_address_markers();
+#ifdef CONFIG_PTDUMP_DEBUGFS
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
return 0;
}
device_initcall(pt_dump_init);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index fd0dae9..5060956 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -20,9 +20,9 @@
#include <linux/ctype.h>
#include <linux/ioport.h>
#include <linux/refcount.h>
+#include <linux/pgtable.h>
#include <asm/diag.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/ebcdic.h>
#include <asm/errno.h>
#include <asm/extmem.h>
@@ -313,15 +313,10 @@
goto out_free;
}
- rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
-
- if (rc)
- goto out_free;
-
seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (seg->res == NULL) {
rc = -ENOMEM;
- goto out_shared;
+ goto out_free;
}
seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
seg->res->start = seg->start_addr;
@@ -335,12 +330,17 @@
if (rc == SEG_TYPE_SC ||
((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
seg->res->flags |= IORESOURCE_READONLY;
+
+ /* Check for overlapping resources before adding the mapping. */
if (request_resource(&iomem_resource, seg->res)) {
rc = -EBUSY;
- kfree(seg->res);
- goto out_shared;
+ goto out_free_resource;
}
+ rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+ if (rc)
+ goto out_resource;
+
if (do_nonshared)
diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
&start_addr, &end_addr);
@@ -351,14 +351,14 @@
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
rc = diag_cc;
- goto out_resource;
+ goto out_mapping;
}
if (diag_cc > 1) {
pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
rc = dcss_diag_translate_rc(end_addr);
dcss_diag(&purgeseg_scode, seg->dcss_name,
&dummy, &dummy);
- goto out_resource;
+ goto out_mapping;
}
seg->start_addr = start_addr;
seg->end = end_addr;
@@ -377,11 +377,12 @@
(void*) seg->end, segtype_string[seg->vm_segtype]);
}
goto out;
+ out_mapping:
+ vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
out_resource:
release_resource(seg->res);
+ out_free_resource:
kfree(seg->res);
- out_shared:
- vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
out_free:
kfree(seg);
out:
@@ -400,8 +401,7 @@
* -EIO : could not perform query or load diagnose
* -ENOENT : no such segment
* -EOPNOTSUPP: multi-part segment cannot be used with linux
- * -ENOSPC : segment cannot be used (overlaps with storage)
- * -EBUSY : segment can temporarily not be used (overlaps with dcss)
+ * -EBUSY : segment cannot be used (overlaps with dcss or storage)
* -ERANGE : segment cannot be used (exceeds kernel mapping range)
* -EPERM : segment is currently loaded with incompatible permissions
* -ENOMEM : out of memory
@@ -626,10 +626,6 @@
pr_err("DCSS %s has multiple page ranges and cannot be "
"loaded or queried\n", seg_name);
break;
- case -ENOSPC:
- pr_err("DCSS %s overlaps with used storage and cannot "
- "be loaded\n", seg_name);
- break;
case -EBUSY:
pr_err("%s needs used memory resources and cannot be "
"loaded or queried\n", seg_name);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7b0bb47..ed517fa 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -33,22 +33,22 @@
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
#include <asm/diag.h>
-#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "../kernel/entry.h"
#define __FAIL_ADDR_MASK -4096L
#define __SUBCODE_MASK 0x0600
#define __PF_RES_FIELD 0x8000000000000000ULL
-#define VM_FAULT_BADCONTEXT 0x010000
-#define VM_FAULT_BADMAP 0x020000
-#define VM_FAULT_BADACCESS 0x040000
-#define VM_FAULT_SIGNAL 0x080000
-#define VM_FAULT_PFAULT 0x100000
+#define VM_FAULT_BADCONTEXT ((__force vm_fault_t) 0x010000)
+#define VM_FAULT_BADMAP ((__force vm_fault_t) 0x020000)
+#define VM_FAULT_BADACCESS ((__force vm_fault_t) 0x040000)
+#define VM_FAULT_SIGNAL ((__force vm_fault_t) 0x080000)
+#define VM_FAULT_PFAULT ((__force vm_fault_t) 0x100000)
enum fault_type {
KERNEL_FAULT,
@@ -105,7 +105,7 @@
{
unsigned long dummy;
- return probe_kernel_address((unsigned long *)p, dummy);
+ return get_kernel_nofault(dummy, (unsigned long *)p);
}
static void dump_pagetable(unsigned long asce, unsigned long address)
@@ -122,7 +122,7 @@
if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION2:
table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
if (bad_address(table))
@@ -131,7 +131,7 @@
if (*table & _REGION_ENTRY_INVALID)
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION3:
table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
if (bad_address(table))
@@ -140,7 +140,7 @@
if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
goto out;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* fallthrough */
+ fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (bad_address(table))
@@ -255,10 +255,8 @@
/* Are we prepared to handle this kernel fault? */
fixup = s390_search_extables(regs->psw.addr);
- if (fixup) {
- regs->psw.addr = extable_fixup(fixup);
+ if (fixup && ex_handle(fixup, regs))
return;
- }
/*
* Oops. The kernel tried to access some bad page. We'll have to
@@ -327,7 +325,7 @@
case VM_FAULT_BADACCESS:
if (access == VM_EXEC && signal_return(regs) == 0)
break;
- /* fallthrough */
+ fallthrough;
case VM_FAULT_BADMAP:
/* Bad memory access. Check if it is kernel or user space. */
if (user_mode(regs)) {
@@ -337,9 +335,8 @@
do_sigsegv(regs, si_code);
break;
}
- /* fallthrough */
+ fallthrough;
case VM_FAULT_BADCONTEXT:
- /* fallthrough */
case VM_FAULT_PFAULT:
do_no_context(regs);
break;
@@ -377,7 +374,7 @@
* routines.
*
* interruption code (int_code):
- * 04 Protection -> Write-Protection (suprression)
+ * 04 Protection -> Write-Protection (suppression)
* 10 Segment translation -> Not present (nullification)
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
@@ -429,12 +426,12 @@
address = trans_exc_code & __FAIL_ADDR_MASK;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
flags |= FAULT_FLAG_WRITE;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
gmap = NULL;
if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
@@ -479,9 +476,8 @@
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(vma, address, flags);
- /* No reason to continue if interrupted by SIGKILL. */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ fault = handle_mm_fault(vma, address, flags, regs);
+ if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
if (flags & FAULT_FLAG_RETRY_NOWAIT)
goto out_up;
@@ -490,36 +486,19 @@
if (unlikely(fault & VM_FAULT_ERROR))
goto out_up;
- /*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
- */
if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
if (fault & VM_FAULT_RETRY) {
if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
(flags & FAULT_FLAG_RETRY_NOWAIT)) {
/* FAULT_FLAG_RETRY_NOWAIT has been set,
- * mmap_sem has not been released */
+ * mmap_lock has not been released */
current->thread.gmap_pfault = 1;
fault = VM_FAULT_PFAULT;
goto out_up;
}
- /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
- * of starvation. */
- flags &= ~(FAULT_FLAG_ALLOW_RETRY |
- FAULT_FLAG_RETRY_NOWAIT);
+ flags &= ~FAULT_FLAG_RETRY_NOWAIT;
flags |= FAULT_FLAG_TRIED;
- down_read(&mm->mmap_sem);
+ mmap_read_lock(mm);
goto retry;
}
}
@@ -537,7 +516,7 @@
}
fault = 0;
out_up:
- up_read(&mm->mmap_sem);
+ mmap_read_unlock(mm);
out:
return fault;
}
@@ -584,7 +563,7 @@
int access;
vm_fault_t fault;
- access = VM_READ | VM_EXEC | VM_WRITE;
+ access = VM_ACCESS_FLAGS;
fault = do_exception(regs, access);
if (unlikely(fault))
do_fault_error(regs, access, fault);
@@ -816,3 +795,124 @@
early_initcall(pfault_irq_init);
#endif /* CONFIG_PFAULT */
+
+#if IS_ENABLED(CONFIG_PGSTE)
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ struct page *page;
+ int rc;
+
+ /*
+ * bit 61 tells us if the address is valid, if it's not we
+ * have a major problem and should stop the kernel or send a
+ * SIGSEGV to the process. Unfortunately bit 61 is not
+ * reliable without the misc UV feature so we need to check
+ * for that as well.
+ */
+ if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+ !test_bit_inv(61, ®s->int_parm_long)) {
+ /*
+ * When this happens, userspace did something that it
+ * was not supposed to do, e.g. branching into secure
+ * memory. Trigger a segmentation fault.
+ */
+ if (user_mode(regs)) {
+ send_sig(SIGSEGV, current, 0);
+ return;
+ }
+
+ /*
+ * The kernel should never run into this case and we
+ * have no way out of this situation.
+ */
+ panic("Unexpected PGM 0x3d with TEID bit 61=0");
+ }
+
+ switch (get_fault_type(regs)) {
+ case USER_FAULT:
+ mm = current->mm;
+ mmap_read_lock(mm);
+ vma = find_vma(mm, addr);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ break;
+ }
+ page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+ if (IS_ERR_OR_NULL(page)) {
+ mmap_read_unlock(mm);
+ break;
+ }
+ if (arch_make_page_accessible(page))
+ send_sig(SIGSEGV, current, 0);
+ put_page(page);
+ mmap_read_unlock(mm);
+ break;
+ case KERNEL_FAULT:
+ page = phys_to_page(addr);
+ if (unlikely(!try_get_page(page)))
+ break;
+ rc = arch_make_page_accessible(page);
+ put_page(page);
+ if (rc)
+ BUG();
+ break;
+ case VDSO_FAULT:
+ case GMAP_FAULT:
+ default:
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ }
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+ if (get_fault_type(regs) != GMAP_FAULT) {
+ do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+ send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+ /*
+ * Either KVM messed up the secure guest mapping or the same
+ * page is mapped into multiple secure guests.
+ *
+ * This exception is only triggered when a guest 2 is running
+ * and can therefore never occur in kernel context.
+ */
+ printk_ratelimited(KERN_WARNING
+ "Secure storage violation in task: %s, pid %d\n",
+ current->comm, current->pid);
+ send_sig(SIGSEGV, current, 0);
+}
+
+#else
+void do_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+ default_trap_handler(regs);
+}
+#endif
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fa7a56..f2d19d4 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -17,8 +17,8 @@
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
+#include <linux/pgtable.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
@@ -300,7 +300,7 @@
EXPORT_SYMBOL_GPL(gmap_get_enabled);
/*
- * gmap_alloc_table is assumed to be called with mmap_sem held
+ * gmap_alloc_table is assumed to be called with mmap_lock held
*/
static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long init, unsigned long gaddr)
@@ -405,10 +405,10 @@
return -EINVAL;
flush = 0;
- down_write(&gmap->mm->mmap_sem);
+ mmap_write_lock(gmap->mm);
for (off = 0; off < len; off += PMD_SIZE)
flush |= __gmap_unmap_by_gaddr(gmap, to + off);
- up_write(&gmap->mm->mmap_sem);
+ mmap_write_unlock(gmap->mm);
if (flush)
gmap_flush_tlb(gmap);
return 0;
@@ -438,7 +438,7 @@
return -EINVAL;
flush = 0;
- down_write(&gmap->mm->mmap_sem);
+ mmap_write_lock(gmap->mm);
for (off = 0; off < len; off += PMD_SIZE) {
/* Remove old translation */
flush |= __gmap_unmap_by_gaddr(gmap, to + off);
@@ -448,7 +448,7 @@
(void *) from + off))
break;
}
- up_write(&gmap->mm->mmap_sem);
+ mmap_write_unlock(gmap->mm);
if (flush)
gmap_flush_tlb(gmap);
if (off >= len)
@@ -466,7 +466,7 @@
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
* This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
+ * The mmap_lock of the mm that belongs to the address space must be held
* when this function gets called.
*
* Note: Can also be called for shadow gmaps.
@@ -495,9 +495,9 @@
{
unsigned long rc;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
rc = __gmap_translate(gmap, gaddr);
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
@@ -534,7 +534,7 @@
*
* Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
* if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
+ * The mmap_lock of the mm that belongs to the address space must be held
* when this function gets called.
*/
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
@@ -640,7 +640,7 @@
int rc;
bool unlocked;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
retry:
unlocked = false;
@@ -649,13 +649,13 @@
rc = vmaddr;
goto out_up;
}
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+ if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
&unlocked)) {
rc = -EFAULT;
goto out_up;
}
/*
- * In the case that fixup_user_fault unlocked the mmap_sem during
+ * In the case that fixup_user_fault unlocked the mmap_lock during
* faultin redo __gmap_translate to not race with a map/unmap_segment.
*/
if (unlocked)
@@ -663,13 +663,13 @@
rc = __gmap_link(gmap, gaddr, vmaddr);
out_up:
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
/*
- * this function is assumed to be called with mmap_sem held
+ * this function is assumed to be called with mmap_lock held
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
@@ -684,9 +684,10 @@
vmaddr |= gaddr & ~PMD_MASK;
/* Get pointer to the page table entry */
ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (likely(ptep))
+ if (likely(ptep)) {
ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
- pte_unmap_unlock(ptep, ptl);
+ pte_unmap_unlock(ptep, ptl);
+ }
}
}
EXPORT_SYMBOL_GPL(__gmap_zap);
@@ -696,7 +697,7 @@
unsigned long gaddr, vmaddr, size;
struct vm_area_struct *vma;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
for (gaddr = from; gaddr < to;
gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
/* Find the vm address for the guest address */
@@ -719,7 +720,7 @@
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
zap_page_range(vma, vmaddr, size);
}
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
}
EXPORT_SYMBOL_GPL(gmap_discard);
@@ -788,19 +789,19 @@
unsigned long gaddr, int level)
{
const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
- unsigned long *table;
+ unsigned long *table = gmap->table;
- if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
- return NULL;
if (gmap_is_shadow(gmap) && gmap->removed)
return NULL;
+ if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
+ return NULL;
+
if (asce_type != _ASCE_TYPE_REGION1 &&
gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
return NULL;
- table = gmap->table;
- switch (gmap->asce & _ASCE_TYPE_MASK) {
+ switch (asce_type) {
case _ASCE_TYPE_REGION1:
table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
if (level == 4)
@@ -808,7 +809,7 @@
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION2:
table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
if (level == 3)
@@ -816,7 +817,7 @@
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_REGION3:
table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
if (level == 2)
@@ -824,7 +825,7 @@
if (*table & _REGION_ENTRY_INVALID)
return NULL;
table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- /* Fallthrough */
+ fallthrough;
case _ASCE_TYPE_SEGMENT:
table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
if (level == 1)
@@ -879,10 +880,10 @@
BUG_ON(gmap_is_shadow(gmap));
fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
- if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+ if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
return -EFAULT;
if (unlocked)
- /* lost mmap_sem, caller has to retry __gmap_translate */
+ /* lost mmap_lock, caller has to retry __gmap_translate */
return 0;
/* Connect the page tables */
return __gmap_link(gmap, gaddr, vmaddr);
@@ -953,7 +954,7 @@
* -EAGAIN if a fixup is needed
* -EINVAL if unsupported notifier bits have been specified
*
- * Expected to be called with sg->mm->mmap_sem in read and
+ * Expected to be called with sg->mm->mmap_lock in read and
* guest_table_lock held.
*/
static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
@@ -999,7 +1000,7 @@
* Returns 0 if successfully protected, -ENOMEM if out of memory and
* -EAGAIN if a fixup is needed.
*
- * Expected to be called with sg->mm->mmap_sem in read
+ * Expected to be called with sg->mm->mmap_lock in read
*/
static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
pmd_t *pmdp, int prot, unsigned long bits)
@@ -1035,7 +1036,7 @@
* Returns 0 if successfully protected, -ENOMEM if out of memory and
* -EFAULT if gaddr is invalid (or mapping for shadows is missing).
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
@@ -1106,9 +1107,9 @@
return -EINVAL;
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
return -EINVAL;
- down_read(&gmap->mm->mmap_sem);
+ mmap_read_lock(gmap->mm);
rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
- up_read(&gmap->mm->mmap_sem);
+ mmap_read_unlock(gmap->mm);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
@@ -1124,7 +1125,7 @@
* if reading using the virtual address failed. -EINVAL if called on a gmap
* shadow.
*
- * Called with gmap->mm->mmap_sem in read.
+ * Called with gmap->mm->mmap_lock in read.
*/
int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
{
@@ -1696,11 +1697,11 @@
}
spin_unlock(&parent->shadow_lock);
/* protect after insertion, so it will get properly invalidated */
- down_read(&parent->mm->mmap_sem);
+ mmap_read_lock(parent->mm);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
PROT_READ, GMAP_NOTIFY_SHADOW);
- up_read(&parent->mm->mmap_sem);
+ mmap_read_unlock(parent->mm);
spin_lock(&parent->shadow_lock);
new->initialized = true;
if (rc) {
@@ -1729,7 +1730,7 @@
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
int fake)
@@ -1813,7 +1814,7 @@
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
int fake)
@@ -1897,7 +1898,7 @@
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
int fake)
@@ -1981,7 +1982,7 @@
* Returns 0 if the shadow page table was found and -EAGAIN if the page
* table was not found.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
unsigned long *pgt, int *dat_protection,
@@ -2021,7 +2022,7 @@
* shadow table structure is incomplete, -ENOMEM if out of memory,
* -EFAULT if an address in the parent gmap could not be resolved and
*
- * Called with gmap->mm->mmap_sem in read
+ * Called with gmap->mm->mmap_lock in read
*/
int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
int fake)
@@ -2100,7 +2101,7 @@
* shadow table structure is incomplete, -ENOMEM if out of memory and
* -EFAULT if an address in the parent gmap could not be resolved.
*
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
*/
int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
{
@@ -2556,16 +2557,33 @@
/* Fail if the page tables are 2K */
if (!mm_alloc_pgste(mm))
return -EINVAL;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
+int gmap_mark_unmergeable(void)
+{
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int ret;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ MADV_UNMERGEABLE, &vma->vm_flags);
+ if (ret)
+ return ret;
+ }
+ mm->def_flags &= ~VM_MERGEABLE;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
@@ -2611,28 +2629,22 @@
int s390_enable_skey(void)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
int rc = 0;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
if (mm_uses_skeys(mm))
goto out_up;
mm->context.uses_skeys = 1;
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
- MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.uses_skeys = 0;
- rc = -ENOMEM;
- goto out_up;
- }
+ rc = gmap_mark_unmergeable();
+ if (rc) {
+ mm->context.uses_skeys = 0;
+ goto out_up;
}
- mm->def_flags &= ~VM_MERGEABLE;
-
walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
out_up:
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
return rc;
}
EXPORT_SYMBOL_GPL(s390_enable_skey);
@@ -2653,8 +2665,45 @@
void s390_reset_cmma(struct mm_struct *mm)
{
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
- up_write(&mm->mmap_sem);
+ mmap_write_unlock(mm);
}
EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
+ * make inaccessible pages accessible again
+ */
+static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte))
+ WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK));
+ return 0;
+}
+
+static const struct mm_walk_ops reset_acc_walk_ops = {
+ .pte_entry = __s390_reset_acc,
+};
+
+#include <linux/sched/mm.h>
+void s390_reset_acc(struct mm_struct *mm)
+{
+ if (!mm_is_protected(mm))
+ return;
+ /*
+ * we might be called during
+ * reset: we walk the pages and clear
+ * close of all kvm file descriptors: we walk the pages and clear
+ * exit of process on fd closure: vma already gone, do nothing
+ */
+ if (!mmget_not_zero(mm))
+ return;
+ mmap_read_lock(mm);
+ walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
+ mmap_read_unlock(mm);
+ mmput(mm);
+}
+EXPORT_SYMBOL_GPL(s390_reset_acc);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index ff8234b..3b5a4d2 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -254,25 +254,15 @@
return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}
-static __init int setup_hugepagesz(char *opt)
+bool __init arch_hugetlb_valid_size(unsigned long size)
{
- unsigned long size;
- char *string = opt;
-
- size = memparse(opt, &opt);
- if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
- hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
- } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
- hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
- } else {
- hugetlb_bad_size();
- pr_err("hugepagesz= specifies an unsupported page size %s\n",
- string);
- return 0;
- }
- return 1;
+ if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+ return true;
+ else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+ return true;
+ else
+ return false;
}
-__setup("hugepagesz=", setup_hugepagesz);
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long addr, unsigned long len,
@@ -329,7 +319,6 @@
struct hstate *h = hstate_file(file);
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- int rc;
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -356,15 +345,9 @@
else
addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
pgoff, flags);
- if (addr & ~PAGE_MASK)
+ if (offset_in_page(addr))
return addr;
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit &&
- addr + len <= TASK_SIZE) {
- rc = crst_table_upgrade(mm, addr + len);
- if (rc)
- return (unsigned long) rc;
- }
- return addr;
+ return check_asce_limit(mm, addr, len);
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 5521f59..9d5960b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -33,8 +33,8 @@
#include <linux/dma-direct.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
+#include <asm/ptdump.h>
#include <asm/dma.h>
#include <asm/lowcore.h>
#include <asm/tlb.h>
@@ -46,8 +46,9 @@
#include <asm/kasan.h>
#include <asm/dma-mapping.h>
#include <asm/uv.h>
+#include <linux/virtio_config.h>
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
@@ -116,12 +117,12 @@
__load_psw_mask(psw.mask);
kasan_free_early_identity();
- sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+ zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
- free_area_init_nodes(max_zone_pfns);
+ free_area_init(max_zone_pfns);
}
void mark_rodata_ro(void)
@@ -130,6 +131,7 @@
set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
+ debug_checkwx();
}
int set_memory_encrypted(unsigned long addr, int numpages)
@@ -161,6 +163,16 @@
return is_prot_virt_guest();
}
+#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
+
+int arch_has_restricted_virtio_memory_access(void)
+{
+ return is_prot_virt_guest();
+}
+EXPORT_SYMBOL(arch_has_restricted_virtio_memory_access);
+
+#endif
+
/* protected virtualization */
static void pv_init(void)
{
@@ -267,20 +279,23 @@
#endif /* CONFIG_CMA */
int arch_add_memory(int nid, u64 start, u64 size,
- struct mhp_restrictions *restrictions)
+ struct mhp_params *params)
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
int rc;
- if (WARN_ON_ONCE(restrictions->altmap))
+ if (WARN_ON_ONCE(params->altmap))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
return -EINVAL;
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
- rc = __add_pages(nid, start_pfn, size_pages, restrictions);
+ rc = __add_pages(nid, start_pfn, size_pages, params);
if (rc)
vmem_remove_mapping(start, size);
return rc;
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 5182e08..e9a9b7b 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -2,8 +2,8 @@
#include <linux/kasan.h>
#include <linux/sched/task.h>
#include <linux/memblock.h>
+#include <linux/pgtable.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/kasan.h>
#include <asm/mem_detect.h>
#include <asm/processor.h>
@@ -11,7 +11,9 @@
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/uv.h>
+unsigned long kasan_vmax;
static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
static unsigned long pgalloc_pos __initdata;
@@ -82,7 +84,8 @@
enum populate_mode {
POPULATE_ONE2ONE,
POPULATE_MAP,
- POPULATE_ZERO_SHADOW
+ POPULATE_ZERO_SHADOW,
+ POPULATE_SHALLOW
};
static void __init kasan_early_vmemmap_populate(unsigned long address,
unsigned long end,
@@ -98,8 +101,12 @@
pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
if (!has_nx)
pgt_prot_zero &= ~_PAGE_NOEXEC;
- pgt_prot = pgprot_val(PAGE_KERNEL_EXEC);
- sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC);
+ pgt_prot = pgprot_val(PAGE_KERNEL);
+ sgt_prot = pgprot_val(SEGMENT_KERNEL);
+ if (!has_nx || mode == POPULATE_ONE2ONE) {
+ pgt_prot &= ~_PAGE_NOEXEC;
+ sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+ }
/*
* The first 1MB of 1:1 mapping is mapped with 4KB pages
@@ -119,6 +126,12 @@
pgd_populate(&init_mm, pg_dir, p4_dir);
}
+ if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+ mode == POPULATE_SHALLOW) {
+ address = (address + P4D_SIZE) & P4D_MASK;
+ continue;
+ }
+
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
if (mode == POPULATE_ZERO_SHADOW &&
@@ -133,6 +146,12 @@
p4d_populate(&init_mm, p4_dir, pu_dir);
}
+ if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+ mode == POPULATE_SHALLOW) {
+ address = (address + PUD_SIZE) & PUD_MASK;
+ continue;
+ }
+
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
if (mode == POPULATE_ZERO_SHADOW &&
@@ -194,6 +213,9 @@
page = kasan_early_shadow_page;
pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
break;
+ case POPULATE_SHALLOW:
+ /* should never happen */
+ break;
}
}
address += PAGE_SIZE;
@@ -235,14 +257,31 @@
}
}
+static bool __init has_uv_sec_stor_limit(void)
+{
+ /*
+ * keep these conditions in line with setup_uv()
+ */
+ if (!is_prot_virt_host())
+ return false;
+
+ if (is_prot_virt_guest())
+ return false;
+
+ if (!test_facility(158))
+ return false;
+
+ return !!uv_info.max_sec_stor_addr;
+}
+
void __init kasan_early_init(void)
{
unsigned long untracked_mem_end;
unsigned long shadow_alloc_size;
+ unsigned long vmax_unlimited;
unsigned long initrd_end;
unsigned long asce_type;
unsigned long memsize;
- unsigned long vmax;
unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
pte_t pte_z;
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
@@ -270,7 +309,9 @@
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
crst_table_init((unsigned long *)early_pg_dir,
_REGION2_ENTRY_EMPTY);
- untracked_mem_end = vmax = _REGION1_SIZE;
+ untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE;
+ if (has_uv_sec_stor_limit())
+ kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr);
asce_type = _ASCE_TYPE_REGION2;
} else {
/* 3 level paging */
@@ -278,7 +319,7 @@
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
crst_table_init((unsigned long *)early_pg_dir,
_REGION3_ENTRY_EMPTY);
- untracked_mem_end = vmax = _REGION2_SIZE;
+ untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE;
asce_type = _ASCE_TYPE_REGION3;
}
@@ -312,25 +353,56 @@
init_mm.pgd = early_pg_dir;
/*
* Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... |/ | kasan |
- * +- shadow start --+ | zero |
- * | 1/8 addr space | | page |
- * +- shadow end -+ | mapping |
- * | ... gap ... |\ | (untracked) |
- * +- modules vaddr -+ \ +----------------+
- * | 2Gb | \| unmapped | allocated per module
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * | 1:1 ram mapping | /| 1/8 ram |
+ * | | / | |
+ * +- end of ram ----+ / +----------------+
+ * | ... gap ... | / | |
+ * | |/ | kasan |
+ * +- shadow start --+ | zero |
+ * | 1/8 addr space | | page |
+ * +- shadow end -+ | mapping |
+ * | ... gap ... |\ | (untracked) |
+ * +- vmalloc area -+ \ | |
+ * | vmalloc_size | \ | |
+ * +- modules vaddr -+ \ +----------------+
+ * | 2Gb | \| unmapped | allocated per module
+ * +-----------------+ +- shadow end ---+
+ *
+ * Current memory layout (KASAN_VMALLOC):
+ * +- 0 -------------+ +- shadow start -+
+ * | 1:1 ram mapping | /| 1/8 ram |
+ * | | / | |
+ * +- end of ram ----+ / +----------------+
+ * | ... gap ... | / | kasan |
+ * | |/ | zero |
+ * +- shadow start --+ | page |
+ * | 1/8 addr space | | mapping |
+ * +- shadow end -+ | (untracked) |
+ * | ... gap ... |\ | |
+ * +- vmalloc area -+ \ +- vmalloc area -+
+ * | vmalloc_size | \ |shallow populate|
+ * +- modules vaddr -+ \ +- modules area -+
+ * | 2Gb | \|shallow populate|
+ * +-----------------+ +- shadow end ---+
*/
/* populate kasan shadow (for identity mapping and zero page mapping) */
kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
if (IS_ENABLED(CONFIG_MODULES))
- untracked_mem_end = vmax - MODULES_LEN;
+ untracked_mem_end = kasan_vmax - MODULES_LEN;
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN;
+ /* shallowly populate kasan shadow for vmalloc and modules */
+ kasan_early_vmemmap_populate(__sha(untracked_mem_end),
+ __sha(kasan_vmax), POPULATE_SHALLOW);
+ }
+ /* populate kasan shadow for untracked memory */
kasan_early_vmemmap_populate(__sha(max_physmem_end),
__sha(untracked_mem_end),
POPULATE_ZERO_SHADOW);
+ kasan_early_vmemmap_populate(__sha(kasan_vmax),
+ __sha(vmax_unlimited),
+ POPULATE_ZERO_SHADOW);
/* memory allocated for identity mapping structs will be freed later */
pgalloc_freeable = pgalloc_pos;
/* populate identity mapping */
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index cbc718b..e54f928 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -17,7 +17,6 @@
#include <linux/random.h>
#include <linux/compat.h>
#include <linux/security.h>
-#include <asm/pgalloc.h>
#include <asm/elf.h>
static unsigned long stack_maxrandom_size(void)
@@ -72,14 +71,13 @@
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
- unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- int rc;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -105,30 +103,20 @@
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
- if (addr & ~PAGE_MASK)
+ if (offset_in_page(addr))
return addr;
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit &&
- addr + len <= TASK_SIZE) {
- rc = crst_table_upgrade(mm, addr + len);
- if (rc)
- return (unsigned long) rc;
- }
-
- return addr;
+ return check_asce_limit(mm, addr, len);
}
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
- const unsigned long len, const unsigned long pgoff,
- const unsigned long flags)
+unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- int rc;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -163,25 +151,18 @@
* can happen with large stack limits and large mmap()
* allocations.
*/
- if (addr & ~PAGE_MASK) {
+ if (offset_in_page(addr)) {
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = TASK_UNMAPPED_BASE;
info.high_limit = TASK_SIZE;
addr = vm_unmapped_area(&info);
- if (addr & ~PAGE_MASK)
+ if (offset_in_page(addr))
return addr;
}
check_asce_limit:
- if (addr + len > current->mm->context.asce_limit &&
- addr + len <= TASK_SIZE) {
- rc = crst_table_upgrade(mm, addr + len);
- if (rc)
- return (unsigned long) rc;
- }
-
- return addr;
+ return check_asce_limit(mm, addr, len);
}
/*
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index fc14189..567c69f 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -183,9 +183,9 @@
void __init cmma_init_nodat(void)
{
- struct memblock_region *reg;
struct page *page;
unsigned long start, end, ix;
+ int i;
if (cmma_flag < 2)
return;
@@ -193,9 +193,7 @@
mark_kernel_pgd();
/* Set all kernel pages not used for page tables to stable/no-dat */
- for_each_memblock(memory, reg) {
- start = memblock_region_memory_base_pfn(reg);
- end = memblock_region_memory_end_pfn(reg);
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
page = pfn_to_page(start);
for (ix = start; ix < end; ix++, page++) {
if (__test_and_clear_bit(PG_arch_1, &page->flags))
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index f8c6faa..ed8e5b3 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,7 +7,6 @@
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/set_memory.h>
@@ -86,7 +85,7 @@
{
pte_t *ptep, new;
- ptep = pte_offset(pmdp, addr);
+ ptep = pte_offset_kernel(pmdp, addr);
do {
new = *ptep;
if (pte_none(new))
@@ -279,7 +278,7 @@
return rc;
}
-static DEFINE_MUTEX(cpa_mutex);
+DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
unsigned long flags)
@@ -338,19 +337,11 @@
{
unsigned long address;
int nr, i, j;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
pte_t *pte;
for (i = 0; i < numpages;) {
address = page_to_phys(page + i);
- pgd = pgd_offset_k(address);
- p4d = p4d_offset(pgd, address);
- pud = pud_offset(p4d, address);
- pmd = pmd_offset(pud, address);
- pte = pte_offset_kernel(pmd, address);
+ pte = virt_to_kpte(address);
nr = (unsigned long)pte >> ilog2(sizeof(long));
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
nr = min(numpages - i, nr);
@@ -367,20 +358,4 @@
}
}
-#ifdef CONFIG_HIBERNATION
-bool kernel_page_present(struct page *page)
-{
- unsigned long addr;
- int cc;
-
- addr = page_to_phys(page);
- asm volatile(
- " lra %1,0(%1)\n"
- " ipm %0\n"
- " srl %0,28"
- : "=d" (cc), "+a" (addr) : : "cc");
- return cc == 0;
-}
-#endif /* CONFIG_HIBERNATION */
-
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 46071be..6d99b1b 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -89,67 +89,65 @@
int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
{
- unsigned long *table, *pgd;
- int rc, notify;
+ unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+ unsigned long asce_limit = mm->context.asce_limit;
/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
- VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
- rc = 0;
- notify = 0;
- while (mm->context.asce_limit < end) {
- table = crst_table_alloc(mm);
- if (!table) {
- rc = -ENOMEM;
- break;
- }
- spin_lock_bh(&mm->page_table_lock);
- pgd = (unsigned long *) mm->pgd;
- if (mm->context.asce_limit == _REGION2_SIZE) {
- crst_table_init(table, _REGION2_ENTRY_EMPTY);
- p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = _REGION1_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
- mm_inc_nr_puds(mm);
- } else {
- crst_table_init(table, _REGION1_ENTRY_EMPTY);
- pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
- mm->pgd = (pgd_t *) table;
- mm->context.asce_limit = -PAGE_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
- }
- notify = 1;
- spin_unlock_bh(&mm->page_table_lock);
+ VM_BUG_ON(asce_limit < _REGION2_SIZE);
+
+ if (end <= asce_limit)
+ return 0;
+
+ if (asce_limit == _REGION2_SIZE) {
+ p4d = crst_table_alloc(mm);
+ if (unlikely(!p4d))
+ goto err_p4d;
+ crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
}
- if (notify)
- on_each_cpu(__crst_table_upgrade, mm, 0);
- return rc;
-}
-
-void crst_table_downgrade(struct mm_struct *mm)
-{
- pgd_t *pgd;
-
- /* downgrade should only happen from 3 to 2 levels (compat only) */
- VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
-
- if (current->active_mm == mm) {
- clear_user_asce();
- __tlb_flush_mm(mm);
+ if (end > _REGION1_SIZE) {
+ pgd = crst_table_alloc(mm);
+ if (unlikely(!pgd))
+ goto err_pgd;
+ crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
}
- pgd = mm->pgd;
- mm_dec_nr_pmds(mm);
- mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
- mm->context.asce_limit = _REGION3_SIZE;
- mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
- crst_table_free(mm, (unsigned long *) pgd);
+ spin_lock_bh(&mm->page_table_lock);
- if (current->active_mm == mm)
- set_user_asce(mm);
+ /*
+ * This routine gets called with mmap_lock lock held and there is
+ * no reason to optimize for the case of otherwise. However, if
+ * that would ever change, the below check will let us know.
+ */
+ VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+ if (p4d) {
+ __pgd = (unsigned long *) mm->pgd;
+ p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+ mm->pgd = (pgd_t *) p4d;
+ mm->context.asce_limit = _REGION1_SIZE;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ mm_inc_nr_puds(mm);
+ }
+ if (pgd) {
+ __pgd = (unsigned long *) mm->pgd;
+ pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+ mm->pgd = (pgd_t *) pgd;
+ mm->context.asce_limit = TASK_SIZE_MAX;
+ mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+ }
+
+ spin_unlock_bh(&mm->page_table_lock);
+
+ on_each_cpu(__crst_table_upgrade, mm, 0);
+
+ return 0;
+
+err_pgd:
+ crst_table_free(mm, p4d);
+err_p4d:
+ return -ENOMEM;
}
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
@@ -255,13 +253,15 @@
/* Free 2K page table fragment of a 4K page */
bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+ mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
mask >>= 24;
if (mask & 3)
list_add(&page->lru, &mm->context.pgtable_list);
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.lock);
+ mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+ mask >>= 24;
if (mask != 0)
return;
} else {
@@ -316,7 +316,7 @@
mask >>= 24;
if (mask != 0)
break;
- /* fallthrough */
+ fallthrough;
case 3: /* 4K page table with pgstes */
if (mask & 3)
atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -541,7 +541,7 @@
base_region2_walk(table, 0, _REGION1_SIZE, 0);
break;
case _ASCE_TYPE_REGION1:
- base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+ base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
break;
}
base_crst_free(table);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9ebd012..fabaedd 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -19,13 +19,31 @@
#include <linux/ksm.h>
#include <linux/mman.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/page-states.h>
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+ /*
+ * mio_wb_bit_mask may be set on a different CPU, but it is only set
+ * once at init and only read afterwards.
+ */
+ return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
+
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+ /*
+ * mio_wb_bit_mask may be set on a different CPU, but it is only set
+ * once at init and only read afterwards.
+ */
+ return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, int nodat)
{
@@ -970,6 +988,7 @@
int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
unsigned long *oldpte, unsigned long *oldpgste)
{
+ struct vm_area_struct *vma;
unsigned long pgstev;
spinlock_t *ptl;
pgste_t pgste;
@@ -979,6 +998,10 @@
WARN_ON_ONCE(orc > ESSA_MAX);
if (unlikely(orc > ESSA_MAX))
return -EINVAL;
+
+ vma = find_vma(mm, hva);
+ if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+ return -EFAULT;
ptep = get_locked_pte(mm, hva, &ptl);
if (unlikely(!ptep))
return -EFAULT;
@@ -1071,10 +1094,14 @@
int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
unsigned long bits, unsigned long value)
{
+ struct vm_area_struct *vma;
spinlock_t *ptl;
pgste_t new;
pte_t *ptep;
+ vma = find_vma(mm, hva);
+ if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+ return -EFAULT;
ptep = get_locked_pte(mm, hva, &ptl);
if (unlikely(!ptep))
return -EFAULT;
@@ -1099,9 +1126,13 @@
*/
int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
{
+ struct vm_area_struct *vma;
spinlock_t *ptl;
pte_t *ptep;
+ vma = find_vma(mm, hva);
+ if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+ return -EFAULT;
ptep = get_locked_pte(mm, hva, &ptl);
if (unlikely(!ptep))
return -EFAULT;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index f810930..b239f2b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -13,7 +13,6 @@
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
@@ -21,14 +20,6 @@
static DEFINE_MUTEX(vmem_mutex);
-struct memory_segment {
- struct list_head list;
- unsigned long start;
- unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
static void __ref *vmem_alloc_pages(unsigned int order)
{
unsigned long size = PAGE_SIZE << order;
@@ -38,6 +29,15 @@
return (void *) memblock_phys_alloc(size, size);
}
+static void vmem_free_pages(unsigned long addr, int order)
+{
+ /* We don't expect boot memory to be removed ever. */
+ if (!slab_is_available() ||
+ WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+ return;
+ free_pages(addr, order);
+}
+
void *vmem_crst_alloc(unsigned long val)
{
unsigned long *table;
@@ -63,332 +63,487 @@
return pte;
}
-/*
- * Add a physical memory range to the 1:1 mapping.
- */
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static void vmem_pte_free(unsigned long *table)
{
- unsigned long pgt_prot, sgt_prot, r3_prot;
- unsigned long pages4k, pages1m, pages2g;
- unsigned long end = start + size;
- unsigned long address = start;
- pgd_t *pg_dir;
- p4d_t *p4_dir;
- pud_t *pu_dir;
- pmd_t *pm_dir;
- pte_t *pt_dir;
- int ret = -ENOMEM;
+ /* We don't expect boot memory to be removed ever. */
+ if (!slab_is_available() ||
+ WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+ return;
+ page_table_free(&init_mm, table);
+}
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- r3_prot = pgprot_val(REGION3_KERNEL);
- if (!MACHINE_HAS_NX) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
- r3_prot &= ~_REGION_ENTRY_NOEXEC;
+#define PAGE_UNUSED 0xFD
+
+/*
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges
+ * from unused_pmd_start to next PMD_SIZE boundary.
+ */
+static unsigned long unused_pmd_start;
+
+static void vmemmap_flush_unused_pmd(void)
+{
+ if (!unused_pmd_start)
+ return;
+ memset(__va(unused_pmd_start), PAGE_UNUSED,
+ ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
+ unused_pmd_start = 0;
+}
+
+static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+ /*
+ * As we expect to add in the same granularity as we remove, it's
+ * sufficient to mark only some piece used to block the memmap page from
+ * getting removed (just in case the memmap never gets initialized,
+ * e.g., because the memory block never gets onlined).
+ */
+ memset(__va(start), 0, sizeof(struct page));
+}
+
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+ /*
+ * We only optimize if the new used range directly follows the
+ * previously unused range (esp., when populating consecutive sections).
+ */
+ if (unused_pmd_start == start) {
+ unused_pmd_start = end;
+ if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
+ unused_pmd_start = 0;
+ return;
}
- pages4k = pages1m = pages2g = 0;
- while (address < end) {
- pg_dir = pgd_offset_k(address);
- if (pgd_none(*pg_dir)) {
- p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
- if (!p4_dir)
- goto out;
- pgd_populate(&init_mm, pg_dir, p4_dir);
- }
- p4_dir = p4d_offset(pg_dir, address);
- if (p4d_none(*p4_dir)) {
- pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
- if (!pu_dir)
- goto out;
- p4d_populate(&init_mm, p4_dir, pu_dir);
- }
- pu_dir = pud_offset(p4_dir, address);
- if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
- !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
- !debug_pagealloc_enabled()) {
- pud_val(*pu_dir) = address | r3_prot;
- address += PUD_SIZE;
- pages2g++;
- continue;
- }
- if (pud_none(*pu_dir)) {
- pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
- if (!pm_dir)
- goto out;
- pud_populate(&init_mm, pu_dir, pm_dir);
- }
- pm_dir = pmd_offset(pu_dir, address);
- if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
- !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
- !debug_pagealloc_enabled()) {
- pmd_val(*pm_dir) = address | sgt_prot;
- address += PMD_SIZE;
- pages1m++;
- continue;
- }
- if (pmd_none(*pm_dir)) {
- pt_dir = vmem_pte_alloc();
- if (!pt_dir)
- goto out;
- pmd_populate(&init_mm, pm_dir, pt_dir);
- }
+ vmemmap_flush_unused_pmd();
+ __vmemmap_use_sub_pmd(start, end);
+}
- pt_dir = pte_offset_kernel(pm_dir, address);
- pte_val(*pt_dir) = address | pgt_prot;
- address += PAGE_SIZE;
- pages4k++;
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+ void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+ vmemmap_flush_unused_pmd();
+
+ /* Could be our memmap page is filled with PAGE_UNUSED already ... */
+ __vmemmap_use_sub_pmd(start, end);
+
+ /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+ if (!IS_ALIGNED(start, PMD_SIZE))
+ memset(page, PAGE_UNUSED, start - __pa(page));
+ /*
+ * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+ * consecutive sections. Remember for the last added PMD the last
+ * unused range in the populated PMD.
+ */
+ if (!IS_ALIGNED(end, PMD_SIZE))
+ unused_pmd_start = end;
+}
+
+/* Returns true if the PMD is completely unused and can be freed. */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+ void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+ vmemmap_flush_unused_pmd();
+ memset(__va(start), PAGE_UNUSED, end - start);
+ return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+ unsigned long end, bool add, bool direct)
+{
+ unsigned long prot, pages = 0;
+ int ret = -ENOMEM;
+ pte_t *pte;
+
+ prot = pgprot_val(PAGE_KERNEL);
+ if (!MACHINE_HAS_NX)
+ prot &= ~_PAGE_NOEXEC;
+
+ pte = pte_offset_kernel(pmd, addr);
+ for (; addr < end; addr += PAGE_SIZE, pte++) {
+ if (!add) {
+ if (pte_none(*pte))
+ continue;
+ if (!direct)
+ vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+ pte_clear(&init_mm, addr, pte);
+ } else if (pte_none(*pte)) {
+ if (!direct) {
+ void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+ if (!new_page)
+ goto out;
+ pte_val(*pte) = __pa(new_page) | prot;
+ } else {
+ pte_val(*pte) = addr | prot;
+ }
+ } else {
+ continue;
+ }
+ pages++;
}
ret = 0;
out:
- update_page_count(PG_DIRECT_MAP_4K, pages4k);
- update_page_count(PG_DIRECT_MAP_1M, pages1m);
- update_page_count(PG_DIRECT_MAP_2G, pages2g);
+ if (direct)
+ update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
return ret;
}
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
+{
+ pte_t *pte;
+ int i;
+
+ /* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+ pte = pte_offset_kernel(pmd, start);
+ for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+ if (!pte_none(*pte))
+ return;
+ }
+ vmem_pte_free(__va(pmd_deref(*pmd)));
+ pmd_clear(pmd);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+ unsigned long end, bool add, bool direct)
+{
+ unsigned long next, prot, pages = 0;
+ int ret = -ENOMEM;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ prot = pgprot_val(SEGMENT_KERNEL);
+ if (!MACHINE_HAS_NX)
+ prot &= ~_SEGMENT_ENTRY_NOEXEC;
+
+ pmd = pmd_offset(pud, addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+ if (!add) {
+ if (pmd_none(*pmd))
+ continue;
+ if (pmd_large(*pmd) && !add) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ if (!direct)
+ vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+ pmd_clear(pmd);
+ pages++;
+ } else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+ vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+ pmd_clear(pmd);
+ }
+ continue;
+ }
+ } else if (pmd_none(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE) &&
+ MACHINE_HAS_EDAT1 && addr && direct &&
+ !debug_pagealloc_enabled()) {
+ pmd_val(*pmd) = addr | prot;
+ pages++;
+ continue;
+ } else if (!direct && MACHINE_HAS_EDAT1) {
+ void *new_page;
+
+ /*
+ * Use 1MB frames for vmemmap if available. We
+ * always use large frames even if they are only
+ * partially used. Otherwise we would have also
+ * page tables since vmemmap_populate gets
+ * called for each section separately.
+ */
+ new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+ if (new_page) {
+ pmd_val(*pmd) = __pa(new_page) | prot;
+ if (!IS_ALIGNED(addr, PMD_SIZE) ||
+ !IS_ALIGNED(next, PMD_SIZE)) {
+ vmemmap_use_new_sub_pmd(addr, next);
+ }
+ continue;
+ }
+ }
+ pte = vmem_pte_alloc();
+ if (!pte)
+ goto out;
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (pmd_large(*pmd)) {
+ if (!direct)
+ vmemmap_use_sub_pmd(addr, next);
+ continue;
+ }
+ ret = modify_pte_table(pmd, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_pte_table(pmd, addr & PMD_MASK);
+ }
+ ret = 0;
+out:
+ if (direct)
+ update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+ return ret;
+}
+
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+ const unsigned long end = start + PUD_SIZE;
+ pmd_t *pmd;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+ return;
+#endif
+ pmd = pmd_offset(pud, start);
+ for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+ if (!pmd_none(*pmd))
+ return;
+ vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+ pud_clear(pud);
+}
+
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+ bool add, bool direct)
+{
+ unsigned long next, prot, pages = 0;
+ int ret = -ENOMEM;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ prot = pgprot_val(REGION3_KERNEL);
+ if (!MACHINE_HAS_NX)
+ prot &= ~_REGION_ENTRY_NOEXEC;
+ pud = pud_offset(p4d, addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+ if (!add) {
+ if (pud_none(*pud))
+ continue;
+ if (pud_large(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE)) {
+ pud_clear(pud);
+ pages++;
+ }
+ continue;
+ }
+ } else if (pud_none(*pud)) {
+ if (IS_ALIGNED(addr, PUD_SIZE) &&
+ IS_ALIGNED(next, PUD_SIZE) &&
+ MACHINE_HAS_EDAT2 && addr && direct &&
+ !debug_pagealloc_enabled()) {
+ pud_val(*pud) = addr | prot;
+ pages++;
+ continue;
+ }
+ pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!pmd)
+ goto out;
+ pud_populate(&init_mm, pud, pmd);
+ } else if (pud_large(*pud)) {
+ continue;
+ }
+ ret = modify_pmd_table(pud, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_pmd_table(pud, addr & PUD_MASK);
+ }
+ ret = 0;
+out:
+ if (direct)
+ update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+ return ret;
+}
+
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+ const unsigned long end = start + P4D_SIZE;
+ pud_t *pud;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+ return;
+#endif
+
+ pud = pud_offset(p4d, start);
+ for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+ if (!pud_none(*pud))
+ return;
+ }
+ vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+ p4d_clear(p4d);
+}
+
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+ bool add, bool direct)
+{
+ unsigned long next;
+ int ret = -ENOMEM;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ for (; addr < end; addr = next, p4d++) {
+ next = p4d_addr_end(addr, end);
+ if (!add) {
+ if (p4d_none(*p4d))
+ continue;
+ } else if (p4d_none(*p4d)) {
+ pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!pud)
+ goto out;
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ ret = modify_pud_table(p4d, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_pud_table(p4d, addr & P4D_MASK);
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+{
+ const unsigned long end = start + PGDIR_SIZE;
+ p4d_t *p4d;
+ int i;
+
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (end > VMALLOC_START)
+ return;
+#ifdef CONFIG_KASAN
+ if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+ return;
+#endif
+
+ p4d = p4d_offset(pgd, start);
+ for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+ if (!p4d_none(*p4d))
+ return;
+ }
+ vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+ pgd_clear(pgd);
+}
+
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+ bool direct)
+{
+ unsigned long addr, next;
+ int ret = -ENOMEM;
+ pgd_t *pgd;
+ p4d_t *p4d;
+
+ if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+ return -EINVAL;
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
+ pgd = pgd_offset_k(addr);
+
+ if (!add) {
+ if (pgd_none(*pgd))
+ continue;
+ } else if (pgd_none(*pgd)) {
+ p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!p4d)
+ goto out;
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ ret = modify_p4d_table(pgd, addr, next, add, direct);
+ if (ret)
+ goto out;
+ if (!add)
+ try_free_p4d_table(pgd, addr & PGDIR_MASK);
+ }
+ ret = 0;
+out:
+ if (!add)
+ flush_tlb_kernel_range(start, end);
+ return ret;
+}
+
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+ return modify_pagetable(start, end, true, direct);
+}
+
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+ return modify_pagetable(start, end, false, direct);
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+ return add_pagetable(start, start + size, true);
+}
+
/*
* Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
*/
static void vmem_remove_range(unsigned long start, unsigned long size)
{
- unsigned long pages4k, pages1m, pages2g;
- unsigned long end = start + size;
- unsigned long address = start;
- pgd_t *pg_dir;
- p4d_t *p4_dir;
- pud_t *pu_dir;
- pmd_t *pm_dir;
- pte_t *pt_dir;
-
- pages4k = pages1m = pages2g = 0;
- while (address < end) {
- pg_dir = pgd_offset_k(address);
- if (pgd_none(*pg_dir)) {
- address += PGDIR_SIZE;
- continue;
- }
- p4_dir = p4d_offset(pg_dir, address);
- if (p4d_none(*p4_dir)) {
- address += P4D_SIZE;
- continue;
- }
- pu_dir = pud_offset(p4_dir, address);
- if (pud_none(*pu_dir)) {
- address += PUD_SIZE;
- continue;
- }
- if (pud_large(*pu_dir)) {
- pud_clear(pu_dir);
- address += PUD_SIZE;
- pages2g++;
- continue;
- }
- pm_dir = pmd_offset(pu_dir, address);
- if (pmd_none(*pm_dir)) {
- address += PMD_SIZE;
- continue;
- }
- if (pmd_large(*pm_dir)) {
- pmd_clear(pm_dir);
- address += PMD_SIZE;
- pages1m++;
- continue;
- }
- pt_dir = pte_offset_kernel(pm_dir, address);
- pte_clear(&init_mm, address, pt_dir);
- address += PAGE_SIZE;
- pages4k++;
- }
- flush_tlb_kernel_range(start, end);
- update_page_count(PG_DIRECT_MAP_4K, -pages4k);
- update_page_count(PG_DIRECT_MAP_1M, -pages1m);
- update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+ remove_pagetable(start, start + size, true);
}
/*
* Add a backed mem_map array to the virtual mem_map array.
*/
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
- struct vmem_altmap *altmap)
+ struct vmem_altmap *altmap)
{
- unsigned long pgt_prot, sgt_prot;
- unsigned long address = start;
- pgd_t *pg_dir;
- p4d_t *p4_dir;
- pud_t *pu_dir;
- pmd_t *pm_dir;
- pte_t *pt_dir;
- int ret = -ENOMEM;
-
- pgt_prot = pgprot_val(PAGE_KERNEL);
- sgt_prot = pgprot_val(SEGMENT_KERNEL);
- if (!MACHINE_HAS_NX) {
- pgt_prot &= ~_PAGE_NOEXEC;
- sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
- }
- for (address = start; address < end;) {
- pg_dir = pgd_offset_k(address);
- if (pgd_none(*pg_dir)) {
- p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
- if (!p4_dir)
- goto out;
- pgd_populate(&init_mm, pg_dir, p4_dir);
- }
-
- p4_dir = p4d_offset(pg_dir, address);
- if (p4d_none(*p4_dir)) {
- pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
- if (!pu_dir)
- goto out;
- p4d_populate(&init_mm, p4_dir, pu_dir);
- }
-
- pu_dir = pud_offset(p4_dir, address);
- if (pud_none(*pu_dir)) {
- pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
- if (!pm_dir)
- goto out;
- pud_populate(&init_mm, pu_dir, pm_dir);
- }
-
- pm_dir = pmd_offset(pu_dir, address);
- if (pmd_none(*pm_dir)) {
- /* Use 1MB frames for vmemmap if available. We always
- * use large frames even if they are only partially
- * used.
- * Otherwise we would have also page tables since
- * vmemmap_populate gets called for each section
- * separately. */
- if (MACHINE_HAS_EDAT1) {
- void *new_page;
-
- new_page = vmemmap_alloc_block(PMD_SIZE, node);
- if (!new_page)
- goto out;
- pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- pt_dir = vmem_pte_alloc();
- if (!pt_dir)
- goto out;
- pmd_populate(&init_mm, pm_dir, pt_dir);
- } else if (pmd_large(*pm_dir)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
-
- pt_dir = pte_offset_kernel(pm_dir, address);
- if (pte_none(*pt_dir)) {
- void *new_page;
-
- new_page = vmemmap_alloc_block(PAGE_SIZE, node);
- if (!new_page)
- goto out;
- pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
- }
- address += PAGE_SIZE;
- }
- ret = 0;
-out:
- return ret;
-}
-
-void vmemmap_free(unsigned long start, unsigned long end,
- struct vmem_altmap *altmap)
-{
-}
-
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
-{
- struct memory_segment *tmp;
-
- if (seg->start + seg->size > VMEM_MAX_PHYS ||
- seg->start + seg->size < seg->start)
- return -ERANGE;
-
- list_for_each_entry(tmp, &mem_segs, list) {
- if (seg->start >= tmp->start + tmp->size)
- continue;
- if (seg->start + seg->size <= tmp->start)
- continue;
- return -ENOSPC;
- }
- list_add(&seg->list, &mem_segs);
- return 0;
-}
-
-/*
- * Remove memory segment from the segment list.
- */
-static void remove_memory_segment(struct memory_segment *seg)
-{
- list_del(&seg->list);
-}
-
-static void __remove_shared_memory(struct memory_segment *seg)
-{
- remove_memory_segment(seg);
- vmem_remove_range(seg->start, seg->size);
-}
-
-int vmem_remove_mapping(unsigned long start, unsigned long size)
-{
- struct memory_segment *seg;
int ret;
mutex_lock(&vmem_mutex);
-
- ret = -ENOENT;
- list_for_each_entry(seg, &mem_segs, list) {
- if (seg->start == start && seg->size == size)
- break;
- }
-
- if (seg->start != start || seg->size != size)
- goto out;
-
- ret = 0;
- __remove_shared_memory(seg);
- kfree(seg);
-out:
+ /* We don't care about the node, just use NUMA_NO_NODE on allocations */
+ ret = add_pagetable(start, end, false);
+ if (ret)
+ remove_pagetable(start, end, false);
mutex_unlock(&vmem_mutex);
return ret;
}
+void vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+ mutex_lock(&vmem_mutex);
+ remove_pagetable(start, end, false);
+ mutex_unlock(&vmem_mutex);
+}
+
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+ mutex_lock(&vmem_mutex);
+ vmem_remove_range(start, size);
+ mutex_unlock(&vmem_mutex);
+}
+
int vmem_add_mapping(unsigned long start, unsigned long size)
{
- struct memory_segment *seg;
int ret;
+ if (start + size > VMEM_MAX_PHYS ||
+ start + size < start)
+ return -ERANGE;
+
mutex_lock(&vmem_mutex);
- ret = -ENOMEM;
- seg = kzalloc(sizeof(*seg), GFP_KERNEL);
- if (!seg)
- goto out;
- seg->start = start;
- seg->size = size;
-
- ret = insert_memory_segment(seg);
+ ret = vmem_add_range(start, size);
if (ret)
- goto out_free;
-
- ret = vmem_add_mem(start, size);
- if (ret)
- goto out_remove;
- goto out;
-
-out_remove:
- __remove_shared_memory(seg);
-out_free:
- kfree(seg);
-out:
+ vmem_remove_range(start, size);
mutex_unlock(&vmem_mutex);
return ret;
}
@@ -400,10 +555,11 @@
*/
void __init vmem_map_init(void)
{
- struct memblock_region *reg;
+ phys_addr_t base, end;
+ u64 i;
- for_each_memblock(memory, reg)
- vmem_add_mem(reg->base, reg->size);
+ for_each_mem_range(i, &base, &end)
+ vmem_add_range(base, end - base);
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
@@ -422,27 +578,3 @@
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
-
-/*
- * Convert memblock.memory to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
- struct memblock_region *reg;
- struct memory_segment *seg;
-
- mutex_lock(&vmem_mutex);
- for_each_memblock(memory, reg) {
- seg = kzalloc(sizeof(*seg), GFP_KERNEL);
- if (!seg)
- panic("Out of memory...\n");
- seg->start = reg->base;
- seg->size = reg->size;
- insert_memory_segment(seg);
- }
- mutex_unlock(&vmem_mutex);
- return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 2d29966..cd0cbda 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -24,6 +24,7 @@
#include <linux/init.h>
#include <linux/bpf.h>
#include <linux/mm.h>
+#include <linux/kernel.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include <asm/facility.h>
@@ -39,25 +40,22 @@
int size; /* Size of program and literal pool */
int size_prg; /* Size of program */
int prg; /* Current position in program */
- int lit_start; /* Start of literal pool */
- int lit; /* Current position in literal pool */
+ int lit32_start; /* Start of 32-bit literal pool */
+ int lit32; /* Current position in 32-bit literal pool */
+ int lit64_start; /* Start of 64-bit literal pool */
+ int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
- int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
- int labels[1]; /* Labels for local jumps */
+ int excnt; /* Number of exception table entries */
};
-#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
-
-#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
-#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL (1 << 2) /* code uses literals */
-#define SEEN_FUNC (1 << 3) /* calls C functions */
-#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
-#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
+#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
+#define SEEN_LITERAL BIT(1) /* code uses literals */
+#define SEEN_FUNC BIT(2) /* calls C functions */
+#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -132,13 +130,13 @@
#define _EMIT2(op) \
({ \
if (jit->prg_buf) \
- *(u16 *) (jit->prg_buf + jit->prg) = op; \
+ *(u16 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 2; \
})
#define EMIT2(op, b1, b2) \
({ \
- _EMIT2(op | reg(b1, b2)); \
+ _EMIT2((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -146,20 +144,20 @@
#define _EMIT4(op) \
({ \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->prg) = op; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 4; \
})
#define EMIT4(op, b1, b2) \
({ \
- _EMIT4(op | reg(b1, b2)); \
+ _EMIT4((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT4_RRF(op, b1, b2, b3) \
({ \
- _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
+ _EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
@@ -168,13 +166,13 @@
#define _EMIT4_DISP(op, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT4(op | __disp); \
+ _EMIT4((op) | __disp); \
})
#define EMIT4_DISP(op, b1, b2, disp) \
({ \
- _EMIT4_DISP(op | reg_high(b1) << 16 | \
- reg_high(b2) << 8, disp); \
+ _EMIT4_DISP((op) | reg_high(b1) << 16 | \
+ reg_high(b2) << 8, (disp)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -182,21 +180,27 @@
#define EMIT4_IMM(op, b1, imm) \
({ \
unsigned int __imm = (imm) & 0xffff; \
- _EMIT4(op | reg_high(b1) << 16 | __imm); \
+ _EMIT4((op) | reg_high(b1) << 16 | __imm); \
REG_SET_SEEN(b1); \
})
#define EMIT4_PCREL(op, pcrel) \
({ \
long __pcrel = ((pcrel) >> 1) & 0xffff; \
- _EMIT4(op | __pcrel); \
+ _EMIT4((op) | __pcrel); \
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target) \
+({ \
+ int __rel = ((target) - jit->prg) / 2; \
+ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
#define _EMIT6(op1, op2) \
({ \
if (jit->prg_buf) { \
- *(u32 *) (jit->prg_buf + jit->prg) = op1; \
- *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op1); \
+ *(u16 *) (jit->prg_buf + jit->prg + 4) = (op2); \
} \
jit->prg += 6; \
})
@@ -204,98 +208,112 @@
#define _EMIT6_DISP(op1, op2, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT6(op1 | __disp, op2); \
+ _EMIT6((op1) | __disp, op2); \
})
#define _EMIT6_DISP_LH(op1, op2, disp) \
({ \
- u32 _disp = (u32) disp; \
+ u32 _disp = (u32) (disp); \
unsigned int __disp_h = _disp & 0xff000; \
unsigned int __disp_l = _disp & 0x00fff; \
- _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
+ _EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4); \
})
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
({ \
- _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
+ _EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 | \
reg_high(b3) << 8, op2, disp); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
})
-#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
+#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target) \
({ \
- int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
- op2 | mask << 12); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
+ (op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
-#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
+#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target) \
({ \
- int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
- (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
+ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
- BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+ BUILD_BUG_ON(((unsigned long) (imm)) > 0xff); \
})
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
- /* Branch instruction needs 6 bytes */ \
- int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
+ int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2; \
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT6_PCREL_RILB(op, b, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
REG_SET_SEEN(b); \
})
#define EMIT6_PCREL_RIL(op, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | rel >> 16, rel & 0xffff); \
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target) \
+({ \
+ EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
})
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
- _EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+ _EMIT6((op) | (__imm >> 16), __imm & 0xffff); \
})
#define EMIT6_IMM(op, b1, imm) \
({ \
- _EMIT6_IMM(op | reg_high(b1) << 16, imm); \
+ _EMIT6_IMM((op) | reg_high(b1) << 16, imm); \
REG_SET_SEEN(b1); \
})
+#define _EMIT_CONST_U32(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit32; \
+ if (jit->prg_buf) \
+ *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+ jit->lit32 += 4; \
+ ret; \
+})
+
#define EMIT_CONST_U32(val) \
({ \
- unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U32(val) - jit->base_ip; \
+})
+
+#define _EMIT_CONST_U64(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit64; \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
- jit->lit += 4; \
+ *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+ jit->lit64 += 8; \
ret; \
})
#define EMIT_CONST_U64(val) \
({ \
- unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
- if (jit->prg_buf) \
- *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
- jit->lit += 8; \
- ret; \
+ _EMIT_CONST_U64(val) - jit->base_ip; \
})
#define EMIT_ZERO(b1) \
@@ -308,6 +326,67 @@
})
/*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+ return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+ return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+ return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+ return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+ return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
* Fill whole space with illegal instructions
*/
static void jit_fill_hole(void *area, unsigned int size)
@@ -384,9 +463,18 @@
*/
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{
-
+ const int last = 15, save_restore_size = 6;
int re = 6, rs;
+ if (is_first_pass(jit)) {
+ /*
+ * We don't know yet which registers are used. Reserve space
+ * conservatively.
+ */
+ jit->prg += (last - re + 1) * save_restore_size;
+ return;
+ }
+
do {
rs = get_start(jit, re);
if (!rs)
@@ -397,7 +485,25 @@
else
restore_regs(jit, rs, re, stack_depth);
re++;
- } while (re <= 15);
+ } while (re <= last);
+}
+
+static void bpf_skip(struct bpf_jit *jit, int size)
+{
+ if (size >= 6 && !is_valid_rel(size)) {
+ /* brcl 0xf,size */
+ EMIT6_PCREL_RIL(0xc0f4000000, size);
+ size -= 6;
+ } else if (size >= 4 && is_valid_rel(size)) {
+ /* brc 0xf,size */
+ EMIT4_PCREL(0xa7f40000, size);
+ size -= 4;
+ }
+ while (size >= 2) {
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+ size -= 2;
+ }
}
/*
@@ -412,30 +518,39 @@
/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
} else {
- /* j tail_call_start: NOP if no tail calls are used */
- EMIT4_PCREL(0xa7f40000, 6);
- _EMIT2(0);
+ /*
+ * There are no tail calls. Insert nops in order to have
+ * tail_call_start at a predictable offset.
+ */
+ bpf_skip(jit, 6);
}
/* Tail calls have to skip above initialization */
jit->tail_call_start = jit->prg;
/* Save registers */
save_restore_regs(jit, REGS_SAVE, stack_depth);
/* Setup literal pool */
- if (jit->seen & SEEN_LITERAL) {
- /* basr %r13,0 */
- EMIT2(0x0d00, REG_L, REG_0);
- jit->base_ip = jit->prg;
+ if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+ if (!is_first_pass(jit) &&
+ is_valid_ldisp(jit->size - (jit->prg + 2))) {
+ /* basr %l,0 */
+ EMIT2(0x0d00, REG_L, REG_0);
+ jit->base_ip = jit->prg;
+ } else {
+ /* larl %l,lit32_start */
+ EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+ jit->base_ip = jit->lit32_start;
+ }
}
/* Setup stack and backchain */
- if (jit->seen & SEEN_STACK) {
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* lgr %w1,%r15 (backchain) */
EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* stg %w1,152(%r15) (backchain) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
@@ -447,12 +562,6 @@
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
- /* Return 0 */
- if (jit->seen & SEEN_RET0) {
- jit->ret0_ip = jit->prg;
- /* lghi %b0,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
- }
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
@@ -477,7 +586,7 @@
_EMIT2(0x07fe);
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
- (jit->seen & SEEN_FUNC)) {
+ (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
if (test_facility(35)) {
@@ -497,6 +606,84 @@
}
}
+static int get_probe_mem_regno(const u8 *insn)
+{
+ /*
+ * insn must point to llgc, llgh, llgf or lg, which have destination
+ * register at the same position.
+ */
+ if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+ return -1;
+ if (insn[5] != 0x90 && /* llgc */
+ insn[5] != 0x91 && /* llgh */
+ insn[5] != 0x16 && /* llgf */
+ insn[5] != 0x04) /* lg */
+ return -1;
+ return insn[1] >> 4;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs)
+{
+ int regno;
+ u8 *insn;
+
+ regs->psw.addr = extable_fixup(x);
+ insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16);
+ regno = get_probe_mem_regno(insn);
+ if (WARN_ON_ONCE(regno < 0))
+ /* JIT bug - unexpected instruction. */
+ return false;
+ regs->gprs[regno] = 0;
+ return true;
+}
+
+static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
+ int probe_prg, int nop_prg)
+{
+ struct exception_table_entry *ex;
+ s64 delta;
+ u8 *insn;
+ int prg;
+ int i;
+
+ if (!fp->aux->extable)
+ /* Do nothing during early JIT passes. */
+ return 0;
+ insn = jit->prg_buf + probe_prg;
+ if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0))
+ /* JIT bug - unexpected probe instruction. */
+ return -1;
+ if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+ /* JIT bug - gap between probe and nop instructions. */
+ return -1;
+ for (i = 0; i < 2; i++) {
+ if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
+ /* Verifier bug - not enough entries. */
+ return -1;
+ ex = &fp->aux->extable[jit->excnt];
+ /* Add extable entries for probe and nop instructions. */
+ prg = i == 0 ? probe_prg : nop_prg;
+ delta = jit->prg_buf + prg - (u8 *)&ex->insn;
+ if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+ /* JIT bug - code and extable must be close. */
+ return -1;
+ ex->insn = delta;
+ /*
+ * Always land on the nop. Note that extable infrastructure
+ * ignores fixup field, it is handled by ex_handler_bpf().
+ */
+ delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+ if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+ /* JIT bug - landing pad and extable must be close. */
+ return -1;
+ ex->fixup = delta;
+ ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ jit->excnt++;
+ }
+ return 0;
+}
+
/*
* Compile one eBPF instruction into s390x code
*
@@ -504,19 +691,24 @@
* stack space for the large switch statement.
*/
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
- int i, bool extra_pass)
+ int i, bool extra_pass, u32 stack_depth)
{
struct bpf_insn *insn = &fp->insnsi[i];
- int jmp_off, last, insn_count = 1;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
+ int last, insn_count = 1;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
+ int probe_prg = -1;
unsigned int mask;
+ int nop_prg;
+ int err;
- if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
- jit->seen |= SEEN_REG_AX;
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ BPF_MODE(insn->code) == BPF_PROBE_MEM)
+ probe_prg = jit->prg;
+
switch (insn->code) {
/*
* BPF_MOV
@@ -550,9 +742,8 @@
u64 imm64;
imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
- /* lg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm64));
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
insn_count = 2;
break;
}
@@ -688,9 +879,18 @@
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
EMIT2(0x1800, REG_W1, dst_reg);
- /* dl %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
- EMIT_CONST_U32(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U32(imm));
+ } else {
+ /* lgfrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlr %w0,%dst */
+ EMIT4(0xb9970000, REG_W0, dst_reg);
+ }
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
@@ -712,9 +912,18 @@
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* dlg %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, dst_reg);
+ }
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -737,9 +946,19 @@
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
- /* ng %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* ng %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0080,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ngr %dst,%w0 */
+ EMIT4(0xb9800000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_OR
@@ -759,9 +978,19 @@
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
- /* og %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* og %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0081,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ogr %dst,%w0 */
+ EMIT4(0xb9810000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_XOR
@@ -783,9 +1012,19 @@
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
- /* xg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* xg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0082,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* xgr %dst,%w0 */
+ EMIT4(0xb9820000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_LSH
@@ -995,6 +1234,7 @@
* BPF_LDX
*/
case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
/* llgc %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
@@ -1002,6 +1242,7 @@
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
/* llgh %dst,0(off,%src) */
EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
jit->seen |= SEEN_MEM;
@@ -1009,6 +1250,7 @@
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
/* llgf %dst,off(%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
@@ -1016,6 +1258,7 @@
insn_count = 2;
break;
case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
/* lg %dst,0(off,%src) */
jit->seen |= SEEN_MEM;
EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
@@ -1036,9 +1279,8 @@
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
- /* lg %w1,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
- EMIT_CONST_U64(func));
+ /* lgrl %w1,func */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
/* brasl %r14,__s390_indirect_jump_r1 */
EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
@@ -1050,7 +1292,9 @@
EMIT4(0xb9040000, BPF_REG_0, REG_2);
break;
}
- case BPF_JMP | BPF_TAIL_CALL:
+ case BPF_JMP | BPF_TAIL_CALL: {
+ int patch_1_clrj, patch_2_clij, patch_3_brc;
+
/*
* Implicit input:
* B1: pointer to ctx
@@ -1067,9 +1311,11 @@
/* llgf %w1,map.max_entries(%b2) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
- /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
- EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
- REG_W1, 0, 0xa);
+ /* if ((u32)%b3 >= (u32)%w1) goto out; */
+ /* clrj %b3,%w1,0xa,out */
+ patch_1_clrj = jit->prg;
+ EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
+ jit->prg);
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1077,16 +1323,17 @@
*/
if (jit->seen & SEEN_STACK)
- off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
+ off = STK_OFF_TCCNT + STK_OFF + stack_depth;
else
off = STK_OFF_TCCNT;
/* lhi %w0,1 */
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
- MAX_TAIL_CALL_CNT, 0, 0x2);
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */
+ patch_2_clij = jit->prg;
+ EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT,
+ 2, jit->prg);
/*
* prog = array->ptrs[index];
@@ -1098,16 +1345,17 @@
EMIT4(0xb9160000, REG_1, BPF_REG_3);
/* sllg %r1,%r1,3: %r1 *= 8 */
EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
- /* lg %r1,prog(%b2,%r1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ /* ltg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
- /* clgij %r1,0,0x8,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+ /* brc 0x8,out */
+ patch_3_brc = jit->prg;
+ EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
/*
* Restore registers before calling function
*/
- save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
+ save_restore_regs(jit, REGS_RESTORE, stack_depth);
/*
* goto *(prog->bpf_func + tail_call_start);
@@ -1119,14 +1367,26 @@
/* bc 0xf,tail_call_start(%r1) */
_EMIT4(0x47f01000 + jit->tail_call_start);
/* out: */
- jit->labels[0] = jit->prg;
+ if (jit->prg_buf) {
+ *(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
+ (jit->prg - patch_1_clrj) >> 1;
+ *(u16 *)(jit->prg_buf + patch_2_clij + 2) =
+ (jit->prg - patch_2_clij) >> 1;
+ *(u16 *)(jit->prg_buf + patch_3_brc + 2) =
+ (jit->prg - patch_3_brc) >> 1;
+ }
break;
+ }
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
- if (last && !(jit->seen & SEEN_RET0))
+ if (last)
break;
- /* j <exit> */
- EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
+ /* brc 0xf, <exit> */
+ EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
+ else
+ /* brcl 0xf, <exit> */
+ EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
break;
/*
* Branch relative (number of skipped instructions) to offset on
@@ -1259,69 +1519,175 @@
goto branch_oc;
branch_ks:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* crj or cgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, REG_W1, i, off, mask);
+ /* cfi or cgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_ku:
- is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
/* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* clrj or clgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, REG_W1, i, off, mask);
- break;
+ src_reg = REG_1;
+ EMIT6_IMM(0xc0010000, src_reg, imm);
+ goto branch_xu;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* crj or cgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* crj or cgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* cr or cgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1900, dst_reg, src_reg);
+ else
+ EMIT4(0xb9200000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xu:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* clrj or clgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* clrj or clgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* clr or clgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1500, dst_reg, src_reg);
+ else
+ EMIT4(0xb9210000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_oc:
- /* brc mask,jmp_off (branch instruction needs 4 bytes) */
- jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
- EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
}
default: /* too complex, give up */
pr_err("Unknown opcode %02x\n", insn->code);
return -1;
}
+
+ if (probe_prg != -1) {
+ /*
+ * Handlers of certain exceptions leave psw.addr pointing to
+ * the instruction directly after the failing one. Therefore,
+ * create two exception table entries and also add a nop in
+ * case two probing instructions come directly after each
+ * other.
+ */
+ nop_prg = jit->prg;
+ /* bcr 0,%0 */
+ _EMIT2(0x0700);
+ err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
+ if (err < 0)
+ return err;
+ }
+
return insn_count;
}
/*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+ /* On the first pass anything goes */
+ if (is_first_pass(jit))
+ return true;
+
+ /* The codegen pass must not change anything */
+ if (is_codegen_pass(jit))
+ return jit->addrs[i] == jit->prg;
+
+ /* Passes in between must not increase code size */
+ return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+ int delta;
+
+ if (is_codegen_pass(jit)) {
+ delta = jit->prg - jit->addrs[i];
+ if (delta < 0)
+ bpf_skip(jit, -delta);
+ }
+ if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
+ return -1;
+ jit->addrs[i] = jit->prg;
+ return 0;
+}
+
+/*
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
- bool extra_pass)
+ bool extra_pass, u32 stack_depth)
{
- int i, insn_count;
+ int i, insn_count, lit32_size, lit64_size;
- jit->lit = jit->lit_start;
+ jit->lit32 = jit->lit32_start;
+ jit->lit64 = jit->lit64_start;
jit->prg = 0;
+ jit->excnt = 0;
- bpf_jit_prologue(jit, fp->aux->stack_depth);
+ bpf_jit_prologue(jit, stack_depth);
+ if (bpf_set_addr(jit, 0) < 0)
+ return -1;
for (i = 0; i < fp->len; i += insn_count) {
- insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+ insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
if (insn_count < 0)
return -1;
/* Next instruction address */
- jit->addrs[i + insn_count] = jit->prg;
+ if (bpf_set_addr(jit, i + insn_count) < 0)
+ return -1;
}
- bpf_jit_epilogue(jit, fp->aux->stack_depth);
+ bpf_jit_epilogue(jit, stack_depth);
- jit->lit_start = jit->prg;
- jit->size = jit->lit;
+ lit32_size = jit->lit32 - jit->lit32_start;
+ lit64_size = jit->lit64 - jit->lit64_start;
+ jit->lit32_start = jit->prg;
+ if (lit32_size)
+ jit->lit32_start = ALIGN(jit->lit32_start, 4);
+ jit->lit64_start = jit->lit32_start + lit32_size;
+ if (lit64_size)
+ jit->lit64_start = ALIGN(jit->lit64_start, 8);
+ jit->size = jit->lit64_start + lit64_size;
jit->size_prg = jit->prg;
+
+ if (WARN_ON_ONCE(fp->aux->extable &&
+ jit->excnt != fp->aux->num_exentries))
+ /* Verifier bug - too many entries. */
+ return -1;
+
return 0;
}
@@ -1336,11 +1702,35 @@
int pass;
};
+static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
+ struct bpf_prog *fp)
+{
+ struct bpf_binary_header *header;
+ u32 extable_size;
+ u32 code_size;
+
+ /* We need two entries per insn. */
+ fp->aux->num_exentries *= 2;
+
+ code_size = roundup(jit->size,
+ __alignof__(struct exception_table_entry));
+ extable_size = fp->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+ header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
+ 8, jit_fill_hole);
+ if (!header)
+ return NULL;
+ fp->aux->extable = (struct exception_table_entry *)
+ (jit->prg_buf + code_size);
+ return header;
+}
+
/*
* Compile eBPF program "fp"
*/
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
+ u32 stack_depth = round_up(fp->aux->stack_depth, 8);
struct bpf_prog *tmp, *orig_fp = fp;
struct bpf_binary_header *header;
struct s390_jit_data *jit_data;
@@ -1385,7 +1775,7 @@
jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
fp = orig_fp;
- goto out;
+ goto free_addrs;
}
/*
* Three initial passes:
@@ -1393,7 +1783,7 @@
* - 3: Calculate program size and addrs arrray
*/
for (pass = 1; pass <= 3; pass++) {
- if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
fp = orig_fp;
goto free_addrs;
}
@@ -1401,18 +1791,13 @@
/*
* Final pass: Allocate and generate program
*/
- if (jit.size >= BPF_SIZE_MAX) {
- fp = orig_fp;
- goto free_addrs;
- }
-
- header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+ header = bpf_jit_alloc(&jit, fp);
if (!header) {
fp = orig_fp;
goto free_addrs;
}
skip_init_ctx:
- if (bpf_jit_prog(&jit, fp, extra_pass)) {
+ if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
bpf_jit_binary_free(header);
fp = orig_fp;
goto free_addrs;
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
deleted file mode 100644
index 66c2dff..0000000
--- a/arch/s390/numa/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-y += numa.o
-obj-y += toptree.o
-obj-$(CONFIG_NUMA_EMU) += mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
deleted file mode 100644
index 72d742b..0000000
--- a/arch/s390/numa/mode_emu.c
+++ /dev/null
@@ -1,577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
- * without using real topology information about the physical memory of the
- * machine.
- *
- * It distributes the available CPUs to nodes while respecting the original
- * machine topology information. This is done by trying to avoid to separate
- * CPUs which reside on the same book or even on the same MC.
- *
- * Because the current Linux scheduler code requires a stable cpu to node
- * mapping, cores are pinned to nodes when the first CPU thread is set online.
- *
- * Copyright IBM Corp. 2015
- */
-
-#define KMSG_COMPONENT "numa_emu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/memblock.h>
-#include <linux/node.h>
-#include <linux/memory.h>
-#include <linux/slab.h>
-#include <asm/smp.h>
-#include <asm/topology.h>
-#include "numa_mode.h"
-#include "toptree.h"
-
-/* Distances between the different system components */
-#define DIST_EMPTY 0
-#define DIST_CORE 1
-#define DIST_MC 2
-#define DIST_BOOK 3
-#define DIST_DRAWER 4
-#define DIST_MAX 5
-
-/* Node distance reported to common code */
-#define EMU_NODE_DIST 10
-
-/* Node ID for free (not yet pinned) cores */
-#define NODE_ID_FREE -1
-
-/* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
-
-/* The two toptree IDs */
-enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
-
-/* Number of NUMA nodes */
-static int emu_nodes = 1;
-/* NUMA stripe size */
-static unsigned long emu_size;
-
-/*
- * Node to core pinning information updates are protected by
- * "sched_domains_mutex".
- */
-static struct {
- s32 to_node_id[CONFIG_NR_CPUS]; /* Pinned core to node mapping */
- int total; /* Total number of pinned cores */
- int per_node_target; /* Cores per node without extra cores */
- int per_node[MAX_NUMNODES]; /* Number of cores pinned to node */
-} *emu_cores;
-
-/*
- * Pin a core to a node
- */
-static void pin_core_to_node(int core_id, int node_id)
-{
- if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
- emu_cores->per_node[node_id]++;
- emu_cores->to_node_id[core_id] = node_id;
- emu_cores->total++;
- } else {
- WARN_ON(emu_cores->to_node_id[core_id] != node_id);
- }
-}
-
-/*
- * Number of pinned cores of a node
- */
-static int cores_pinned(struct toptree *node)
-{
- return emu_cores->per_node[node->id];
-}
-
-/*
- * ID of the node where the core is pinned (or NODE_ID_FREE)
- */
-static int core_pinned_to_node_id(struct toptree *core)
-{
- return emu_cores->to_node_id[core->id];
-}
-
-/*
- * Number of cores in the tree that are not yet pinned
- */
-static int cores_free(struct toptree *tree)
-{
- struct toptree *core;
- int count = 0;
-
- toptree_for_each(core, tree, CORE) {
- if (core_pinned_to_node_id(core) == NODE_ID_FREE)
- count++;
- }
- return count;
-}
-
-/*
- * Return node of core
- */
-static struct toptree *core_node(struct toptree *core)
-{
- return core->parent->parent->parent->parent;
-}
-
-/*
- * Return drawer of core
- */
-static struct toptree *core_drawer(struct toptree *core)
-{
- return core->parent->parent->parent;
-}
-
-/*
- * Return book of core
- */
-static struct toptree *core_book(struct toptree *core)
-{
- return core->parent->parent;
-}
-
-/*
- * Return mc of core
- */
-static struct toptree *core_mc(struct toptree *core)
-{
- return core->parent;
-}
-
-/*
- * Distance between two cores
- */
-static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
-{
- if (core_drawer(core1)->id != core_drawer(core2)->id)
- return DIST_DRAWER;
- if (core_book(core1)->id != core_book(core2)->id)
- return DIST_BOOK;
- if (core_mc(core1)->id != core_mc(core2)->id)
- return DIST_MC;
- /* Same core or sibling on same MC */
- return DIST_CORE;
-}
-
-/*
- * Distance of a node to a core
- */
-static int dist_node_to_core(struct toptree *node, struct toptree *core)
-{
- struct toptree *core_node;
- int dist_min = DIST_MAX;
-
- toptree_for_each(core_node, node, CORE)
- dist_min = min(dist_min, dist_core_to_core(core_node, core));
- return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
-}
-
-/*
- * Unify will delete empty nodes, therefore recreate nodes.
- */
-static void toptree_unify_tree(struct toptree *tree)
-{
- int nid;
-
- toptree_unify(tree);
- for (nid = 0; nid < emu_nodes; nid++)
- toptree_get_child(tree, nid);
-}
-
-/*
- * Find the best/nearest node for a given core and ensure that no node
- * gets more than "emu_cores->per_node_target + extra" cores.
- */
-static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
- int extra)
-{
- struct toptree *node, *node_best = NULL;
- int dist_cur, dist_best, cores_target;
-
- cores_target = emu_cores->per_node_target + extra;
- dist_best = DIST_MAX;
- node_best = NULL;
- toptree_for_each(node, numa, NODE) {
- /* Already pinned cores must use their nodes */
- if (core_pinned_to_node_id(core) == node->id) {
- node_best = node;
- break;
- }
- /* Skip nodes that already have enough cores */
- if (cores_pinned(node) >= cores_target)
- continue;
- dist_cur = dist_node_to_core(node, core);
- if (dist_cur < dist_best) {
- dist_best = dist_cur;
- node_best = node;
- }
- }
- return node_best;
-}
-
-/*
- * Find the best node for each core with respect to "extra" core count
- */
-static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
- int extra)
-{
- struct toptree *node, *core, *tmp;
-
- toptree_for_each_safe(core, tmp, phys, CORE) {
- node = node_for_core(numa, core, extra);
- if (!node)
- return;
- toptree_move(core, node);
- pin_core_to_node(core->id, node->id);
- }
-}
-
-/*
- * Move structures of given level to specified NUMA node
- */
-static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
- enum toptree_level level, bool perfect)
-{
- int cores_free, cores_target = emu_cores->per_node_target;
- struct toptree *cur, *tmp;
-
- toptree_for_each_safe(cur, tmp, phys, level) {
- cores_free = cores_target - toptree_count(node, CORE);
- if (perfect) {
- if (cores_free == toptree_count(cur, CORE))
- toptree_move(cur, node);
- } else {
- if (cores_free >= toptree_count(cur, CORE))
- toptree_move(cur, node);
- }
- }
-}
-
-/*
- * Move structures of a given level to NUMA nodes. If "perfect" is specified
- * move only perfectly fitting structures. Otherwise move also smaller
- * than needed structures.
- */
-static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
- enum toptree_level level, bool perfect)
-{
- struct toptree *node;
-
- toptree_for_each(node, numa, NODE)
- move_level_to_numa_node(node, phys, level, perfect);
-}
-
-/*
- * For the first run try to move the big structures
- */
-static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
-{
- struct toptree *core;
-
- /* Always try to move perfectly fitting structures first */
- move_level_to_numa(numa, phys, DRAWER, true);
- move_level_to_numa(numa, phys, DRAWER, false);
- move_level_to_numa(numa, phys, BOOK, true);
- move_level_to_numa(numa, phys, BOOK, false);
- move_level_to_numa(numa, phys, MC, true);
- move_level_to_numa(numa, phys, MC, false);
- /* Now pin all the moved cores */
- toptree_for_each(core, numa, CORE)
- pin_core_to_node(core->id, core_node(core)->id);
-}
-
-/*
- * Allocate new topology and create required nodes
- */
-static struct toptree *toptree_new(int id, int nodes)
-{
- struct toptree *tree;
- int nid;
-
- tree = toptree_alloc(TOPOLOGY, id);
- if (!tree)
- goto fail;
- for (nid = 0; nid < nodes; nid++) {
- if (!toptree_get_child(tree, nid))
- goto fail;
- }
- return tree;
-fail:
- panic("NUMA emulation could not allocate topology");
-}
-
-/*
- * Allocate and initialize core to node mapping
- */
-static void __ref create_core_to_node_map(void)
-{
- int i;
-
- emu_cores = memblock_alloc(sizeof(*emu_cores), 8);
- if (!emu_cores)
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(*emu_cores), 8);
- for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
- emu_cores->to_node_id[i] = NODE_ID_FREE;
-}
-
-/*
- * Move cores from physical topology into NUMA target topology
- * and try to keep as much of the physical topology as possible.
- */
-static struct toptree *toptree_to_numa(struct toptree *phys)
-{
- static int first = 1;
- struct toptree *numa;
- int cores_total;
-
- cores_total = emu_cores->total + cores_free(phys);
- emu_cores->per_node_target = cores_total / emu_nodes;
- numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
- if (first) {
- toptree_to_numa_first(numa, phys);
- first = 0;
- }
- toptree_to_numa_single(numa, phys, 0);
- toptree_to_numa_single(numa, phys, 1);
- toptree_unify_tree(numa);
-
- WARN_ON(cpumask_weight(&phys->mask));
- return numa;
-}
-
-/*
- * Create a toptree out of the physical topology that we got from the hypervisor
- */
-static struct toptree *toptree_from_topology(void)
-{
- struct toptree *phys, *node, *drawer, *book, *mc, *core;
- struct cpu_topology_s390 *top;
- int cpu;
-
- phys = toptree_new(TOPTREE_ID_PHYS, 1);
-
- for_each_cpu(cpu, &cpus_with_topology) {
- top = &cpu_topology[cpu];
- node = toptree_get_child(phys, 0);
- drawer = toptree_get_child(node, top->drawer_id);
- book = toptree_get_child(drawer, top->book_id);
- mc = toptree_get_child(book, top->socket_id);
- core = toptree_get_child(mc, smp_get_base_cpu(cpu));
- if (!drawer || !book || !mc || !core)
- panic("NUMA emulation could not allocate memory");
- cpumask_set_cpu(cpu, &core->mask);
- toptree_update_mask(mc);
- }
- return phys;
-}
-
-/*
- * Add toptree core to topology and create correct CPU masks
- */
-static void topology_add_core(struct toptree *core)
-{
- struct cpu_topology_s390 *top;
- int cpu;
-
- for_each_cpu(cpu, &core->mask) {
- top = &cpu_topology[cpu];
- cpumask_copy(&top->thread_mask, &core->mask);
- cpumask_copy(&top->core_mask, &core_mc(core)->mask);
- cpumask_copy(&top->book_mask, &core_book(core)->mask);
- cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
- cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
- top->node_id = core_node(core)->id;
- }
-}
-
-/*
- * Apply toptree to topology and create CPU masks
- */
-static void toptree_to_topology(struct toptree *numa)
-{
- struct toptree *core;
- int i;
-
- /* Clear all node masks */
- for (i = 0; i < MAX_NUMNODES; i++)
- cpumask_clear(&node_to_cpumask_map[i]);
-
- /* Rebuild all masks */
- toptree_for_each(core, numa, CORE)
- topology_add_core(core);
-}
-
-/*
- * Show the node to core mapping
- */
-static void print_node_to_core_map(void)
-{
- int nid, cid;
-
- if (!numa_debug_enabled)
- return;
- printk(KERN_DEBUG "NUMA node to core mapping\n");
- for (nid = 0; nid < emu_nodes; nid++) {
- printk(KERN_DEBUG " node %3d: ", nid);
- for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
- if (emu_cores->to_node_id[cid] == nid)
- printk(KERN_CONT "%d ", cid);
- }
- printk(KERN_CONT "\n");
- }
-}
-
-static void pin_all_possible_cpus(void)
-{
- int core_id, node_id, cpu;
- static int initialized;
-
- if (initialized)
- return;
- print_node_to_core_map();
- node_id = 0;
- for_each_possible_cpu(cpu) {
- core_id = smp_get_base_cpu(cpu);
- if (emu_cores->to_node_id[core_id] != NODE_ID_FREE)
- continue;
- pin_core_to_node(core_id, node_id);
- cpu_topology[cpu].node_id = node_id;
- node_id = (node_id + 1) % emu_nodes;
- }
- print_node_to_core_map();
- initialized = 1;
-}
-
-/*
- * Transfer physical topology into a NUMA topology and modify CPU masks
- * according to the NUMA topology.
- *
- * Must be called with "sched_domains_mutex" lock held.
- */
-static void emu_update_cpu_topology(void)
-{
- struct toptree *phys, *numa;
-
- if (emu_cores == NULL)
- create_core_to_node_map();
- phys = toptree_from_topology();
- numa = toptree_to_numa(phys);
- toptree_free(phys);
- toptree_to_topology(numa);
- toptree_free(numa);
- pin_all_possible_cpus();
-}
-
-/*
- * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
- * alignment (needed for memory hotplug).
- */
-static unsigned long emu_setup_size_adjust(unsigned long size)
-{
- unsigned long size_new;
-
- size = size ? : CONFIG_EMU_SIZE;
- size_new = roundup(size, memory_block_size_bytes());
- if (size_new == size)
- return size;
- pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
- size >> 20, size_new >> 20);
- return size_new;
-}
-
-/*
- * If we have not enough memory for the specified nodes, reduce the node count.
- */
-static int emu_setup_nodes_adjust(int nodes)
-{
- int nodes_max;
-
- nodes_max = memblock.memory.total_size / emu_size;
- nodes_max = max(nodes_max, 1);
- if (nodes_max >= nodes)
- return nodes;
- pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
- return nodes_max;
-}
-
-/*
- * Early emu setup
- */
-static void emu_setup(void)
-{
- int nid;
-
- emu_size = emu_setup_size_adjust(emu_size);
- emu_nodes = emu_setup_nodes_adjust(emu_nodes);
- for (nid = 0; nid < emu_nodes; nid++)
- node_set(nid, node_possible_map);
- pr_info("Creating %d nodes with memory stripe size %ld MB\n",
- emu_nodes, emu_size >> 20);
-}
-
-/*
- * Return node id for given page number
- */
-static int emu_pfn_to_nid(unsigned long pfn)
-{
- return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
-}
-
-/*
- * Return stripe size
- */
-static unsigned long emu_align(void)
-{
- return emu_size;
-}
-
-/*
- * Return distance between two nodes
- */
-static int emu_distance(int node1, int node2)
-{
- return (node1 != node2) * EMU_NODE_DIST;
-}
-
-/*
- * Define callbacks for generic s390 NUMA infrastructure
- */
-const struct numa_mode numa_mode_emu = {
- .name = "emu",
- .setup = emu_setup,
- .update_cpu_topology = emu_update_cpu_topology,
- .__pfn_to_nid = emu_pfn_to_nid,
- .align = emu_align,
- .distance = emu_distance,
-};
-
-/*
- * Kernel parameter: emu_nodes=<n>
- */
-static int __init early_parse_emu_nodes(char *p)
-{
- int count;
-
- if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0)
- return 0;
- emu_nodes = min(count, MAX_NUMNODES);
- return 0;
-}
-early_param("emu_nodes", early_parse_emu_nodes);
-
-/*
- * Kernel parameter: emu_size=[<n>[k|M|G|T]]
- */
-static int __init early_parse_emu_size(char *p)
-{
- if (p)
- emu_size = memparse(p, NULL);
- return 0;
-}
-early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
deleted file mode 100644
index 8386c58..0000000
--- a/arch/s390/numa/numa.c
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * Implement NUMA core code.
- *
- * Copyright IBM Corp. 2015
- */
-
-#define KMSG_COMPONENT "numa"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/mmzone.h>
-#include <linux/cpumask.h>
-#include <linux/memblock.h>
-#include <linux/slab.h>
-#include <linux/node.h>
-
-#include <asm/numa.h>
-#include "numa_mode.h"
-
-pg_data_t *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
-cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-static void plain_setup(void)
-{
- node_set(0, node_possible_map);
-}
-
-const struct numa_mode numa_mode_plain = {
- .name = "plain",
- .setup = plain_setup,
-};
-
-static const struct numa_mode *mode = &numa_mode_plain;
-
-int numa_pfn_to_nid(unsigned long pfn)
-{
- return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
-}
-
-void numa_update_cpu_topology(void)
-{
- if (mode->update_cpu_topology)
- mode->update_cpu_topology();
-}
-
-int numa_debug_enabled;
-
-/*
- * numa_setup_memory() - Assign bootmem to nodes
- *
- * The memory is first added to memblock without any respect to nodes.
- * This is fixed before remaining memblock memory is handed over to the
- * buddy allocator.
- * An important side effect is that large bootmem allocations might easily
- * cross node boundaries, which can be needed for large allocations with
- * smaller memory stripes in each node (i.e. when using NUMA emulation).
- *
- * Memory defines nodes:
- * Therefore this routine also sets the nodes online with memory.
- */
-static void __init numa_setup_memory(void)
-{
- unsigned long cur_base, align, end_of_dram;
- int nid = 0;
-
- end_of_dram = memblock_end_of_DRAM();
- align = mode->align ? mode->align() : ULONG_MAX;
-
- /*
- * Step through all available memory and assign it to the nodes
- * indicated by the mode implementation.
- * All nodes which are seen here will be set online.
- */
- cur_base = 0;
- do {
- nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
- node_set_online(nid);
- memblock_set_node(cur_base, align, &memblock.memory, nid);
- cur_base += align;
- } while (cur_base < end_of_dram);
-
- /* Allocate and fill out node_data */
- for (nid = 0; nid < MAX_NUMNODES; nid++) {
- NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
- if (!NODE_DATA(nid))
- panic("%s: Failed to allocate %zu bytes align=0x%x\n",
- __func__, sizeof(pg_data_t), 8);
- }
-
- for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn;
- unsigned long t_start, t_end;
- int i;
-
- start_pfn = ULONG_MAX;
- end_pfn = 0;
- for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
- if (t_start < start_pfn)
- start_pfn = t_start;
- if (t_end > end_pfn)
- end_pfn = t_end;
- }
- NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
- NODE_DATA(nid)->node_id = nid;
- }
-}
-
-/*
- * numa_setup() - Earliest initialization
- *
- * Assign the mode and call the mode's setup routine.
- */
-void __init numa_setup(void)
-{
- pr_info("NUMA mode: %s\n", mode->name);
- nodes_clear(node_possible_map);
- /* Initially attach all possible CPUs to node 0. */
- cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
- if (mode->setup)
- mode->setup();
- numa_setup_memory();
- memblock_dump_all();
-}
-
-/*
- * numa_init_late() - Initialization initcall
- *
- * Register NUMA nodes.
- */
-static int __init numa_init_late(void)
-{
- int nid;
-
- for_each_online_node(nid)
- register_one_node(nid);
- return 0;
-}
-arch_initcall(numa_init_late);
-
-static int __init parse_debug(char *parm)
-{
- numa_debug_enabled = 1;
- return 0;
-}
-early_param("numa_debug", parse_debug);
-
-static int __init parse_numa(char *parm)
-{
- if (!parm)
- return 1;
- if (strcmp(parm, numa_mode_plain.name) == 0)
- mode = &numa_mode_plain;
-#ifdef CONFIG_NUMA_EMU
- if (strcmp(parm, numa_mode_emu.name) == 0)
- mode = &numa_mode_emu;
-#endif
- return 0;
-}
-early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
deleted file mode 100644
index dfd3e27..0000000
--- a/arch/s390/numa/numa_mode.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Define declarations used for communication between NUMA mode
- * implementations and NUMA core functionality.
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef __S390_NUMA_MODE_H
-#define __S390_NUMA_MODE_H
-
-struct numa_mode {
- char *name; /* Name of mode */
- void (*setup)(void); /* Initizalize mode */
- void (*update_cpu_topology)(void); /* Called by topology code */
- int (*__pfn_to_nid)(unsigned long pfn); /* PFN to node ID */
- unsigned long (*align)(void); /* Minimum node alignment */
- int (*distance)(int a, int b); /* Distance between two nodes */
-};
-
-extern const struct numa_mode numa_mode_plain;
-extern const struct numa_mode numa_mode_emu;
-
-#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
deleted file mode 100644
index 71a608c..0000000
--- a/arch/s390/numa/toptree.c
+++ /dev/null
@@ -1,351 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-
-#include <linux/kernel.h>
-#include <linux/memblock.h>
-#include <linux/cpumask.h>
-#include <linux/list.h>
-#include <linux/list_sort.h>
-#include <linux/slab.h>
-#include <asm/numa.h>
-
-#include "toptree.h"
-
-/**
- * toptree_alloc - Allocate and initialize a new tree node.
- * @level: The node's vertical level; level 0 contains the leaves.
- * @id: ID number, explicitly not unique beyond scope of node's siblings
- *
- * Allocate a new tree node and initialize it.
- *
- * RETURNS:
- * Pointer to the new tree node or NULL on error
- */
-struct toptree __ref *toptree_alloc(int level, int id)
-{
- struct toptree *res;
-
- if (slab_is_available())
- res = kzalloc(sizeof(*res), GFP_KERNEL);
- else
- res = memblock_alloc(sizeof(*res), 8);
- if (!res)
- return res;
-
- INIT_LIST_HEAD(&res->children);
- INIT_LIST_HEAD(&res->sibling);
- cpumask_clear(&res->mask);
- res->level = level;
- res->id = id;
- return res;
-}
-
-/**
- * toptree_remove - Remove a tree node from a tree
- * @cand: Pointer to the node to remove
- *
- * The node is detached from its parent node. The parent node's
- * masks will be updated to reflect the loss of the child.
- */
-static void toptree_remove(struct toptree *cand)
-{
- struct toptree *oldparent;
-
- list_del_init(&cand->sibling);
- oldparent = cand->parent;
- cand->parent = NULL;
- toptree_update_mask(oldparent);
-}
-
-/**
- * toptree_free - discard a tree node
- * @cand: Pointer to the tree node to discard
- *
- * Checks if @cand is attached to a parent node. Detaches it
- * cleanly using toptree_remove. Possible children are freed
- * recursively. In the end @cand itself is freed.
- */
-void __ref toptree_free(struct toptree *cand)
-{
- struct toptree *child, *tmp;
-
- if (cand->parent)
- toptree_remove(cand);
- toptree_for_each_child_safe(child, tmp, cand)
- toptree_free(child);
- if (slab_is_available())
- kfree(cand);
- else
- memblock_free_early((unsigned long)cand, sizeof(*cand));
-}
-
-/**
- * toptree_update_mask - Update node bitmasks
- * @cand: Pointer to a tree node
- *
- * The node's cpumask will be updated by combining all children's
- * masks. Then toptree_update_mask is called recursively for the
- * parent if applicable.
- *
- * NOTE:
- * This must not be called on leaves. If called on a leaf, its
- * CPU mask is cleared and lost.
- */
-void toptree_update_mask(struct toptree *cand)
-{
- struct toptree *child;
-
- cpumask_clear(&cand->mask);
- list_for_each_entry(child, &cand->children, sibling)
- cpumask_or(&cand->mask, &cand->mask, &child->mask);
- if (cand->parent)
- toptree_update_mask(cand->parent);
-}
-
-/**
- * toptree_insert - Insert a tree node into tree
- * @cand: Pointer to the node to insert
- * @target: Pointer to the node to which @cand will added as a child
- *
- * Insert a tree node into a tree. Masks will be updated automatically.
- *
- * RETURNS:
- * 0 on success, -1 if NULL is passed as argument or the node levels
- * don't fit.
- */
-static int toptree_insert(struct toptree *cand, struct toptree *target)
-{
- if (!cand || !target)
- return -1;
- if (target->level != (cand->level + 1))
- return -1;
- list_add_tail(&cand->sibling, &target->children);
- cand->parent = target;
- toptree_update_mask(target);
- return 0;
-}
-
-/**
- * toptree_move_children - Move all child nodes of a node to a new place
- * @cand: Pointer to the node whose children are to be moved
- * @target: Pointer to the node to which @cand's children will be attached
- *
- * Take all child nodes of @cand and move them using toptree_move.
- */
-static void toptree_move_children(struct toptree *cand, struct toptree *target)
-{
- struct toptree *child, *tmp;
-
- toptree_for_each_child_safe(child, tmp, cand)
- toptree_move(child, target);
-}
-
-/**
- * toptree_unify - Merge children with same ID
- * @cand: Pointer to node whose direct children should be made unique
- *
- * When mangling the tree it is possible that a node has two or more children
- * which have the same ID. This routine merges these children into one and
- * moves all children of the merged nodes into the unified node.
- */
-void toptree_unify(struct toptree *cand)
-{
- struct toptree *child, *tmp, *cand_copy;
-
- /* Threads cannot be split, cores are not split */
- if (cand->level < 2)
- return;
-
- cand_copy = toptree_alloc(cand->level, 0);
- toptree_for_each_child_safe(child, tmp, cand) {
- struct toptree *tmpchild;
-
- if (!cpumask_empty(&child->mask)) {
- tmpchild = toptree_get_child(cand_copy, child->id);
- toptree_move_children(child, tmpchild);
- }
- toptree_free(child);
- }
- toptree_move_children(cand_copy, cand);
- toptree_free(cand_copy);
-
- toptree_for_each_child(child, cand)
- toptree_unify(child);
-}
-
-/**
- * toptree_move - Move a node to another context
- * @cand: Pointer to the node to move
- * @target: Pointer to the node where @cand should go
- *
- * In the easiest case @cand is exactly on the level below @target
- * and will be immediately moved to the target.
- *
- * If @target's level is not the direct parent level of @cand,
- * nodes for the missing levels are created and put between
- * @cand and @target. The "stacking" nodes' IDs are taken from
- * @cand's parents.
- *
- * After this it is likely to have redundant nodes in the tree
- * which are addressed by means of toptree_unify.
- */
-void toptree_move(struct toptree *cand, struct toptree *target)
-{
- struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
-
- if (cand->level + 1 == target->level) {
- toptree_remove(cand);
- toptree_insert(cand, target);
- return;
- }
-
- real_insert_point = NULL;
- ptr = cand;
- stack_target = NULL;
-
- do {
- tmp = stack_target;
- stack_target = toptree_alloc(ptr->level + 1,
- ptr->parent->id);
- toptree_insert(tmp, stack_target);
- if (!real_insert_point)
- real_insert_point = stack_target;
- ptr = ptr->parent;
- } while (stack_target->level < (target->level - 1));
-
- toptree_remove(cand);
- toptree_insert(cand, real_insert_point);
- toptree_insert(stack_target, target);
-}
-
-/**
- * toptree_get_child - Access a tree node's child by its ID
- * @cand: Pointer to tree node whose child is to access
- * @id: The desired child's ID
- *
- * @cand's children are searched for a child with matching ID.
- * If no match can be found, a new child with the desired ID
- * is created and returned.
- */
-struct toptree *toptree_get_child(struct toptree *cand, int id)
-{
- struct toptree *child;
-
- toptree_for_each_child(child, cand)
- if (child->id == id)
- return child;
- child = toptree_alloc(cand->level-1, id);
- toptree_insert(child, cand);
- return child;
-}
-
-/**
- * toptree_first - Find the first descendant on specified level
- * @context: Pointer to tree node whose descendants are to be used
- * @level: The level of interest
- *
- * RETURNS:
- * @context's first descendant on the specified level, or NULL
- * if there is no matching descendant
- */
-struct toptree *toptree_first(struct toptree *context, int level)
-{
- struct toptree *child, *tmp;
-
- if (context->level == level)
- return context;
-
- if (!list_empty(&context->children)) {
- list_for_each_entry(child, &context->children, sibling) {
- tmp = toptree_first(child, level);
- if (tmp)
- return tmp;
- }
- }
- return NULL;
-}
-
-/**
- * toptree_next_sibling - Return next sibling
- * @cur: Pointer to a tree node
- *
- * RETURNS:
- * If @cur has a parent and is not the last in the parent's children list,
- * the next sibling is returned. Or NULL when there are no siblings left.
- */
-static struct toptree *toptree_next_sibling(struct toptree *cur)
-{
- if (cur->parent == NULL)
- return NULL;
-
- if (cur == list_last_entry(&cur->parent->children,
- struct toptree, sibling))
- return NULL;
- return (struct toptree *) list_next_entry(cur, sibling);
-}
-
-/**
- * toptree_next - Tree traversal function
- * @cur: Pointer to current element
- * @context: Pointer to the root node of the tree or subtree to
- * be traversed.
- * @level: The level of interest.
- *
- * RETURNS:
- * Pointer to the next node on level @level
- * or NULL when there is no next node.
- */
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
- int level)
-{
- struct toptree *cur_context, *tmp;
-
- if (!cur)
- return NULL;
-
- if (context->level == level)
- return NULL;
-
- tmp = toptree_next_sibling(cur);
- if (tmp != NULL)
- return tmp;
-
- cur_context = cur;
- while (cur_context->level < context->level - 1) {
- /* Step up */
- cur_context = cur_context->parent;
- /* Step aside */
- tmp = toptree_next_sibling(cur_context);
- if (tmp != NULL) {
- /* Step down */
- tmp = toptree_first(tmp, level);
- if (tmp != NULL)
- return tmp;
- }
- }
- return NULL;
-}
-
-/**
- * toptree_count - Count descendants on specified level
- * @context: Pointer to node whose descendants are to be considered
- * @level: Only descendants on the specified level will be counted
- *
- * RETURNS:
- * Number of descendants on the specified level
- */
-int toptree_count(struct toptree *context, int level)
-{
- struct toptree *cur;
- int cnt = 0;
-
- toptree_for_each(cur, context, level)
- cnt++;
- return cnt;
-}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
deleted file mode 100644
index 5246371..0000000
--- a/arch/s390/numa/toptree.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef S390_TOPTREE_H
-#define S390_TOPTREE_H
-
-#include <linux/cpumask.h>
-#include <linux/list.h>
-
-struct toptree {
- int level;
- int id;
- cpumask_t mask;
- struct toptree *parent;
- struct list_head sibling;
- struct list_head children;
-};
-
-struct toptree *toptree_alloc(int level, int id);
-void toptree_free(struct toptree *cand);
-void toptree_update_mask(struct toptree *cand);
-void toptree_unify(struct toptree *cand);
-struct toptree *toptree_get_child(struct toptree *cand, int id);
-void toptree_move(struct toptree *cand, struct toptree *target);
-int toptree_count(struct toptree *context, int level);
-
-struct toptree *toptree_first(struct toptree *context, int level);
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
- int level);
-
-#define toptree_for_each_child(child, ptree) \
- list_for_each_entry(child, &ptree->children, sibling)
-
-#define toptree_for_each_child_safe(child, ptmp, ptree) \
- list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
-
-#define toptree_is_last(ptree) \
- ((ptree->parent == NULL) || \
- (ptree->parent->children.prev == &ptree->sibling))
-
-#define toptree_for_each(ptree, cont, ttype) \
- for (ptree = toptree_first(cont, ttype); \
- ptree != NULL; \
- ptree = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_safe(ptree, tmp, cont, ttype) \
- for (ptree = toptree_first(cont, ttype), \
- tmp = toptree_next(ptree, cont, ttype); \
- ptree != NULL; \
- ptree = tmp, \
- tmp = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_sibling(ptree, start) \
- toptree_for_each(ptree, start->parent, start->level)
-
-#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 748626a..bf557a1 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -4,4 +4,6 @@
#
obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
- pci_event.o pci_debug.o pci_insn.o pci_mmio.o
+ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
+ pci_bus.o
+obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index b8ddacf..e14e4a3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -27,6 +27,7 @@
#include <linux/seq_file.h>
#include <linux/jump_label.h>
#include <linux/pci.h>
+#include <linux/printk.h>
#include <asm/isc.h>
#include <asm/airq.h>
@@ -35,17 +36,22 @@
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
+#include "pci_bus.h"
+#include "pci_iov.h"
+
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
static DEFINE_SPINLOCK(zpci_domain_lock);
#define ZPCI_IOMAP_ENTRIES \
- min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \
+ min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2), \
ZPCI_IOMAP_MAX_ENTRIES)
+unsigned int s390_pci_no_rid;
+
static DEFINE_SPINLOCK(zpci_iomap_lock);
static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
@@ -86,17 +92,12 @@
spin_unlock(&zpci_list_lock);
list_for_each_entry_safe(zdev, tmp, &remove, entry)
- zpci_remove_device(zdev);
-}
-
-static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
-{
- return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
+ zpci_device_reserved(zdev);
}
int pci_domain_nr(struct pci_bus *bus)
{
- return ((struct zpci_dev *) bus->sysdata)->domain;
+ return ((struct zpci_bus *) bus->sysdata)->domain_nr;
}
EXPORT_SYMBOL_GPL(pci_domain_nr);
@@ -226,34 +227,58 @@
zpci_memcpy_toio(to, from, count);
}
-void __iomem *ioremap(unsigned long ioaddr, unsigned long size)
+static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
{
+ unsigned long offset, vaddr;
struct vm_struct *area;
- unsigned long offset;
+ phys_addr_t last_addr;
- if (!size)
+ last_addr = addr + size - 1;
+ if (!size || last_addr < addr)
return NULL;
if (!static_branch_unlikely(&have_mio))
- return (void __iomem *) ioaddr;
+ return (void __iomem *) addr;
- offset = ioaddr & ~PAGE_MASK;
- ioaddr &= PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ addr &= PAGE_MASK;
size = PAGE_ALIGN(size + offset);
area = get_vm_area(size, VM_IOREMAP);
if (!area)
return NULL;
- if (ioremap_page_range((unsigned long) area->addr,
- (unsigned long) area->addr + size,
- ioaddr, PAGE_KERNEL)) {
- vunmap(area->addr);
+ vaddr = (unsigned long) area->addr;
+ if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
+ free_vm_area(area);
return NULL;
}
return (void __iomem *) ((unsigned long) area->addr + offset);
}
+
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+{
+ return __ioremap(addr, size, __pgprot(prot));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void __iomem *ioremap(phys_addr_t addr, size_t size)
+{
+ return __ioremap(addr, size, PAGE_KERNEL);
+}
EXPORT_SYMBOL(ioremap);
+void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
+{
+ return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
+{
+ return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
void iounmap(volatile void __iomem *addr)
{
if (static_branch_likely(&have_mio))
@@ -294,7 +319,7 @@
void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
unsigned long offset, unsigned long max)
{
- if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+ if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
return NULL;
if (static_branch_likely(&have_mio))
@@ -324,7 +349,7 @@
void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
unsigned long offset, unsigned long max)
{
- if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+ if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
return NULL;
if (static_branch_likely(&have_mio))
@@ -371,29 +396,17 @@
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 *val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
- int ret;
+ struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
- if (!zdev || devfn != ZPCI_DEVFN)
- ret = -ENODEV;
- else
- ret = zpci_cfg_load(zdev, where, val, size);
-
- return ret;
+ return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
}
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 val)
{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
- int ret;
+ struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
- if (!zdev || devfn != ZPCI_DEVFN)
- ret = -ENODEV;
- else
- ret = zpci_cfg_store(zdev, where, val, size);
-
- return ret;
+ return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
}
static struct pci_ops pci_root_ops = {
@@ -401,22 +414,13 @@
.write = pci_write,
};
-#ifdef CONFIG_PCI_IOV
-static struct resource iov_res = {
- .name = "PCI IOV res",
- .start = 0,
- .end = -1,
- .flags = IORESOURCE_MEM,
-};
-#endif
-
static void zpci_map_resources(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
resource_size_t len;
int i;
- for (i = 0; i < PCI_BAR_COUNT; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
len = pci_resource_len(pdev, i);
if (!len)
continue;
@@ -430,16 +434,7 @@
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
-#ifdef CONFIG_PCI_IOV
- for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
- int bar = i + PCI_IOV_RESOURCES;
-
- len = pci_resource_len(pdev, bar);
- if (!len)
- continue;
- pdev->resource[bar].parent = &iov_res;
- }
-#endif
+ zpci_iov_map_resources(pdev);
}
static void zpci_unmap_resources(struct pci_dev *pdev)
@@ -451,7 +446,7 @@
if (zpci_use_mio(zdev))
return;
- for (i = 0; i < PCI_BAR_COUNT; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
len = pci_resource_len(pdev, i);
if (!len)
continue;
@@ -504,17 +499,17 @@
return r;
}
-static int zpci_setup_bus_resources(struct zpci_dev *zdev,
- struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources)
{
unsigned long addr, size, flags;
struct resource *res;
int i, entry;
snprintf(zdev->res_name, sizeof(zdev->res_name),
- "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+ "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
- for (i = 0; i < PCI_BAR_COUNT; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (!zdev->bars[i].size)
continue;
entry = zpci_alloc_iomap(zdev);
@@ -551,7 +546,7 @@
{
int i;
- for (i = 0; i < PCI_BAR_COUNT; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (!zdev->bars[i].size || !zdev->bars[i].res)
continue;
@@ -563,9 +558,12 @@
int pcibios_add_device(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
struct resource *res;
int i;
+ /* The pdev has a reference to the zdev via its bus */
+ zpci_zdev_get(zdev);
if (pdev->is_physfn)
pdev->no_vf_scan = 1;
@@ -573,7 +571,7 @@
pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
- for (i = 0; i < PCI_BAR_COUNT; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
res = &pdev->resource[i];
if (res->parent || !res->flags)
continue;
@@ -585,7 +583,10 @@
void pcibios_release_device(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
zpci_unmap_resources(pdev);
+ zpci_zdev_put(zdev);
}
int pcibios_enable_device(struct pci_dev *pdev, int mask)
@@ -606,136 +607,62 @@
zpci_debug_exit_device(zdev);
}
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-static int zpci_restore(struct device *dev)
+static int __zpci_register_domain(int domain)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
- int ret = 0;
-
- if (zdev->state != ZPCI_FN_STATE_ONLINE)
- goto out;
-
- ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
- if (ret)
- goto out;
-
- zpci_map_resources(pdev);
- zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- (u64) zdev->dma_table);
-
-out:
- return ret;
+ spin_lock(&zpci_domain_lock);
+ if (test_bit(domain, zpci_domain)) {
+ spin_unlock(&zpci_domain_lock);
+ pr_err("Domain %04x is already assigned\n", domain);
+ return -EEXIST;
+ }
+ set_bit(domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return domain;
}
-static int zpci_freeze(struct device *dev)
+static int __zpci_alloc_domain(void)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct zpci_dev *zdev = to_zpci(pdev);
+ int domain;
- if (zdev->state != ZPCI_FN_STATE_ONLINE)
- return 0;
-
- zpci_unregister_ioat(zdev, 0);
- zpci_unmap_resources(pdev);
- return clp_disable_fh(zdev);
+ spin_lock(&zpci_domain_lock);
+ /*
+ * We can always auto allocate domains below ZPCI_NR_DEVICES.
+ * There is either a free domain or we have reached the maximum in
+ * which case we would have bailed earlier.
+ */
+ domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+ set_bit(domain, zpci_domain);
+ spin_unlock(&zpci_domain_lock);
+ return domain;
}
-struct dev_pm_ops pcibios_pm_ops = {
- .thaw_noirq = zpci_restore,
- .freeze_noirq = zpci_freeze,
- .restore_noirq = zpci_restore,
- .poweroff_noirq = zpci_freeze,
-};
-#endif /* CONFIG_HIBERNATE_CALLBACKS */
-
-static int zpci_alloc_domain(struct zpci_dev *zdev)
+int zpci_alloc_domain(int domain)
{
if (zpci_unique_uid) {
- zdev->domain = (u16) zdev->uid;
- if (zdev->domain >= ZPCI_NR_DEVICES)
- return 0;
-
- spin_lock(&zpci_domain_lock);
- if (test_bit(zdev->domain, zpci_domain)) {
- spin_unlock(&zpci_domain_lock);
- return -EEXIST;
- }
- set_bit(zdev->domain, zpci_domain);
- spin_unlock(&zpci_domain_lock);
- return 0;
+ if (domain)
+ return __zpci_register_domain(domain);
+ pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
+ update_uid_checking(false);
}
-
- spin_lock(&zpci_domain_lock);
- zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
- if (zdev->domain == ZPCI_NR_DEVICES) {
- spin_unlock(&zpci_domain_lock);
- return -ENOSPC;
- }
- set_bit(zdev->domain, zpci_domain);
- spin_unlock(&zpci_domain_lock);
- return 0;
+ return __zpci_alloc_domain();
}
-static void zpci_free_domain(struct zpci_dev *zdev)
+void zpci_free_domain(int domain)
{
- if (zdev->domain >= ZPCI_NR_DEVICES)
- return;
-
spin_lock(&zpci_domain_lock);
- clear_bit(zdev->domain, zpci_domain);
+ clear_bit(domain, zpci_domain);
spin_unlock(&zpci_domain_lock);
}
-void pcibios_remove_bus(struct pci_bus *bus)
-{
- struct zpci_dev *zdev = get_zdev_by_bus(bus);
-
- zpci_exit_slot(zdev);
- zpci_cleanup_bus_resources(zdev);
- zpci_destroy_iommu(zdev);
- zpci_free_domain(zdev);
-
- spin_lock(&zpci_list_lock);
- list_del(&zdev->entry);
- spin_unlock(&zpci_list_lock);
-
- zpci_dbg(3, "rem fid:%x\n", zdev->fid);
- kfree(zdev);
-}
-
-static int zpci_scan_bus(struct zpci_dev *zdev)
-{
- LIST_HEAD(resources);
- int ret;
-
- ret = zpci_setup_bus_resources(zdev, &resources);
- if (ret)
- goto error;
-
- zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
- zdev, &resources);
- if (!zdev->bus) {
- ret = -EIO;
- goto error;
- }
- zdev->bus->max_bus_speed = zdev->max_bus_speed;
- pci_bus_add_devices(zdev->bus);
- return 0;
-
-error:
- zpci_cleanup_bus_resources(zdev);
- pci_free_resource_list(&resources);
- return ret;
-}
int zpci_enable_device(struct zpci_dev *zdev)
{
int rc;
- rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
- if (rc)
+ if (clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES)) {
+ rc = -EIO;
goto out;
+ }
rc = zpci_dma_init_device(zdev);
if (rc)
@@ -754,58 +681,175 @@
int zpci_disable_device(struct zpci_dev *zdev)
{
zpci_dma_exit_device(zdev);
- return clp_disable_fh(zdev);
+ /*
+ * The zPCI function may already be disabled by the platform, this is
+ * detected in clp_disable_fh() which becomes a no-op.
+ */
+ return clp_disable_fh(zdev) ? -EIO : 0;
}
EXPORT_SYMBOL_GPL(zpci_disable_device);
-int zpci_create_device(struct zpci_dev *zdev)
+/* zpci_remove_device - Removes the given zdev from the PCI core
+ * @zdev: the zdev to be removed from the PCI core
+ * @set_error: if true the device's error state is set to permanent failure
+ *
+ * Sets a zPCI device to a configured but offline state; the zPCI
+ * device is still accessible through its hotplug slot and the zPCI
+ * API but is removed from the common code PCI bus, making it
+ * no longer available to drivers.
+ */
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error)
{
+ struct zpci_bus *zbus = zdev->zbus;
+ struct pci_dev *pdev;
+
+ if (!zdev->zbus->bus)
+ return;
+
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (pdev) {
+ if (set_error)
+ pdev->error_state = pci_channel_io_perm_failure;
+ if (pdev->is_virtfn) {
+ zpci_iov_remove_virtfn(pdev, zdev->vfn);
+ /* balance pci_get_slot */
+ pci_dev_put(pdev);
+ return;
+ }
+ pci_stop_and_remove_bus_device_locked(pdev);
+ /* balance pci_get_slot */
+ pci_dev_put(pdev);
+ }
+}
+
+/**
+ * zpci_create_device() - Create a new zpci_dev and add it to the zbus
+ * @fid: Function ID of the device to be created
+ * @fh: Current Function Handle of the device to be created
+ * @state: Initial state after creation either Standby or Configured
+ *
+ * Creates a new zpci device and adds it to its, possibly newly created, zbus
+ * as well as zpci_list.
+ *
+ * Returns: 0 on success, an error value otherwise
+ */
+int zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
+{
+ struct zpci_dev *zdev;
int rc;
- rc = zpci_alloc_domain(zdev);
+ zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+ if (!zdev)
+ return -ENOMEM;
+
+ /* FID and Function Handle are the static/dynamic identifiers */
+ zdev->fid = fid;
+ zdev->fh = fh;
+
+ /* Query function properties and update zdev */
+ rc = clp_query_pci_fn(zdev);
if (rc)
- goto out;
+ goto error;
+ zdev->state = state;
+
+ kref_init(&zdev->kref);
+ mutex_init(&zdev->lock);
rc = zpci_init_iommu(zdev);
if (rc)
- goto out_free;
+ goto error;
- mutex_init(&zdev->lock);
if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
rc = zpci_enable_device(zdev);
if (rc)
- goto out_destroy_iommu;
+ goto error_destroy_iommu;
}
- rc = zpci_scan_bus(zdev);
+
+ rc = zpci_bus_device_register(zdev, &pci_root_ops);
if (rc)
- goto out_disable;
+ goto error_disable;
spin_lock(&zpci_list_lock);
list_add_tail(&zdev->entry, &zpci_list);
spin_unlock(&zpci_list_lock);
- zpci_init_slot(zdev);
-
return 0;
-out_disable:
+error_disable:
if (zdev->state == ZPCI_FN_STATE_ONLINE)
zpci_disable_device(zdev);
-out_destroy_iommu:
+error_destroy_iommu:
zpci_destroy_iommu(zdev);
-out_free:
- zpci_free_domain(zdev);
-out:
+error:
+ zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
+ kfree(zdev);
return rc;
}
-void zpci_remove_device(struct zpci_dev *zdev)
+bool zpci_is_device_configured(struct zpci_dev *zdev)
{
- if (!zdev->bus)
- return;
+ enum zpci_state state = zdev->state;
- pci_stop_root_bus(zdev->bus);
- pci_remove_root_bus(zdev->bus);
+ return state != ZPCI_FN_STATE_RESERVED &&
+ state != ZPCI_FN_STATE_STANDBY;
+}
+
+/**
+ * zpci_device_reserved() - Mark device as resverved
+ * @zdev: the zpci_dev that was reserved
+ *
+ * Handle the case that a given zPCI function was reserved by another system.
+ * After a call to this function the zpci_dev can not be found via
+ * get_zdev_by_fid() anymore but may still be accessible via existing
+ * references though it will not be functional anymore.
+ */
+void zpci_device_reserved(struct zpci_dev *zdev)
+{
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
+ /*
+ * Remove device from zpci_list as it is going away. This also
+ * makes sure we ignore subsequent zPCI events for this device.
+ */
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
+ zdev->state = ZPCI_FN_STATE_RESERVED;
+ zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ zpci_zdev_put(zdev);
+}
+
+void zpci_release_device(struct kref *kref)
+{
+ struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+
+ if (zdev->zbus->bus)
+ zpci_remove_device(zdev, false);
+
+ switch (zdev->state) {
+ case ZPCI_FN_STATE_ONLINE:
+ case ZPCI_FN_STATE_CONFIGURED:
+ zpci_disable_device(zdev);
+ fallthrough;
+ case ZPCI_FN_STATE_STANDBY:
+ if (zdev->has_hp_slot)
+ zpci_exit_slot(zdev);
+ spin_lock(&zpci_list_lock);
+ list_del(&zdev->entry);
+ spin_unlock(&zpci_list_lock);
+ zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+ fallthrough;
+ case ZPCI_FN_STATE_RESERVED:
+ zpci_cleanup_bus_resources(zdev);
+ zpci_bus_device_unregister(zdev);
+ zpci_destroy_iommu(zdev);
+ fallthrough;
+ default:
+ break;
+ }
+ zpci_dbg(3, "rem fid:%x\n", zdev->fid);
+ kfree(zdev);
}
int zpci_report_error(struct pci_dev *pdev,
@@ -837,6 +881,9 @@
if (!zpci_iomap_bitmap)
goto error_iomap_bitmap;
+ if (static_branch_likely(&have_mio))
+ clp_setup_writeback_mio();
+
return 0;
error_iomap_bitmap:
kfree(zpci_iomap_start);
@@ -871,6 +918,10 @@
s390_pci_force_floating = 1;
return NULL;
}
+ if (!strcmp(str, "norid")) {
+ s390_pci_no_rid = 1;
+ return NULL;
+ }
return str;
}
@@ -929,9 +980,3 @@
return rc;
}
subsys_initcall_sync(pci_base_init);
-
-void zpci_rescan(void)
-{
- if (zpci_is_enabled())
- clp_rescan_pci_devices_simple(NULL);
-}
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
new file mode 100644
index 0000000..755b46f
--- /dev/null
+++ b/arch/s390/pci/pci_bus.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/jump_label.h>
+#include <linux/pci.h>
+#include <linux/printk.h>
+
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#include "pci_bus.h"
+#include "pci_iov.h"
+
+static LIST_HEAD(zbus_list);
+static DEFINE_SPINLOCK(zbus_list_lock);
+static int zpci_nb_devices;
+
+/* zpci_bus_scan
+ * @zbus: the zbus holding the zdevices
+ * @ops: the pci operations
+ *
+ * The domain number must be set before pci_scan_root_bus is called.
+ * This function can be called once the domain is known, hence
+ * when the function_0 is dicovered.
+ */
+static int zpci_bus_scan(struct zpci_bus *zbus, int domain, struct pci_ops *ops)
+{
+ struct pci_bus *bus;
+ int rc;
+
+ rc = zpci_alloc_domain(domain);
+ if (rc < 0)
+ return rc;
+ zbus->domain_nr = rc;
+
+ bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
+ if (!bus) {
+ zpci_free_domain(zbus->domain_nr);
+ return -EFAULT;
+ }
+
+ zbus->bus = bus;
+ pci_bus_add_devices(bus);
+ return 0;
+}
+
+static void zpci_bus_release(struct kref *kref)
+{
+ struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
+
+ if (zbus->bus) {
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(zbus->bus);
+
+ zpci_free_domain(zbus->domain_nr);
+ pci_free_resource_list(&zbus->resources);
+
+ pci_remove_root_bus(zbus->bus);
+ pci_unlock_rescan_remove();
+ }
+
+ spin_lock(&zbus_list_lock);
+ list_del(&zbus->bus_next);
+ spin_unlock(&zbus_list_lock);
+ kfree(zbus);
+}
+
+static void zpci_bus_put(struct zpci_bus *zbus)
+{
+ kref_put(&zbus->kref, zpci_bus_release);
+}
+
+static struct zpci_bus *zpci_bus_get(int pchid)
+{
+ struct zpci_bus *zbus;
+
+ spin_lock(&zbus_list_lock);
+ list_for_each_entry(zbus, &zbus_list, bus_next) {
+ if (pchid == zbus->pchid) {
+ kref_get(&zbus->kref);
+ goto out_unlock;
+ }
+ }
+ zbus = NULL;
+out_unlock:
+ spin_unlock(&zbus_list_lock);
+ return zbus;
+}
+
+static struct zpci_bus *zpci_bus_alloc(int pchid)
+{
+ struct zpci_bus *zbus;
+
+ zbus = kzalloc(sizeof(*zbus), GFP_KERNEL);
+ if (!zbus)
+ return NULL;
+
+ zbus->pchid = pchid;
+ INIT_LIST_HEAD(&zbus->bus_next);
+ spin_lock(&zbus_list_lock);
+ list_add_tail(&zbus->bus_next, &zbus_list);
+ spin_unlock(&zbus_list_lock);
+
+ kref_init(&zbus->kref);
+ INIT_LIST_HEAD(&zbus->resources);
+
+ zbus->bus_resource.start = 0;
+ zbus->bus_resource.end = ZPCI_BUS_NR;
+ zbus->bus_resource.flags = IORESOURCE_BUS;
+ pci_add_resource(&zbus->resources, &zbus->bus_resource);
+
+ return zbus;
+}
+
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+
+ /*
+ * With pdev->no_vf_scan the common PCI probing code does not
+ * perform PF/VF linking.
+ */
+ if (zdev->vfn) {
+ zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
+ pdev->no_command_memory = 1;
+ }
+}
+
+static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+ struct pci_bus *bus;
+ struct resource_entry *window, *n;
+ struct resource *res;
+ struct pci_dev *pdev;
+ int rc;
+
+ bus = zbus->bus;
+ if (!bus)
+ return -EINVAL;
+
+ pdev = pci_get_slot(bus, zdev->devfn);
+ if (pdev) {
+ /* Device is already known. */
+ pci_dev_put(pdev);
+ return 0;
+ }
+
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ return rc;
+ zdev->has_hp_slot = 1;
+
+ resource_list_for_each_entry_safe(window, n, &zbus->resources) {
+ res = window->res;
+ pci_bus_add_resource(bus, res, 0);
+ }
+
+ pdev = pci_scan_single_device(bus, zdev->devfn);
+ if (pdev)
+ pci_bus_add_device(pdev);
+
+ return 0;
+}
+
+static void zpci_bus_add_devices(struct zpci_bus *zbus)
+{
+ int i;
+
+ for (i = 1; i < ZPCI_FUNCTIONS_PER_BUS; i++)
+ if (zbus->function[i])
+ zpci_bus_add_device(zbus, zbus->function[i]);
+
+ pci_lock_rescan_remove();
+ pci_bus_add_devices(zbus->bus);
+ pci_unlock_rescan_remove();
+}
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
+{
+ struct zpci_bus *zbus = NULL;
+ int rc = -EBADF;
+
+ if (zpci_nb_devices == ZPCI_NR_DEVICES) {
+ pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+ zdev->fid, ZPCI_NR_DEVICES);
+ return -ENOSPC;
+ }
+ zpci_nb_devices++;
+
+ if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
+ return -EINVAL;
+
+ if (!s390_pci_no_rid && zdev->rid_available)
+ zbus = zpci_bus_get(zdev->pchid);
+
+ if (!zbus) {
+ zbus = zpci_bus_alloc(zdev->pchid);
+ if (!zbus)
+ return -ENOMEM;
+ }
+
+ zdev->zbus = zbus;
+ if (zbus->function[zdev->devfn]) {
+ pr_err("devfn %04x is already assigned\n", zdev->devfn);
+ goto error; /* rc already set */
+ }
+ zbus->function[zdev->devfn] = zdev;
+
+ zpci_setup_bus_resources(zdev, &zbus->resources);
+
+ if (zbus->bus) {
+ if (!zbus->multifunction) {
+ WARN_ONCE(1, "zbus is not multifunction\n");
+ goto error_bus;
+ }
+ if (!zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set for multifunction\n");
+ goto error_bus;
+ }
+ rc = zpci_bus_add_device(zbus, zdev);
+ if (rc)
+ goto error_bus;
+ } else if (zdev->devfn == 0) {
+ if (zbus->multifunction && !zdev->rid_available) {
+ WARN_ONCE(1, "rid_available not set on function 0 for multifunction\n");
+ goto error_bus;
+ }
+ rc = zpci_bus_scan(zbus, (u16)zdev->uid, ops);
+ if (rc)
+ goto error_bus;
+ zpci_bus_add_devices(zbus);
+ rc = zpci_init_slot(zdev);
+ if (rc)
+ goto error_bus;
+ zdev->has_hp_slot = 1;
+ zbus->multifunction = zdev->rid_available;
+ zbus->max_bus_speed = zdev->max_bus_speed;
+ } else {
+ zbus->multifunction = 1;
+ }
+
+ return 0;
+
+error_bus:
+ zpci_nb_devices--;
+ zbus->function[zdev->devfn] = NULL;
+error:
+ pr_err("Adding PCI function %08x failed\n", zdev->fid);
+ zpci_bus_put(zbus);
+ return rc;
+}
+
+void zpci_bus_device_unregister(struct zpci_dev *zdev)
+{
+ struct zpci_bus *zbus = zdev->zbus;
+
+ zpci_nb_devices--;
+ zbus->function[zdev->devfn] = NULL;
+ zpci_bus_put(zbus);
+}
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
new file mode 100644
index 0000000..55c9488
--- /dev/null
+++ b/arch/s390/pci/pci_bus.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
+void zpci_bus_device_unregister(struct zpci_dev *zdev);
+
+void zpci_release_device(struct kref *kref);
+static inline void zpci_zdev_put(struct zpci_dev *zdev)
+{
+ kref_put(&zdev->kref, zpci_release_device);
+}
+
+static inline void zpci_zdev_get(struct zpci_dev *zdev)
+{
+ kref_get(&zdev->kref);
+}
+
+int zpci_alloc_domain(int domain);
+void zpci_free_domain(int domain);
+int zpci_setup_bus_resources(struct zpci_dev *zdev,
+ struct list_head *resources);
+
+static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
+ unsigned int devfn)
+{
+ struct zpci_bus *zbus = bus->sysdata;
+
+ return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
+}
+
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 20e093f..0a0e8b8 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,7 +24,7 @@
bool zpci_unique_uid;
-static void update_uid_checking(bool new)
+void update_uid_checking(bool new)
{
if (zpci_unique_uid != new)
zpci_dbg(1, "uid checking:%d\n", new);
@@ -102,6 +102,7 @@
zdev->msi_addr = response->msia;
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
+ zdev->version = response->version;
switch (response->version) {
case 1:
@@ -145,7 +146,7 @@
{
int i;
- for (i = 0; i < PCI_BAR_COUNT; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
zdev->bars[i].val = le32_to_cpu(response->bar[i]);
zdev->bars[i].size = response->bar_size[i];
}
@@ -155,17 +156,23 @@
zdev->pfgid = response->pfgid;
zdev->pft = response->pft;
zdev->vfn = response->vfn;
+ zdev->port = response->port;
zdev->uid = response->uid;
zdev->fmb_length = sizeof(u32) * response->fmb_len;
+ zdev->rid_available = response->rid_avail;
+ zdev->is_physfn = response->is_physfn;
+ if (!s390_pci_no_rid && zdev->rid_available)
+ zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
if (response->util_str_avail) {
memcpy(zdev->util_str, response->util_str,
sizeof(zdev->util_str));
+ zdev->util_str_avail = 1;
}
zdev->mio_capable = response->mio_addr_avail;
- for (i = 0; i < PCI_BAR_COUNT; i++) {
- if (!(response->mio.valid & (1 << (PCI_BAR_COUNT - i - 1))))
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+ if (!(response->mio.valid & (1 << (PCI_STD_NUM_BARS - i - 1))))
continue;
zdev->bars[i].mio_wb = (void __iomem *) response->mio.addr[i].wb;
@@ -174,7 +181,7 @@
return 0;
}
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int clp_query_pci_fn(struct zpci_dev *zdev)
{
struct clp_req_rsp_query_pci *rrb;
int rc;
@@ -187,7 +194,7 @@
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
rrb->response.hdr.len = sizeof(rrb->response);
- rrb->request.fh = fh;
+ rrb->request.fh = zdev->fh;
rc = clp_req(rrb, CLP_LPS_PCI);
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
@@ -205,49 +212,20 @@
return rc;
}
-int clp_add_pci_device(u32 fid, u32 fh, int configured)
-{
- struct zpci_dev *zdev;
- int rc = -ENOMEM;
-
- zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
- zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
- if (!zdev)
- goto error;
-
- zdev->fh = fh;
- zdev->fid = fid;
-
- /* Query function properties and update zdev */
- rc = clp_query_pci_fn(zdev, fh);
- if (rc)
- goto error;
-
- if (configured)
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
- else
- zdev->state = ZPCI_FN_STATE_STANDBY;
-
- rc = zpci_create_device(zdev);
- if (rc)
- goto error;
- return 0;
-
-error:
- zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
- kfree(zdev);
- return rc;
-}
-
-/*
- * Enable/Disable a given PCI function and update its function handle if
- * necessary
+static int clp_refresh_fh(u32 fid);
+/**
+ * clp_set_pci_fn() - Execute a command on a PCI function
+ * @zdev: Function that will be affected
+ * @nr_dma_as: DMA address space number
+ * @command: The command code to execute
+ *
+ * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and
+ * > 0 for non-success platform responses
*/
static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
int rc, retries = 100;
- u32 fid = zdev->fid;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
@@ -271,17 +249,50 @@
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
- if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
- zpci_err("Set PCI FN:\n");
- zpci_err_clp(rrb->response.hdr.rsp, rc);
- }
-
if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
zdev->fh = rrb->response.fh;
- } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY &&
- rrb->response.fh == 0) {
+ } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY) {
/* Function is already in desired state - update handle */
- rc = clp_rescan_pci_devices_simple(&fid);
+ rc = clp_refresh_fh(zdev->fid);
+ } else {
+ zpci_err("Set PCI FN:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ if (!rc)
+ rc = rrb->response.hdr.rsp;
+ }
+ clp_free_block(rrb);
+ return rc;
+}
+
+int clp_setup_writeback_mio(void)
+{
+ struct clp_req_rsp_slpc_pci *rrb;
+ u8 wb_bit_pos;
+ int rc;
+
+ rrb = clp_alloc_block(GFP_KERNEL);
+ if (!rrb)
+ return -ENOMEM;
+
+ memset(rrb, 0, sizeof(*rrb));
+ rrb->request.hdr.len = sizeof(rrb->request);
+ rrb->request.hdr.cmd = CLP_SLPC;
+ rrb->response.hdr.len = sizeof(rrb->response);
+
+ rc = clp_req(rrb, CLP_LPS_PCI);
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ if (rrb->response.vwb) {
+ wb_bit_pos = rrb->response.mio_wb;
+ set_bit_inv(wb_bit_pos, &mio_wb_bit_mask);
+ zpci_dbg(3, "wb bit: %d\n", wb_bit_pos);
+ } else {
+ zpci_dbg(3, "wb bit: n.a.\n");
+ }
+
+ } else {
+ zpci_err("SLPC PCI:\n");
+ zpci_err_clp(rrb->response.hdr.rsp, rc);
+ rc = -EIO;
}
clp_free_block(rrb);
return rc;
@@ -293,17 +304,13 @@
rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
- if (rc)
- goto out;
-
- if (zpci_use_mio(zdev)) {
+ if (!rc && zpci_use_mio(zdev)) {
rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO);
zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
zdev->fid, zdev->fh, rc);
if (rc)
clp_disable_fh(zdev);
}
-out:
return rc;
}
@@ -366,25 +373,7 @@
zdev = get_zdev_by_fid(entry->fid);
if (!zdev)
- clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
-}
-
-static void __clp_update(struct clp_fh_list_entry *entry, void *data)
-{
- struct zpci_dev *zdev;
- u32 *fid = data;
-
- if (!entry->vendor_id)
- return;
-
- if (fid && *fid != entry->fid)
- return;
-
- zdev = get_zdev_by_fid(entry->fid);
- if (!zdev)
- return;
-
- zdev->fh = entry->fh;
+ zpci_create_device(entry->fid, entry->fh, entry->config_state);
}
int clp_scan_pci_devices(void)
@@ -402,27 +391,25 @@
return rc;
}
-int clp_rescan_pci_devices(void)
+static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data)
{
- struct clp_req_rsp_list_pci *rrb;
- int rc;
+ struct zpci_dev *zdev;
+ u32 fid = *((u32 *)data);
- zpci_remove_reserved_devices();
+ if (!entry->vendor_id || fid != entry->fid)
+ return;
- rrb = clp_alloc_block(GFP_KERNEL);
- if (!rrb)
- return -ENOMEM;
+ zdev = get_zdev_by_fid(fid);
+ if (!zdev)
+ return;
- rc = clp_list_pci(rrb, NULL, __clp_add);
-
- clp_free_block(rrb);
- return rc;
+ zdev->fh = entry->fh;
}
-/* Rescan PCI functions and refresh function handles. If fid is non-NULL only
- * refresh the handle of the function matching @fid
+/*
+ * Refresh the function handle of the function matching @fid
*/
-int clp_rescan_pci_devices_simple(u32 *fid)
+static int clp_refresh_fh(u32 fid)
{
struct clp_req_rsp_list_pci *rrb;
int rc;
@@ -431,7 +418,7 @@
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, fid, __clp_update);
+ rc = clp_list_pci(rrb, &fid, __clp_refresh_fh);
clp_free_block(rrb);
return rc;
@@ -490,7 +477,7 @@
}
}
-static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc_pci *lpcb)
{
unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 64b1399..ebc9a49 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -10,7 +10,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>
@@ -261,13 +261,11 @@
unsigned long start, int size)
{
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long boundary_size;
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- PAGE_SIZE) >> PAGE_SHIFT;
return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
start, size, zdev->start_dma >> PAGE_SHIFT,
- boundary_size, 0);
+ dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
+ 0);
}
static dma_addr_t dma_alloc_address(struct device *dev, int size)
@@ -670,6 +668,8 @@
.unmap_page = s390_dma_unmap_pages,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .alloc_pages = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 8d6ee4a..b7cfde7 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -14,6 +14,8 @@
#include <asm/pci_debug.h>
#include <asm/sclp.h>
+#include "pci_bus.h"
+
/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
u32 reserved1;
@@ -53,7 +55,7 @@
zpci_err_hex(ccdf, sizeof(*ccdf));
if (zdev)
- pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+ pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -74,46 +76,52 @@
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = NULL;
enum zpci_state state;
+ struct pci_dev *pdev;
int ret;
- if (zdev)
- pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
-
- pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
- pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
zpci_err("avail CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
switch (ccdf->pec) {
case 0x0301: /* Reserved|Standby -> Configured */
if (!zdev) {
- ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
- if (ret)
- break;
- zdev = get_zdev_by_fid(ccdf->fid);
- }
- if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+ zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
break;
- zdev->state = ZPCI_FN_STATE_CONFIGURED;
+ }
+ /* the configuration request may be stale */
+ if (zdev->state != ZPCI_FN_STATE_STANDBY)
+ break;
zdev->fh = ccdf->fh;
+ zdev->state = ZPCI_FN_STATE_CONFIGURED;
ret = zpci_enable_device(zdev);
if (ret)
break;
+
+ /* the PCI function will be scanned once function 0 appears */
+ if (!zdev->zbus->bus)
+ break;
+
+ pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
+ if (!pdev)
+ break;
+
+ pci_bus_add_device(pdev);
pci_lock_rescan_remove();
- pci_rescan_bus(zdev->bus);
+ pci_bus_add_devices(zdev->zbus->bus);
pci_unlock_rescan_remove();
break;
case 0x0302: /* Reserved -> Standby */
- if (!zdev)
- clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+ if (!zdev) {
+ zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+ break;
+ }
+ zdev->fh = ccdf->fh;
break;
case 0x0303: /* Deconfiguration requested */
if (!zdev)
break;
- if (pdev)
- pci_stop_and_remove_bus_device_locked(pdev);
+ zpci_remove_device(zdev, false);
ret = zpci_disable_device(zdev);
if (ret)
@@ -128,33 +136,31 @@
case 0x0304: /* Configured -> Standby|Reserved */
if (!zdev)
break;
- if (pdev) {
- /* Give the driver a hint that the function is
- * already unusable. */
- pdev->error_state = pci_channel_io_perm_failure;
- pci_stop_and_remove_bus_device_locked(pdev);
- }
+ /* Give the driver a hint that the function is
+ * already unusable.
+ */
+ zpci_remove_device(zdev, true);
zdev->fh = ccdf->fh;
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
if (!clp_get_state(ccdf->fid, &state) &&
state == ZPCI_FN_STATE_RESERVED) {
- zpci_remove_device(zdev);
+ zpci_device_reserved(zdev);
}
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
- clp_rescan_pci_devices();
+ zpci_remove_reserved_devices();
+ clp_scan_pci_devices();
break;
case 0x0308: /* Standby -> Reserved */
if (!zdev)
break;
- zpci_remove_device(zdev);
+ zpci_device_reserved(zdev);
break;
default:
break;
}
- pci_dev_put(pdev);
}
void zpci_event_availability(void *data)
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
new file mode 100644
index 0000000..ead062b
--- /dev/null
+++ b/arch/s390/pci/pci_iov.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include "pci_iov.h"
+
+static struct resource iov_res = {
+ .name = "PCI IOV res",
+ .start = 0,
+ .end = -1,
+ .flags = IORESOURCE_MEM,
+};
+
+void zpci_iov_map_resources(struct pci_dev *pdev)
+{
+ resource_size_t len;
+ int i;
+
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ int bar = i + PCI_IOV_RESOURCES;
+
+ len = pci_resource_len(pdev, bar);
+ if (!len)
+ continue;
+ pdev->resource[bar].parent = &iov_res;
+ }
+}
+
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn)
+{
+ pci_lock_rescan_remove();
+ /* Linux' vfid's start at 0 vfn at 1 */
+ pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
+ pci_unlock_rescan_remove();
+}
+
+static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid)
+{
+ int rc;
+
+ rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
+ if (rc)
+ return rc;
+
+ virtfn->is_virtfn = 1;
+ virtfn->multifunction = 0;
+ virtfn->physfn = pci_dev_get(pdev);
+
+ return 0;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ int i, cand_devfn;
+ struct zpci_dev *zdev;
+ struct pci_dev *pdev;
+ int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
+ int rc = 0;
+
+ if (!zbus->multifunction)
+ return 0;
+
+ /* If the parent PF for the given VF is also configured in the
+ * instance, it must be on the same zbus.
+ * We can then identify the parent PF by checking what
+ * devfn the VF would have if it belonged to that PF using the PF's
+ * stride and offset. Only if this candidate devfn matches the
+ * actual devfn will we link both functions.
+ */
+ for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
+ zdev = zbus->function[i];
+ if (zdev && zdev->is_physfn) {
+ pdev = pci_get_slot(zbus->bus, zdev->devfn);
+ if (!pdev)
+ continue;
+ cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
+ if (cand_devfn == virtfn->devfn) {
+ rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ break;
+ }
+ /* balance pci_get_slot() */
+ pci_dev_put(pdev);
+ }
+ }
+ return rc;
+}
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
new file mode 100644
index 0000000..b2c8280
--- /dev/null
+++ b/arch/s390/pci/pci_iov.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ * Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#ifndef __S390_PCI_IOV_H
+#define __S390_PCI_IOV_H
+
+#ifdef CONFIG_PCI_IOV
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
+
+void zpci_iov_map_resources(struct pci_dev *pdev);
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+
+#else /* CONFIG_PCI_IOV */
+static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
+
+static inline void zpci_iov_map_resources(struct pci_dev *pdev) {}
+
+static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+ return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* __S390_PCI_IOV_h */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 743f257..75217fb 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -103,9 +103,10 @@
{
struct msi_desc *entry = irq_get_msi_desc(data->irq);
struct msi_msg msg = entry->msg;
+ int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
msg.address_lo &= 0xff0000ff;
- msg.address_lo |= (cpumask_first(dest) << 8);
+ msg.address_lo |= (cpu_addr << 8);
pci_write_msi_msg(data->irq, &msg);
return IRQ_SET_MASK_OK;
@@ -238,6 +239,7 @@
unsigned long bit;
struct msi_desc *msi;
struct msi_msg msg;
+ int cpu_addr;
int rc, irq;
zdev->aisb = -1UL;
@@ -287,9 +289,15 @@
handle_percpu_irq);
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
+ if (msi->affinity)
+ cpu = cpumask_first(&msi->affinity->mask);
+ else
+ cpu = 0;
+ cpu_addr = smp_cpu_get_cpu_address(cpu);
+
msg.address_lo = zdev->msi_addr & 0xff0000ff;
- msg.address_lo |= msi->affinity ?
- (cpumask_first(&msi->affinity->mask) << 8) : 0;
+ msg.address_lo |= (cpu_addr << 8);
+
for_each_possible_cpu(cpu) {
airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
}
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 020a2c5..37b1bbd 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -125,17 +125,17 @@
struct vm_area_struct *vma;
long ret;
- down_read(¤t->mm->mmap_sem);
+ mmap_read_lock(current->mm);
ret = -EINVAL;
vma = find_vma(current->mm, user_addr);
- if (!vma)
+ if (!vma || user_addr < vma->vm_start)
goto out;
ret = -EACCES;
if (!(vma->vm_flags & access))
goto out;
ret = follow_pfn(vma, user_addr, pfn);
out:
- up_read(¤t->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
return ret;
}
@@ -155,10 +155,12 @@
return -EINVAL;
/*
- * Only support read access to MIO capable devices on a MIO enabled
- * system. Otherwise we would have to check for every address if it is
- * a special ZPCI_ADDR and we would have to do a get_pfn() which we
- * don't need for MIO capable devices.
+ * We only support write access to MIO capable devices if we are on
+ * a MIO enabled system. Otherwise we would have to check for every
+ * address if it is a special ZPCI_ADDR and would have to do
+ * a get_pfn() which we don't need for MIO capable devices. Currently
+ * ISM devices are the only devices without MIO support and there is no
+ * known need for accessing these from userspace.
*/
if (static_branch_likely(&have_mio)) {
ret = __memcpy_toio_inuser((void __iomem *) mmio_addr,
@@ -282,10 +284,12 @@
return -EINVAL;
/*
- * Only support write access to MIO capable devices on a MIO enabled
- * system. Otherwise we would have to check for every address if it is
- * a special ZPCI_ADDR and we would have to do a get_pfn() which we
- * don't need for MIO capable devices.
+ * We only support read access to MIO capable devices if we are on
+ * a MIO enabled system. Otherwise we would have to check for every
+ * address if it is a special ZPCI_ADDR and would have to do
+ * a get_pfn() which we don't need for MIO capable devices. Currently
+ * ISM devices are the only devices without MIO support and there is no
+ * known need for accessing these from userspace.
*/
if (static_branch_likely(&have_mio)) {
ret = __memcpy_fromio_inuser(
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 215f174..5c028be 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -33,6 +33,7 @@
zpci_attr(pfgid, "0x%02x\n", pfgid);
zpci_attr(vfn, "0x%04x\n", vfn);
zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(port, "%d\n", port);
zpci_attr(uid, "0x%x\n", uid);
zpci_attr(segment0, "0x%02x\n", pfip[0]);
zpci_attr(segment1, "0x%02x\n", pfip[1]);
@@ -88,7 +89,7 @@
ret = zpci_enable_device(zdev);
if (ret)
goto out;
- pci_rescan_bus(zdev->bus);
+ pci_rescan_bus(zdev->zbus->bus);
}
out:
pci_unlock_rescan_remove();
@@ -142,6 +143,7 @@
&dev_attr_pchid.attr,
&dev_attr_pfgid.attr,
&dev_attr_pft.attr,
+ &dev_attr_port.attr,
&dev_attr_vfn.attr,
&dev_attr_uid.attr,
&dev_attr_recover.attr,
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index 04a0343..97ca527 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
purgatory
+purgatory.chk
purgatory.lds
purgatory.ro
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 9de5606..21c4ebe 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -4,7 +4,7 @@
purgatory-y := head.o purgatory.o string.o sha256.o mem.o
-targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
@@ -24,19 +24,27 @@
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += -fno-stack-protector
KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
-LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T
+# Since we link purgatory with -r unresolved symbols are not checked, so we
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib
+LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T
+LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS)
$(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
+$(obj)/purgatory.chk: $(obj)/purgatory FORCE
+ $(call if_changed,ld)
+
OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
-$(obj)/purgatory.ro: $(obj)/purgatory FORCE
+$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
$(call if_changed,objcopy)
$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
deleted file mode 100644
index f4f4c2c..0000000
--- a/arch/s390/scripts/Makefile.chkbss
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-chkbss-target ?= built-in.a
-$(obj)/$(chkbss-target): chkbss
-
-chkbss-files := $(addsuffix .chkbss, $(chkbss))
-clean-files += $(chkbss-files)
-
-PHONY += chkbss
-chkbss: $(addprefix $(obj)/, $(chkbss-files))
-
-quiet_cmd_chkbss = CHKBSS $<
- cmd_chkbss = \
- if ! $(OBJSIZE) --common $< | $(AWK) 'END { if ($$3) exit 1 }'; then \
- echo "error: $< .bss section is not empty" >&2; exit 1; \
- fi; \
- touch $@;
-
-$(obj)/%.o.chkbss: $(obj)/%.o
- $(call cmd,chkbss)
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
index 71bd6f8..ea62f37 100644
--- a/arch/s390/tools/.gitignore
+++ b/arch/s390/tools/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
gen_facilities
gen_opcode_table
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index b5e35e8..f9dd47f 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -10,8 +10,8 @@
kapi: $(kapi-hdrs-y)
-hostprogs-y += gen_facilities
-hostprogs-y += gen_opcode_table
+hostprogs += gen_facilities
+hostprogs += gen_opcode_table
HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)