Update Linux to v5.10.109

Sourced from [1].

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz

Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild
index e63940b..8b98c50 100644
--- a/arch/s390/Kbuild
+++ b/arch/s390/Kbuild
@@ -7,5 +7,4 @@
 obj-$(CONFIG_APPLDATA_BASE)	+= appldata/
 obj-y				+= net/
 obj-$(CONFIG_PCI)		+= pci/
-obj-$(CONFIG_NUMA)		+= numa/
 obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += purgatory/
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0023b78..896b68e 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -30,7 +30,7 @@
 	def_bool y
 
 config GENERIC_LOCKBREAK
-	def_bool y if PREEMPT
+	def_bool y if PREEMPTION
 
 config PGSTE
 	def_bool y if KVM
@@ -59,6 +59,8 @@
 config S390
 	def_bool y
 	select ARCH_BINFMT_ELF_STATE
+	select ARCH_HAS_DEBUG_VM_PGTABLE
+	select ARCH_HAS_DEBUG_WX
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
@@ -72,6 +74,7 @@
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_VDSO_DATA
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
 	select ARCH_INLINE_READ_LOCK_BH
@@ -101,22 +104,24 @@
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
-	select ARCH_KEEP_MEMBLOCK
-	select ARCH_SAVE_PAGE_KEYS if HIBERNATION
 	select ARCH_STACKWALK
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
+	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_IPC_PARSE_VERSION
-	select BUILDTIME_EXTABLE_SORT
+	select BUILDTIME_TABLE_SORT
 	select CLONE_BACKWARDS2
+	select DMA_OPS if PCI
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CPU_AUTOPROBE
 	select GENERIC_CPU_VULNERABILITIES
 	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_GETTIMEOFDAY
+	select GENERIC_PTDUMP
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
@@ -124,6 +129,7 @@
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN
+	select HAVE_ARCH_KASAN_VMALLOC
 	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
@@ -134,7 +140,6 @@
 	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
-	select HAVE_COPY_THREAD_TLS
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
@@ -143,10 +148,14 @@
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FENTRY
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_GCC_PLUGINS
+	select HAVE_GENERIC_VDSO
+	select HAVE_IOREMAP_PROT if PCI
+	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZ4
@@ -155,21 +164,22 @@
 	select HAVE_KERNEL_UNCOMPRESSED
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES
+	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_KRETPROBES
 	select HAVE_KVM
 	select HAVE_LIVEPATCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
-	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MEMBLOCK_PHYS_MAP
-	select HAVE_MMU_GATHER_NO_GATHER
+	select MMU_GATHER_NO_GATHER
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NOP_MCOUNT
 	select HAVE_OPROFILE
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
-	select HAVE_RCU_TABLE_FREE
+	select MMU_GATHER_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_RELIABLE_STACKTRACE
 	select HAVE_RSEQ
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
@@ -182,6 +192,8 @@
 	select OLD_SIGSUSPEND3
 	select PCI_DOMAINS		if PCI
 	select PCI_MSI			if PCI
+	select PCI_MSI_ARCH_FALLBACKS	if PCI_MSI
+	select SET_FS
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
@@ -192,6 +204,7 @@
 	select ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	select SWIOTLB
 	select GENERIC_ALLOCATOR
+	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
 
 
 config SCHED_OMIT_FRAME_POINTER
@@ -246,8 +259,8 @@
 
 config MARCH_Z900
 	bool "IBM zSeries model z800 and z900"
-	depends on !CC_IS_CLANG
 	select HAVE_MARCH_Z900_FEATURES
+	depends on $(cc-option,-march=z900)
 	help
 	  Select this to enable optimizations for model z800/z900 (2064 and
 	  2066 series). This will enable some optimizations that are not
@@ -255,8 +268,8 @@
 
 config MARCH_Z990
 	bool "IBM zSeries model z890 and z990"
-	depends on !CC_IS_CLANG
 	select HAVE_MARCH_Z990_FEATURES
+	depends on $(cc-option,-march=z990)
 	help
 	  Select this to enable optimizations for model z890/z990 (2084 and
 	  2086 series). The kernel will be slightly faster but will not work
@@ -264,8 +277,8 @@
 
 config MARCH_Z9_109
 	bool "IBM System z9"
-	depends on !CC_IS_CLANG
 	select HAVE_MARCH_Z9_109_FEATURES
+	depends on $(cc-option,-march=z9-109)
 	help
 	  Select this to enable optimizations for IBM System z9 (2094 and
 	  2096 series). The kernel will be slightly faster but will not work
@@ -274,6 +287,7 @@
 config MARCH_Z10
 	bool "IBM System z10"
 	select HAVE_MARCH_Z10_FEATURES
+	depends on $(cc-option,-march=z10)
 	help
 	  Select this to enable optimizations for IBM System z10 (2097 and
 	  2098 series). The kernel will be slightly faster but will not work
@@ -282,6 +296,7 @@
 config MARCH_Z196
 	bool "IBM zEnterprise 114 and 196"
 	select HAVE_MARCH_Z196_FEATURES
+	depends on $(cc-option,-march=z196)
 	help
 	  Select this to enable optimizations for IBM zEnterprise 114 and 196
 	  (2818 and 2817 series). The kernel will be slightly faster but will
@@ -290,6 +305,7 @@
 config MARCH_ZEC12
 	bool "IBM zBC12 and zEC12"
 	select HAVE_MARCH_ZEC12_FEATURES
+	depends on $(cc-option,-march=zEC12)
 	help
 	  Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
 	  2827 series). The kernel will be slightly faster but will not work on
@@ -298,6 +314,7 @@
 config MARCH_Z13
 	bool "IBM z13s and z13"
 	select HAVE_MARCH_Z13_FEATURES
+	depends on $(cc-option,-march=z13)
 	help
 	  Select this to enable optimizations for IBM z13s and z13 (2965 and
 	  2964 series). The kernel will be slightly faster but will not work on
@@ -306,6 +323,7 @@
 config MARCH_Z14
 	bool "IBM z14 ZR1 and z14"
 	select HAVE_MARCH_Z14_FEATURES
+	depends on $(cc-option,-march=z14)
 	help
 	  Select this to enable optimizations for IBM z14 ZR1 and z14 (3907
 	  and 3906 series). The kernel will be slightly faster but will not
@@ -314,6 +332,7 @@
 config MARCH_Z15
 	bool "IBM z15"
 	select HAVE_MARCH_Z15_FEATURES
+	depends on $(cc-option,-march=z15)
 	help
 	  Select this to enable optimizations for IBM z15 (8562
 	  and 8561 series). The kernel will be slightly faster but will not
@@ -367,33 +386,39 @@
 
 config TUNE_Z900
 	bool "IBM zSeries model z800 and z900"
-	depends on !CC_IS_CLANG
+	depends on $(cc-option,-mtune=z900)
 
 config TUNE_Z990
 	bool "IBM zSeries model z890 and z990"
-	depends on !CC_IS_CLANG
+	depends on $(cc-option,-mtune=z990)
 
 config TUNE_Z9_109
 	bool "IBM System z9"
-	depends on !CC_IS_CLANG
+	depends on $(cc-option,-mtune=z9-109)
 
 config TUNE_Z10
 	bool "IBM System z10"
+	depends on $(cc-option,-mtune=z10)
 
 config TUNE_Z196
 	bool "IBM zEnterprise 114 and 196"
+	depends on $(cc-option,-mtune=z196)
 
 config TUNE_ZEC12
 	bool "IBM zBC12 and zEC12"
+	depends on $(cc-option,-mtune=zEC12)
 
 config TUNE_Z13
-	bool "IBM z13"
+	bool "IBM z13s and z13"
+	depends on $(cc-option,-mtune=z13)
 
 config TUNE_Z14
-	bool "IBM z14"
+	bool "IBM z14 ZR1 and z14"
+	depends on $(cc-option,-mtune=z14)
 
 config TUNE_Z15
 	bool "IBM z15"
+	depends on $(cc-option,-mtune=z15)
 
 endchoice
 
@@ -414,9 +439,6 @@
 	  (and some other stuff like libraries and such) is needed for
 	  executing 31 bit applications.  It is safe to say "Y".
 
-config COMPAT_VDSO
-	def_bool COMPAT && !CC_IS_CLANG
-
 config SYSVIPC_COMPAT
 	def_bool y if COMPAT && SYSVIPC
 
@@ -438,14 +460,6 @@
 config HOTPLUG_CPU
 	def_bool y
 
-# Some NUMA nodes have memory ranges that span
-# other nodes.	Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node.	See memmap_init_zone()
-# for details. <- They meant memory holes!
-config NODES_SPAN_OTHER_NODES
-	def_bool NUMA
-
 config NUMA
 	bool "NUMA support"
 	depends on SCHED_TOPOLOGY
@@ -455,58 +469,10 @@
 
 	  This option adds NUMA support to the kernel.
 
-	  An operation mode can be selected by appending
-	  numa=<method> to the kernel command line.
-
-	  The default behaviour is identical to appending numa=plain to
-	  the command line. This will create just one node with all
-	  available memory and all CPUs in it.
-
 config NODES_SHIFT
-	int "Maximum NUMA nodes (as a power of 2)"
-	range 1 10
-	depends on NUMA
-	default "4"
-	help
-	  Specify the maximum number of NUMA nodes available on the target
-	  system. Increases memory reserved to accommodate various tables.
-
-menu "Select NUMA modes"
-	depends on NUMA
-
-config NUMA_EMU
-	bool "NUMA emulation"
-	default y
-	help
-	  Numa emulation mode will split the available system memory into
-	  equal chunks which then are distributed over the configured number
-	  of nodes in a round-robin manner.
-
-	  The number of fake nodes is limited by the number of available memory
-	  chunks (i.e. memory size / fake size) and the number of supported
-	  nodes in the kernel.
-
-	  The CPUs are assigned to the nodes in a way that partially respects
-	  the original machine topology (if supported by the machine).
-	  Fair distribution of the CPUs is not guaranteed.
-
-config EMU_SIZE
-	hex "NUMA emulation memory chunk size"
-	default 0x10000000
-	range 0x400000 0x100000000
-	depends on NUMA_EMU
-	help
-	  Select the default size by which the memory is chopped and then
-	  assigned to emulated NUMA nodes.
-
-	  This can be overridden by specifying
-
-	  emu_size=<n>
-
-	  on the kernel command line where also suffixes K, M, G, and T are
-	  supported.
-
-endmenu
+	int
+	depends on NEED_MULTIPLE_NODES
+	default "1"
 
 config SCHED_SMT
 	def_bool n
@@ -669,10 +635,6 @@
 config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y
 
-config FORCE_MAX_ZONEORDER
-	int
-	default "9"
-
 config MAX_PHYSMEM_BITS
 	int "Maximum size of supported physical memory in bits (42-53)"
 	range 42 53
@@ -740,7 +702,7 @@
 config QDIO
 	def_tristate y
 	prompt "QDIO support"
-	---help---
+	help
 	  This driver provides the Queued Direct I/O base support for
 	  IBM System z.
 
@@ -812,6 +774,7 @@
 	def_tristate n
 	prompt "VFIO support for AP devices"
 	depends on S390_AP_IOMMU && VFIO_MDEV_DEVICE && KVM
+	depends on ZCRYPT
 	help
 		This driver grants access to Adjunct Processor (AP) devices
 		via the VFIO mediated device interface.
@@ -837,32 +800,6 @@
 
 endmenu
 
-config SECCOMP
-	def_bool y
-	prompt "Enable seccomp to safely compute untrusted bytecode"
-	depends on PROC_FS
-	help
-	  This kernel feature is useful for number crunching applications
-	  that may need to compute untrusted bytecode during their
-	  execution. By using pipes or other transports made available to
-	  the process as file descriptors supporting the read/write
-	  syscalls, it's possible to isolate those applications in
-	  their own address space using seccomp. Once seccomp is
-	  enabled via /proc/<pid>/seccomp, it cannot be disabled
-	  and the task is only allowed to execute a few safe syscalls
-	  defined by each seccomp mode.
-
-	  If unsure, say Y.
-
-menu "Power Management"
-
-config ARCH_HIBERNATION_POSSIBLE
-	def_bool y
-
-source "kernel/power/Kconfig"
-
-endmenu
-
 config CCW
 	def_bool y
 
@@ -875,6 +812,7 @@
 config PROTECTED_VIRTUALIZATION_GUEST
 	def_bool n
 	prompt "Protected virtualization guest support"
+	select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
 	help
 	  Select this option, if you want to be able to run this
 	  kernel as a protected virtualization KVM guest.
@@ -997,7 +935,6 @@
 	select TTY
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_CONSOLE
 	help
 	  Enabling this option adds support for virtio based paravirtual device
 	  drivers on s390.
@@ -1006,3 +943,17 @@
 	  the KVM hypervisor.
 
 endmenu
+
+menu "Selftests"
+
+config S390_UNWIND_SELFTEST
+	def_tristate n
+	prompt "Test unwind functions"
+	help
+	  This option enables s390 specific stack unwinder testing kernel
+	  module. This option is not useful for distributions or general
+	  kernels, but only for kernel developers working on architecture code.
+
+	  Say N if you are unsure.
+
+endmenu
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index 1905275..ab48b69 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -3,17 +3,5 @@
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config S390_PTDUMP
-	bool "Export kernel pagetable layout to userspace via debugfs"
-	depends on DEBUG_KERNEL
-	select DEBUG_FS
-	---help---
-	  Say Y here if you want to show the kernel pagetable layout in a
-	  debugfs file. This information is only useful for kernel developers
-	  who are working in architecture specific areas of the kernel.
-	  It is probably not a good idea to enable this feature in a production
-	  kernel.
-	  If in doubt, say "N"
-
 config EARLY_PRINTK
 	def_bool y
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 2faaf45..9250691 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -27,7 +27,8 @@
 KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
 KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
-KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
+KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding
+KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector
 KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
@@ -157,7 +158,6 @@
 
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
-	$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index aa738ca..d74a4c7 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -51,10 +51,9 @@
  */
 static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
 static int appldata_timer_handler(struct ctl_table *ctl, int write,
-				  void __user *buffer, size_t *lenp, loff_t *ppos);
+				  void *buffer, size_t *lenp, loff_t *ppos);
 static int appldata_interval_handler(struct ctl_table *ctl, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
+				     void *buffer, size_t *lenp, loff_t *ppos);
 
 static struct ctl_table_header *appldata_sysctl_header;
 static struct ctl_table appldata_table[] = {
@@ -217,7 +216,7 @@
  */
 static int
 appldata_timer_handler(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int timer_active = appldata_timer_active;
 	int rc;
@@ -250,7 +249,7 @@
  */
 static int
 appldata_interval_handler(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int interval = appldata_interval;
 	int rc;
@@ -280,7 +279,7 @@
  */
 static int
 appldata_generic_handler(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct appldata_ops *ops = NULL, *tmp_ops;
 	struct list_head *lh;
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index e68136c..21c3147 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -29,10 +29,6 @@
  * the structure version (product ID, see appldata_base.c) needs to be changed
  * as well and all documentation and z/VM applications using it must be
  * updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
  */
 struct appldata_mem_data {
 	u64 timestamp;
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 8bc14b0..59c282c 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -25,10 +25,6 @@
  * This is accessed as binary data by z/VM. If changes to it can't be avoided,
  * the structure version (product ID, see appldata_base.c) needs to be changed
  * as well and all documentation and z/VM applications using it must be updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
  */
 struct appldata_net_sum_data {
 	u64 timestamp;
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 54f3756..a363d30 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -32,10 +32,6 @@
  * the structure version (product ID, see appldata_base.c) needs to be changed
  * as well and all documentation and z/VM applications using it must be
  * updated.
- *
- * The record layout is documented in the Linux for zSeries Device Drivers
- * book:
- * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
  */
 struct appldata_os_per_cpu {
 	u32 per_cpu_user;	/* timer ticks spent in user mode   */
@@ -75,7 +71,7 @@
 				   (waiting for I/O)               */
 
 	/* per cpu data */
-	struct appldata_os_per_cpu os_cpu[0];
+	struct appldata_os_per_cpu os_cpu[];
 } __attribute__((packed));
 
 static struct appldata_os_data *appldata_os_data;
@@ -133,8 +129,7 @@
 
 	os_data->nr_cpus = j;
 
-	new_size = sizeof(struct appldata_os_data) +
-		   (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
+	new_size = struct_size(os_data, os_cpu, os_data->nr_cpus);
 	if (ops.size != new_size) {
 		if (ops.active) {
 			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
@@ -169,8 +164,7 @@
 {
 	int rc, max_size;
 
-	max_size = sizeof(struct appldata_os_data) +
-		   (num_possible_cpus() * sizeof(struct appldata_os_per_cpu));
+	max_size = struct_size(appldata_os_data, os_cpu, num_possible_cpus());
 	if (max_size > APPLDATA_MAX_REC_SIZE) {
 		pr_err("Maximum OS record size %i exceeds the maximum "
 		       "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE);
diff --git a/arch/s390/boot/.gitignore b/arch/s390/boot/.gitignore
index 16ff906..b265bfe 100644
--- a/arch/s390/boot/.gitignore
+++ b/arch/s390/boot/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 image
 bzImage
 section_cmp.*
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 0ff9261..41a64b8 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -37,7 +37,7 @@
 obj-y	:= head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
 obj-y	+= string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
 obj-y	+= version.o pgm_check_info.o ctype.o text_dma.o
-obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST)	+= uv.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
 obj-$(CONFIG_RELOCATABLE)	+= machine_kexec_reloc.o
 obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 targets	:= bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
@@ -73,7 +73,3 @@
 install:
 	sh -x  $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
 	      System.map "$(INSTALL_PATH)"
-
-chkbss := $(obj-y)
-chkbss-target := startup.a
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/.gitignore b/arch/s390/boot/compressed/.gitignore
index e72fcd7..765a08f 100644
--- a/arch/s390/boot/compressed/.gitignore
+++ b/arch/s390/boot/compressed/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 vmlinux
 vmlinux.lds
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index fa529c5..b235ed9 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -62,7 +62,3 @@
 OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
 $(obj)/piggy.o: $(obj)/vmlinux.bin$(suffix-y) FORCE
 	$(call if_changed,objcopy)
-
-chkbss := $(filter-out piggy.o info.o, $(obj-y))
-chkbss-target := vmlinux.bin
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/boot/compressed/decompressor.c b/arch/s390/boot/compressed/decompressor.c
index 4504663..3061b11 100644
--- a/arch/s390/boot/compressed/decompressor.c
+++ b/arch/s390/boot/compressed/decompressor.c
@@ -16,7 +16,6 @@
  * gzip declarations
  */
 #define STATIC static
-#define STATIC_RW_DATA static __section(.data)
 
 #undef memset
 #undef memcpy
@@ -30,13 +29,13 @@
 extern unsigned char _compressed_end[];
 
 #ifdef CONFIG_HAVE_KERNEL_BZIP2
-#define HEAP_SIZE	0x400000
+#define BOOT_HEAP_SIZE	0x400000
 #else
-#define HEAP_SIZE	0x10000
+#define BOOT_HEAP_SIZE	0x10000
 #endif
 
 static unsigned long free_mem_ptr = (unsigned long) _end;
-static unsigned long free_mem_end_ptr = (unsigned long) _end + HEAP_SIZE;
+static unsigned long free_mem_end_ptr = (unsigned long) _end + BOOT_HEAP_SIZE;
 
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
@@ -62,7 +61,7 @@
 #include "../../../../lib/decompress_unxz.c"
 #endif
 
-#define decompress_offset ALIGN((unsigned long)_end + HEAP_SIZE, PAGE_SIZE)
+#define decompress_offset ALIGN((unsigned long)_end + BOOT_HEAP_SIZE, PAGE_SIZE)
 
 unsigned long mem_safe_offset(void)
 {
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 44561b2..9427e2c 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -59,6 +59,19 @@
 	BOOT_DATA_PRESERVED
 
 	/*
+	 * This is the BSS section of the decompressor and not of the decompressed Linux kernel.
+	 * It will consume place in the decompressor's image.
+	 */
+	. = ALIGN(8);
+	.bss : {
+		_bss = . ;
+		*(.bss)
+		*(.bss.*)
+		*(COMMON)
+		_ebss = .;
+	}
+
+	/*
 	 * uncompressed image info used by the decompressor it should match
 	 * struct vmlinux_info. It comes from .vmlinux.info section of
 	 * uncompressed vmlinux in a form of info.o
@@ -81,15 +94,6 @@
 		FILL(0xff);
 		. = ALIGN(4096);
 	}
-	. = ALIGN(256);
-	.bss : {
-		_bss = . ;
-		*(.bss)
-		*(.bss.*)
-		*(COMMON)
-		. = ALIGN(8);	/* For convenience during zeroing */
-		_ebss = .;
-	}
 	_end = .;
 
 	/* Sections to be discarded */
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index e6bf5f4..1a2c2b1 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -329,7 +329,7 @@
 	.quad	.Lduct			# cr5: primary-aste origin
 	.quad	0			# cr6:	I/O interrupts
 	.quad	0			# cr7:	secondary space segment table
-	.quad	0			# cr8:	access registers translation
+	.quad	0x0000000000008000	# cr8:	access registers translation
 	.quad	0			# cr9:	tracing off
 	.quad	0			# cr10: tracing off
 	.quad	0			# cr11: tracing off
diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh
index bed227f..515b27a 100644
--- a/arch/s390/boot/install.sh
+++ b/arch/s390/boot/install.sh
@@ -21,15 +21,10 @@
 if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
 if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
 
-# Default install - same as make zlilo
+echo "Warning: '${INSTALLKERNEL}' command not available - additional " \
+     "bootloader config required" >&2
+if [ -f $4/vmlinuz-$1 ]; then mv $4/vmlinuz-$1 $4/vmlinuz-$1.old; fi
+if [ -f $4/System.map-$1 ]; then mv $4/System.map-$1 $4/System.map-$1.old; fi
 
-if [ -f $4/vmlinuz ]; then
-	mv $4/vmlinuz $4/vmlinuz.old
-fi
-
-if [ -f $4/System.map ]; then
-	mv $4/System.map $4/System.old
-fi
-
-cat $2 > $4/vmlinuz
-cp $3 $4/System.map
+cat $2 > $4/vmlinuz-$1
+cp $3 $4/System.map-$1
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 75905b5..c56bbf5 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -2,25 +2,26 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/ctype.h>
+#include <linux/pgtable.h>
 #include <asm/ebcdic.h>
 #include <asm/sclp.h>
 #include <asm/sections.h>
 #include <asm/boot_data.h>
 #include <asm/facility.h>
-#include <asm/pgtable.h>
 #include <asm/uv.h>
 #include "boot.h"
 
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
 struct ipl_parameter_block __bootdata_preserved(ipl_block);
 int __bootdata_preserved(ipl_block_valid);
+unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
 
 unsigned long __bootdata(vmalloc_size) = VMALLOC_DEFAULT_SIZE;
 unsigned long __bootdata(memory_end);
 int __bootdata(memory_end_set);
 int __bootdata(noexec_disabled);
 
-int kaslr_enabled __section(.data);
+int kaslr_enabled;
 
 static inline int __diag308(unsigned long subcode, void *addr)
 {
@@ -72,30 +73,44 @@
 static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
 					  const struct ipl_parameter_block *ipb)
 {
-	size_t count;
-	size_t i;
+	const __u8 *scp_data;
+	__u32 scp_data_len;
 	int has_lowercase;
+	size_t count = 0;
+	size_t i;
 
-	count = min(size - 1, scpdata_length(ipb->fcp.scp_data,
-					     ipb->fcp.scp_data_len));
+	switch (ipb->pb0_hdr.pbt) {
+	case IPL_PBT_FCP:
+		scp_data_len = ipb->fcp.scp_data_len;
+		scp_data = ipb->fcp.scp_data;
+		break;
+	case IPL_PBT_NVME:
+		scp_data_len = ipb->nvme.scp_data_len;
+		scp_data = ipb->nvme.scp_data;
+		break;
+	default:
+		goto out;
+	}
+
+	count = min(size - 1, scpdata_length(scp_data, scp_data_len));
 	if (!count)
 		goto out;
 
 	has_lowercase = 0;
 	for (i = 0; i < count; i++) {
-		if (!isascii(ipb->fcp.scp_data[i])) {
+		if (!isascii(scp_data[i])) {
 			count = 0;
 			goto out;
 		}
-		if (!has_lowercase && islower(ipb->fcp.scp_data[i]))
+		if (!has_lowercase && islower(scp_data[i]))
 			has_lowercase = 1;
 	}
 
 	if (has_lowercase)
-		memcpy(dest, ipb->fcp.scp_data, count);
+		memcpy(dest, scp_data, count);
 	else
 		for (i = 0; i < count; i++)
-			dest[i] = tolower(ipb->fcp.scp_data[i]);
+			dest[i] = tolower(scp_data[i]);
 out:
 	dest[count] = '\0';
 	return count;
@@ -117,6 +132,7 @@
 			parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
 		break;
 	case IPL_PBT_FCP:
+	case IPL_PBT_NVME:
 		rc = ipl_block_get_ascii_scpdata(
 			parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
 		break;
@@ -211,7 +227,7 @@
 	check_cleared_facilities();
 }
 
-static char command_line_buf[COMMAND_LINE_SIZE] __section(.data);
+static char command_line_buf[COMMAND_LINE_SIZE];
 void parse_boot_command_line(void)
 {
 	char *param, *val;
@@ -232,6 +248,19 @@
 		if (!strcmp(param, "vmalloc") && val)
 			vmalloc_size = round_up(memparse(val, NULL), PAGE_SIZE);
 
+		if (!strcmp(param, "dfltcc") && val) {
+			if (!strcmp(val, "off"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_DISABLED;
+			else if (!strcmp(val, "on"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_FULL;
+			else if (!strcmp(val, "def_only"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_DEFLATE_ONLY;
+			else if (!strcmp(val, "inf_only"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_INFLATE_ONLY;
+			else if (!strcmp(val, "always"))
+				zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
+		}
+
 		if (!strcmp(param, "noexec")) {
 			rc = kstrtobool(val, &enabled);
 			if (!rc && !enabled)
@@ -243,17 +272,34 @@
 
 		if (!strcmp(param, "nokaslr"))
 			kaslr_enabled = 0;
+
+#if IS_ENABLED(CONFIG_KVM)
+		if (!strcmp(param, "prot_virt")) {
+			rc = kstrtobool(val, &enabled);
+			if (!rc && enabled)
+				prot_virt_host = 1;
+		}
+#endif
 	}
 }
 
+static inline bool is_ipl_block_dump(void)
+{
+	if (ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
+	    ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
+		return true;
+	if (ipl_block.pb0_hdr.pbt == IPL_PBT_NVME &&
+	    ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+		return true;
+	return false;
+}
+
 void setup_memory_end(void)
 {
 #ifdef CONFIG_CRASH_DUMP
 	if (OLDMEM_BASE) {
 		kaslr_enabled = 0;
-	} else if (ipl_block_valid &&
-		   ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
-		   ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) {
+	} else if (ipl_block_valid && is_ipl_block_dump()) {
 		kaslr_enabled = 0;
 		if (!sclp_early_get_hsa_size(&memory_end) && memory_end)
 			memory_end_set = 1;
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 5591243..d844a5e 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -2,8 +2,8 @@
 /*
  * Copyright IBM Corp. 2019
  */
+#include <linux/pgtable.h>
 #include <asm/mem_detect.h>
-#include <asm/pgtable.h>
 #include <asm/cpacf.h>
 #include <asm/timex.h>
 #include <asm/sclp.h>
@@ -42,7 +42,7 @@
 		return PRNG_MODE_TDES;
 }
 
-static unsigned long get_random(unsigned long limit)
+static int get_random(unsigned long limit, unsigned long *value)
 {
 	struct prng_parm prng = {
 		/* initial parameter block for tdes mode, copied from libica */
@@ -84,19 +84,101 @@
 			  (u8 *) &random, sizeof(random));
 		break;
 	default:
-		random = 0;
+		return -1;
 	}
-	return random % limit;
+	*value = random % limit;
+	return 0;
+}
+
+/*
+ * To randomize kernel base address we have to consider several facts:
+ * 1. physical online memory might not be continuous and have holes. mem_detect
+ *    info contains list of online memory ranges we should consider.
+ * 2. we have several memory regions which are occupied and we should not
+ *    overlap and destroy them. Currently safe_addr tells us the border below
+ *    which all those occupied regions are. We are safe to use anything above
+ *    safe_addr.
+ * 3. the upper limit might apply as well, even if memory above that limit is
+ *    online. Currently those limitations are:
+ *    3.1. Limit set by "mem=" kernel command line option
+ *    3.2. memory reserved at the end for kasan initialization.
+ * 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
+ *    Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
+ *    (16 pages when the kernel is built with kasan enabled)
+ * Assumptions:
+ * 1. kernel size (including .bss size) and upper memory limit are page aligned.
+ * 2. mem_detect memory region start is THREAD_SIZE aligned / end is PAGE_SIZE
+ *    aligned (in practice memory configurations granularity on z/VM and LPAR
+ *    is 1mb).
+ *
+ * To guarantee uniform distribution of kernel base address among all suitable
+ * addresses we generate random value just once. For that we need to build a
+ * continuous range in which every value would be suitable. We can build this
+ * range by simply counting all suitable addresses (let's call them positions)
+ * which would be valid as kernel base address. To count positions we iterate
+ * over online memory ranges. For each range which is big enough for the
+ * kernel image we count all suitable addresses we can put the kernel image at
+ * that is
+ * (end - start - kernel_size) / THREAD_SIZE + 1
+ * Two functions count_valid_kernel_positions and position_to_address help
+ * to count positions in memory range given and then convert position back
+ * to address.
+ */
+static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
+						  unsigned long _min,
+						  unsigned long _max)
+{
+	unsigned long start, end, pos = 0;
+	int i;
+
+	for_each_mem_detect_block(i, &start, &end) {
+		if (_min >= end)
+			continue;
+		if (start >= _max)
+			break;
+		start = max(_min, start);
+		end = min(_max, end);
+		if (end - start < kernel_size)
+			continue;
+		pos += (end - start - kernel_size) / THREAD_SIZE + 1;
+	}
+
+	return pos;
+}
+
+static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
+				 unsigned long _min, unsigned long _max)
+{
+	unsigned long start, end;
+	int i;
+
+	for_each_mem_detect_block(i, &start, &end) {
+		if (_min >= end)
+			continue;
+		if (start >= _max)
+			break;
+		start = max(_min, start);
+		end = min(_max, end);
+		if (end - start < kernel_size)
+			continue;
+		if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
+			return start + (pos - 1) * THREAD_SIZE;
+		pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
+	}
+
+	return 0;
 }
 
 unsigned long get_random_base(unsigned long safe_addr)
 {
-	unsigned long memory_limit = memory_end_set ? memory_end : 0;
-	unsigned long base, start, end, kernel_size;
-	unsigned long block_sum, offset;
+	unsigned long memory_limit = get_mem_detect_end();
+	unsigned long base_pos, max_pos, kernel_size;
 	unsigned long kasan_needs;
 	int i;
 
+	if (memory_end_set)
+		memory_limit = min(memory_limit, memory_end);
+
 	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) {
 		if (safe_addr < INITRD_START + INITRD_SIZE)
 			safe_addr = INITRD_START + INITRD_SIZE;
@@ -126,45 +208,17 @@
 	}
 
 	kernel_size = vmlinux.image_size + vmlinux.bss_size;
-	block_sum = 0;
-	for_each_mem_detect_block(i, &start, &end) {
-		if (memory_limit) {
-			if (start >= memory_limit)
-				break;
-			if (end > memory_limit)
-				end = memory_limit;
-		}
-		if (end - start < kernel_size)
-			continue;
-		block_sum += end - start - kernel_size;
-	}
-	if (!block_sum) {
+	if (safe_addr + kernel_size > memory_limit)
+		return 0;
+
+	max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit);
+	if (!max_pos) {
 		sclp_early_printk("KASLR disabled: not enough memory\n");
 		return 0;
 	}
 
-	base = get_random(block_sum);
-	if (base == 0)
+	/* we need a value in the range [1, base_pos] inclusive */
+	if (get_random(max_pos, &base_pos))
 		return 0;
-	if (base < safe_addr)
-		base = safe_addr;
-	block_sum = offset = 0;
-	for_each_mem_detect_block(i, &start, &end) {
-		if (memory_limit) {
-			if (start >= memory_limit)
-				break;
-			if (end > memory_limit)
-				end = memory_limit;
-		}
-		if (end - start < kernel_size)
-			continue;
-		block_sum += end - start - kernel_size;
-		if (base <= block_sum) {
-			base = start + base - offset;
-			base = ALIGN_DOWN(base, THREAD_SIZE);
-			break;
-		}
-		offset = block_sum;
-	}
-	return base;
+	return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
 }
diff --git a/arch/s390/boot/pgm_check_info.c b/arch/s390/boot/pgm_check_info.c
index 83b5b79..a3c9862 100644
--- a/arch/s390/boot/pgm_check_info.c
+++ b/arch/s390/boot/pgm_check_info.c
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <asm/lowcore.h>
+#include <asm/setup.h>
 #include <asm/sclp.h>
 #include "boot.h"
 
@@ -32,7 +33,8 @@
 	char *p;
 
 	add_str(buf, "Linux version ");
-	strlcat(buf, kernel_version, sizeof(buf));
+	strlcat(buf, kernel_version, sizeof(buf) - 1);
+	strlcat(buf, "\n", sizeof(buf));
 	sclp_early_printk(buf);
 
 	p = add_str(buf, "Kernel fault: interruption code ");
@@ -42,6 +44,13 @@
 	add_str(p, "\n");
 	sclp_early_printk(buf);
 
+	if (kaslr_enabled) {
+		p = add_str(buf, "Kernel random base: ");
+		p = add_val_as_hex(p, __kaslr_offset);
+		add_str(p, "\n");
+		sclp_early_printk(buf);
+	}
+
 	p = add_str(buf, "PSW : ");
 	p = add_val_as_hex(p, S390_lowcore.psw_save_area.mask);
 	p = add_str(p, " ");
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index fa01502..cc96b04 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -48,8 +48,6 @@
 };
 static struct diag210 _diag210_tmp_dma __section(".dma.data");
 struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
-void _swsusp_reset_dma(void);
-unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
 
 void error(char *x)
 {
@@ -120,6 +118,9 @@
 	}
 }
 
+/*
+ * This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
+ */
 static void clear_bss_section(void)
 {
 	memset((void *)vmlinux.default_lma + vmlinux.image_size, 0, vmlinux.bss_size);
diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S
index ea93314..5ff5fee 100644
--- a/arch/s390/boot/text_dma.S
+++ b/arch/s390/boot/text_dma.S
@@ -86,23 +86,6 @@
 ENDPROC(_diag0c_dma)
 
 /*
- * void _swsusp_reset_dma(void)
- */
-ENTRY(_swsusp_reset_dma)
-	larl	%r1,restart_entry
-	larl	%r2,.Lrestart_diag308_psw
-	og	%r1,0(%r2)
-	stg	%r1,0(%r0)
-	lghi	%r0,0
-	diag	%r0,%r0,0x308
-restart_entry:
-	lhi	%r1,1
-	sigp	%r1,%r0,SIGP_SET_ARCHITECTURE
-	sam64
-	BR_EX_DMA_r14
-ENDPROC(_swsusp_reset_dma)
-
-/*
  * void _diag308_reset_dma(void)
  *
  * Calls diag 308 subcode 1 and continues execution
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index 3f50115..b3501ea 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -3,7 +3,14 @@
 #include <asm/facility.h>
 #include <asm/sections.h>
 
+/* will be used in arch/s390/kernel/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 int __bootdata_preserved(prot_virt_guest);
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+#endif
+struct uv_info __bootdata_preserved(uv_info);
 
 void uv_query_info(void)
 {
@@ -19,7 +26,22 @@
 	if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
 		return;
 
+	if (IS_ENABLED(CONFIG_KVM)) {
+		memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));
+		uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;
+		uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;
+		uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;
+		uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;
+		uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;
+		uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
+		uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
+		uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
+		uv_info.uv_feature_indications = uvcb.uv_feature_indications;
+	}
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 	if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
 	    test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
 		prot_virt_guest = 1;
+#endif
 }
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 38d6403..fe6f529 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -1,5 +1,7 @@
+CONFIG_UAPI_HEADER_TEST=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
 CONFIG_AUDIT=y
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -14,7 +16,6 @@
 CONFIG_IKCONFIG_PROC=y
 CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
@@ -31,9 +32,9 @@
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 # CONFIG_SYSFS_SYSCALL is not set
+CONFIG_BPF_LSM=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -51,18 +52,15 @@
 CONFIG_VFIO_CCW=m
 CONFIG_VFIO_AP=m
 CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
 CONFIG_KVM=m
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
+CONFIG_S390_UNWIND_SELFTEST=y
 CONFIG_OPROFILE=m
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STATIC_KEYS_SELFTEST=y
-CONFIG_REFCOUNT_FULL=y
 CONFIG_LOCK_EVENT_COUNTS=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
@@ -77,6 +75,8 @@
 CONFIG_BLK_WBT=y
 CONFIG_BLK_CGROUP_IOLATENCY=y
 CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -94,10 +94,10 @@
 CONFIG_FRONTSWAP=y
 CONFIG_CMA_DEBUG=y
 CONFIG_CMA_DEBUGFS=y
+CONFIG_CMA_AREAS=7
 CONFIG_MEM_SOFT_DIRTY=y
 CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
-CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC=y
 CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
@@ -130,6 +130,7 @@
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
 CONFIG_INET_IPCOMP=m
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
@@ -144,6 +145,7 @@
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_MIP6=m
 CONFIG_IPV6_VTI=m
@@ -151,7 +153,10 @@
 CONFIG_IPV6_GRE=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
 CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_SECMARK=y
 CONFIG_NF_CONNTRACK_EVENTS=y
@@ -317,6 +322,7 @@
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
 CONFIG_BRIDGE=m
+CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_NET_SCHED=y
@@ -341,6 +347,7 @@
 CONFIG_NET_SCH_FQ_CODEL=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
@@ -364,6 +371,7 @@
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
 CONFIG_VSOCKETS=m
@@ -372,14 +380,14 @@
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
-# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
 CONFIG_PCI_DEBUG=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_DEVTMPFS=y
 CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
+CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
@@ -435,6 +443,7 @@
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_QL=m
 CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
 CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
@@ -448,6 +457,8 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m
@@ -475,14 +486,12 @@
 # CONFIG_NET_VENDOR_EMULEX is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_GOOGLE is not set
-# CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
 CONFIG_MLX5_CORE=m
 CONFIG_MLX5_CORE_EN=y
-# CONFIG_MLXFW is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_MICROCHIP is not set
 # CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -515,6 +524,7 @@
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -533,6 +543,7 @@
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
 CONFIG_HANGCHECK_TIMER=m
@@ -561,6 +572,8 @@
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -608,11 +621,13 @@
 CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
 CONFIG_NTFS_FS=m
 CONFIG_NTFS_RW=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -650,8 +665,8 @@
 CONFIG_DLM=m
 CONFIG_UNICODE=y
 CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_FORTIFY_SOURCE=y
@@ -675,8 +690,12 @@
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_GCM=y
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_SEQIV=y
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_PCBC=m
@@ -685,14 +704,13 @@
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
@@ -702,6 +720,7 @@
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -720,6 +739,9 @@
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 CONFIG_ZCRYPT=m
 CONFIG_PKEY=m
 CONFIG_CRYPTO_PAES_S390=m
@@ -732,6 +754,7 @@
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_CORDIC=m
 CONFIG_CRC32_SELFTEST=y
 CONFIG_CRC4=m
@@ -749,12 +772,13 @@
 CONFIG_GDB_SCRIPTS=y
 CONFIG_FRAME_WARN=1024
 CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_PAGEALLOC=y
 CONFIG_PAGE_OWNER=y
 CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
 CONFIG_DEBUG_OBJECTS=y
 CONFIG_DEBUG_OBJECTS_SELFTEST=y
 CONFIG_DEBUG_OBJECTS_FREE=y
@@ -773,9 +797,10 @@
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
 CONFIG_DEBUG_SHIRQ=y
+CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_WQ_WATCHDOG=y
-CONFIG_PANIC_ON_OOPS=y
+CONFIG_TEST_LOCKUP=m
 CONFIG_DEBUG_TIMEKEEPING=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
@@ -784,36 +809,43 @@
 CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_DEBUG_CREDENTIALS=y
 CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
+# CONFIG_RCU_TRACE is not set
+CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_HIST_TRIGGERS=y
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
 CONFIG_FAILSLAB=y
 CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAULT_INJECTION_USERCOPY=y
 CONFIG_FAIL_MAKE_REQUEST=y
 CONFIG_FAIL_IO_TIMEOUT=y
 CONFIG_FAIL_FUTEX=y
 CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAIL_FUNCTION=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
-CONFIG_LATENCYTOP=y
-CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPT_TRACER=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-CONFIG_HIST_TRIGGERS=y
 CONFIG_LKDTM=m
 CONFIG_TEST_LIST_SORT=y
+CONFIG_TEST_MIN_HEAP=y
 CONFIG_TEST_SORT=y
 CONFIG_KPROBES_SANITY_TEST=y
 CONFIG_RBTREE_TEST=y
 CONFIG_INTERVAL_TREE_TEST=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
+CONFIG_TEST_BITOPS=m
 CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 25f7998..17d5df2 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -1,5 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_WATCH_QUEUE=y
 CONFIG_AUDIT=y
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -13,7 +14,6 @@
 CONFIG_IKCONFIG_PROC=y
 CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
-CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
@@ -30,9 +30,9 @@
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 # CONFIG_SYSFS_SYSCALL is not set
+CONFIG_BPF_LSM=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
@@ -41,7 +41,6 @@
 CONFIG_TUNE_ZEC12=y
 CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
-# CONFIG_NUMA_EMU is not set
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
 CONFIG_KEXEC_SIG=y
@@ -51,13 +50,11 @@
 CONFIG_VFIO_CCW=m
 CONFIG_VFIO_AP=m
 CONFIG_CRASH_DUMP=y
-CONFIG_HIBERNATION=y
-CONFIG_PM_DEBUG=y
+CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
 CONFIG_CMM=m
 CONFIG_APPLDATA_BASE=y
 CONFIG_KVM=m
-CONFIG_VHOST_NET=m
-CONFIG_VHOST_VSOCK=m
+CONFIG_S390_UNWIND_SELFTEST=m
 CONFIG_OPROFILE=m
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
@@ -73,6 +70,8 @@
 CONFIG_BLK_WBT=y
 CONFIG_BLK_CGROUP_IOLATENCY=y
 CONFIG_BLK_CGROUP_IOCOST=y
+CONFIG_BLK_INLINE_ENCRYPTION=y
+CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_BSD_DISKLABEL=y
@@ -88,10 +87,10 @@
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CLEANCACHE=y
 CONFIG_FRONTSWAP=y
+CONFIG_CMA_AREAS=7
 CONFIG_MEM_SOFT_DIRTY=y
 CONFIG_ZSWAP=y
-CONFIG_ZBUD=m
-CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC=y
 CONFIG_ZSMALLOC_STAT=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_IDLE_PAGE_TRACKING=y
@@ -124,6 +123,7 @@
 CONFIG_NET_IPVTI=m
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
+CONFIG_INET_ESPINTCP=y
 CONFIG_INET_IPCOMP=m
 CONFIG_INET_DIAG=m
 CONFIG_INET_UDP_DIAG=m
@@ -138,6 +138,7 @@
 CONFIG_IPV6_ROUTER_PREF=y
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
+CONFIG_INET6_ESPINTCP=y
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_MIP6=m
 CONFIG_IPV6_VTI=m
@@ -145,7 +146,10 @@
 CONFIG_IPV6_GRE=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
+CONFIG_MPTCP=y
 CONFIG_NETFILTER=y
+CONFIG_BRIDGE_NETFILTER=m
 CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_SECMARK=y
 CONFIG_NF_CONNTRACK_EVENTS=y
@@ -310,6 +314,7 @@
 CONFIG_L2TP_IP=m
 CONFIG_L2TP_ETH=m
 CONFIG_BRIDGE=m
+CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_NET_SCHED=y
@@ -334,6 +339,7 @@
 CONFIG_NET_SCH_FQ_CODEL=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_ETS=m
 CONFIG_NET_CLS_BASIC=m
 CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
@@ -357,6 +363,7 @@
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_GATE=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
 CONFIG_VSOCKETS=m
@@ -365,14 +372,14 @@
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
-# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_PCI=y
+# CONFIG_PCIEASPM is not set
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_UEVENT_HELPER=y
 CONFIG_DEVTMPFS=y
 CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
+CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_DRBD=m
@@ -429,6 +436,7 @@
 CONFIG_DM_MULTIPATH=m
 CONFIG_DM_MULTIPATH_QL=m
 CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
 CONFIG_DM_DELAY=m
 CONFIG_DM_UEVENT=y
 CONFIG_DM_FLAKEY=m
@@ -443,6 +451,8 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_BAREUDP=m
 CONFIG_TUN=m
 CONFIG_VETH=m
 CONFIG_VIRTIO_NET=m
@@ -470,14 +480,12 @@
 # CONFIG_NET_VENDOR_EMULEX is not set
 # CONFIG_NET_VENDOR_EZCHIP is not set
 # CONFIG_NET_VENDOR_GOOGLE is not set
-# CONFIG_NET_VENDOR_HP is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
 CONFIG_MLX5_CORE=m
 CONFIG_MLX5_CORE_EN=y
-# CONFIG_MLXFW is not set
 # CONFIG_NET_VENDOR_MICREL is not set
 # CONFIG_NET_VENDOR_MICROCHIP is not set
 # CONFIG_NET_VENDOR_MICROSEMI is not set
@@ -510,6 +518,7 @@
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
 CONFIG_PPP_BSDCOMP=m
 CONFIG_PPP_DEFLATE=m
@@ -528,6 +537,7 @@
 # CONFIG_SERIO is not set
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_NULL_TTY=m
+CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
 CONFIG_HANGCHECK_TIMER=m
@@ -556,6 +566,8 @@
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -599,11 +611,13 @@
 CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
+CONFIG_EXFAT_FS=m
 CONFIG_NTFS_FS=m
 CONFIG_NTFS_RW=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -641,8 +655,8 @@
 CONFIG_DLM=m
 CONFIG_UNICODE=y
 CONFIG_PERSISTENT_KEYRINGS=y
-CONFIG_BIG_KEYS=y
 CONFIG_ENCRYPTED_KEYS=m
+CONFIG_KEY_NOTIFICATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
@@ -666,8 +680,12 @@
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
+CONFIG_CRYPTO_CURVE25519=m
+CONFIG_CRYPTO_GCM=y
 CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_AEGIS128=m
+CONFIG_CRYPTO_SEQIV=y
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_LRW=m
 CONFIG_CRYPTO_OFB=m
@@ -677,14 +695,13 @@
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_XXHASH=m
+CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
@@ -694,6 +711,7 @@
 CONFIG_CRYPTO_CAMELLIA=m
 CONFIG_CRYPTO_CAST5=m
 CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
 CONFIG_CRYPTO_SALSA20=m
@@ -712,6 +730,9 @@
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
 CONFIG_CRYPTO_STATS=y
+CONFIG_CRYPTO_LIB_BLAKE2S=m
+CONFIG_CRYPTO_LIB_CURVE25519=m
+CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
 CONFIG_ZCRYPT=m
 CONFIG_PKEY=m
 CONFIG_CRYPTO_PAES_S390=m
@@ -724,7 +745,9 @@
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
 CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_CORDIC=m
+CONFIG_PRIME_NUMBERS=m
 CONFIG_CRC4=m
 CONFIG_CRC7=m
 CONFIG_CRC8=m
@@ -737,20 +760,25 @@
 CONFIG_FRAME_WARN=1024
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_WX=y
+CONFIG_PTDUMP_DEBUGFS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_PANIC_ON_OOPS=y
+CONFIG_TEST_LOCKUP=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
 CONFIG_RCU_TORTURE_TEST=m
+CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_STACK_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
-CONFIG_STACK_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_LKDTM=m
 CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BPF=m
-CONFIG_BUG_ON_DATA_CORRUPTION=y
-CONFIG_S390_PTDUMP=y
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 20c51e5..a302630 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -17,11 +17,11 @@
 # CONFIG_CHSC_SCH is not set
 # CONFIG_SCM_BUS is not set
 CONFIG_CRASH_DUMP=y
-# CONFIG_SECCOMP is not set
 # CONFIG_PFAULT is not set
 # CONFIG_S390_HYPFS_FS is not set
 # CONFIG_VIRTUALIZATION is not set
 # CONFIG_S390_GUEST is not set
+# CONFIG_SECCOMP is not set
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -30,6 +30,7 @@
 # CONFIG_BOUNCE is not set
 CONFIG_NET=y
 # CONFIG_IUCV is not set
+# CONFIG_ETHTOOL_NETLINK is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_RAM=y
 # CONFIG_BLK_DEV_XPRAM is not set
@@ -55,6 +56,8 @@
 # CONFIG_MONWRITER is not set
 # CONFIG_S390_VMUR is not set
 # CONFIG_HID is not set
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set
@@ -62,12 +65,15 @@
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_LSM="yama,loadpin,safesetid,integrity"
+# CONFIG_ZLIB_DFLTCC is not set
 CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
+# CONFIG_RCU_TRACE is not set
 # CONFIG_FTRACE is not set
 # CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9803e96..7304463 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@
 	int key_len;
 	unsigned long fc;
 	union {
-		struct crypto_sync_skcipher *blk;
+		struct crypto_skcipher *skcipher;
 		struct crypto_cipher *cip;
 	} fallback;
 };
@@ -54,7 +54,7 @@
 	u8 pcc_key[32];
 	int key_len;
 	unsigned long fc;
-	struct crypto_sync_skcipher *fallback;
+	struct crypto_skcipher *fallback;
 };
 
 struct gcm_sg_walk {
@@ -72,19 +72,12 @@
 		unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
 
 	sctx->fallback.cip->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
 	sctx->fallback.cip->base.crt_flags |= (tfm->crt_flags &
 			CRYPTO_TFM_REQ_MASK);
 
-	ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
-	if (ret) {
-		tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-		tfm->crt_flags |= (sctx->fallback.cip->base.crt_flags &
-				CRYPTO_TFM_RES_MASK);
-	}
-	return ret;
+	return crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len);
 }
 
 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -178,66 +171,36 @@
 	}
 };
 
-static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
-		unsigned int len)
+static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				    unsigned int len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	unsigned int ret;
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 
-	crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
-					 CRYPTO_TFM_REQ_MASK);
-	crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
-						      CRYPTO_TFM_REQ_MASK);
-
-	ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
-
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
-			  CRYPTO_TFM_RES_MASK;
-
-	return ret;
+	crypto_skcipher_clear_flags(sctx->fallback.skcipher,
+				    CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(sctx->fallback.skcipher,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(sctx->fallback.skcipher, key, len);
 }
 
-static int fallback_blk_dec(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
+static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx,
+				   struct skcipher_request *req,
+				   unsigned long modifier)
 {
-	unsigned int ret;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
+	struct skcipher_request *subreq = skcipher_request_ctx(req);
 
-	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_decrypt(req);
-
-	skcipher_request_zero(req);
-	return ret;
+	*subreq = *req;
+	skcipher_request_set_tfm(subreq, sctx->fallback.skcipher);
+	return (modifier & CPACF_DECRYPT) ?
+		crypto_skcipher_decrypt(subreq) :
+		crypto_skcipher_encrypt(subreq);
 }
 
-static int fallback_blk_enc(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	unsigned int ret;
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
-	skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_encrypt(req);
-	return ret;
-}
-
-static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 
 	/* Pick the correct function code based on the key length */
@@ -248,111 +211,92 @@
 	/* Check if the function code is available */
 	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
 	if (!sctx->fc)
-		return setkey_fallback_blk(tfm, in_key, key_len);
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
 	memcpy(sctx->key, in_key, key_len);
 	return 0;
 }
 
-static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, modifier);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		cpacf_km(sctx->fc | modifier, sctx->key,
-			 walk->dst.virt.addr, walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
-
 	return ret;
 }
 
-static int ecb_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_aes_encrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, 0, &walk);
+	return ecb_aes_crypt(req, 0);
 }
 
-static int ecb_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_aes_decrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
+	return ecb_aes_crypt(req, CPACF_DECRYPT);
 }
 
-static int fallback_init_blk(struct crypto_tfm *tfm)
+static int fallback_init_skcipher(struct crypto_skcipher *tfm)
 {
-	const char *name = tfm->__crt_alg->cra_name;
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 
-	sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
-						   CRYPTO_ALG_NEED_FALLBACK);
+	sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0,
+				CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
 
-	if (IS_ERR(sctx->fallback.blk)) {
+	if (IS_ERR(sctx->fallback.skcipher)) {
 		pr_err("Allocating AES fallback algorithm %s failed\n",
 		       name);
-		return PTR_ERR(sctx->fallback.blk);
+		return PTR_ERR(sctx->fallback.skcipher);
 	}
 
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(sctx->fallback.skcipher));
 	return 0;
 }
 
-static void fallback_exit_blk(struct crypto_tfm *tfm)
+static void fallback_exit_skcipher(struct crypto_skcipher *tfm)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_sync_skcipher(sctx->fallback.blk);
+	crypto_free_skcipher(sctx->fallback.skcipher);
 }
 
-static struct crypto_alg ecb_aes_alg = {
-	.cra_name		=	"ecb(aes)",
-	.cra_driver_name	=	"ecb-aes-s390",
-	.cra_priority		=	401,	/* combo: aes + ecb + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	fallback_init_blk,
-	.cra_exit		=	fallback_exit_blk,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.setkey			=	ecb_aes_set_key,
-			.encrypt		=	ecb_aes_encrypt,
-			.decrypt		=	ecb_aes_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_aes_alg = {
+	.base.cra_name		=	"ecb(aes)",
+	.base.cra_driver_name	=	"ecb-aes-s390",
+	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.setkey			=	ecb_aes_set_key,
+	.encrypt		=	ecb_aes_encrypt,
+	.decrypt		=	ecb_aes_decrypt,
 };
 
-static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 
 	/* Pick the correct function code based on the key length */
@@ -363,17 +307,18 @@
 	/* Check if the function code is available */
 	sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
 	if (!sctx->fc)
-		return setkey_fallback_blk(tfm, in_key, key_len);
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
 	memcpy(sctx->key, in_key, key_len);
 	return 0;
 }
 
-static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 	struct {
@@ -381,134 +326,70 @@
 		u8 key[AES_MAX_KEY_SIZE];
 	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, modifier);
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
 	memcpy(param.key, sctx->key, sctx->key_len);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		cpacf_kmc(sctx->fc | modifier, &param,
-			  walk->dst.virt.addr, walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			  walk.dst.virt.addr, walk.src.virt.addr, n);
+		memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
-	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
+	memzero_explicit(&param, sizeof(param));
 	return ret;
 }
 
-static int cbc_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_aes_encrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, 0, &walk);
+	return cbc_aes_crypt(req, 0);
 }
 
-static int cbc_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_aes_decrypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
+	return cbc_aes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_aes_alg = {
-	.cra_name		=	"cbc(aes)",
-	.cra_driver_name	=	"cbc-aes-s390",
-	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	fallback_init_blk,
-	.cra_exit		=	fallback_exit_blk,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	cbc_aes_set_key,
-			.encrypt		=	cbc_aes_encrypt,
-			.decrypt		=	cbc_aes_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_aes_alg = {
+	.base.cra_name		=	"cbc(aes)",
+	.base.cra_driver_name	=	"cbc-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	cbc_aes_set_key,
+	.encrypt		=	cbc_aes_encrypt,
+	.decrypt		=	cbc_aes_decrypt,
 };
 
-static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
-				   unsigned int len)
+static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int len)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
-	unsigned int ret;
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
-					 CRYPTO_TFM_REQ_MASK);
-	crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
-						     CRYPTO_TFM_REQ_MASK);
-
-	ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
-
-	tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
-	tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
-			  CRYPTO_TFM_RES_MASK;
-
-	return ret;
+	crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(xts_ctx->fallback,
+				  crypto_skcipher_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(xts_ctx->fallback, key, len);
 }
 
-static int xts_fallback_decrypt(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
-	unsigned int ret;
-
-	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_decrypt(req);
-
-	skcipher_request_zero(req);
-	return ret;
-}
-
-static int xts_fallback_encrypt(struct blkcipher_desc *desc,
-		struct scatterlist *dst, struct scatterlist *src,
-		unsigned int nbytes)
-{
-	struct crypto_blkcipher *tfm = desc->tfm;
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
-	SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
-	unsigned int ret;
-
-	skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
-	skcipher_request_set_callback(req, desc->flags, NULL, NULL);
-	skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
-	ret = crypto_skcipher_encrypt(req);
-
-	skcipher_request_zero(req);
-	return ret;
-}
-
-static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 	int err;
 
@@ -517,10 +398,8 @@
 		return err;
 
 	/* In fips mode only 128 bit or 256 bit keys are valid */
-	if (fips_enabled && key_len != 32 && key_len != 64) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+	if (fips_enabled && key_len != 32 && key_len != 64)
 		return -EINVAL;
-	}
 
 	/* Pick the correct function code based on the key length */
 	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
@@ -539,10 +418,11 @@
 	return 0;
 }
 
-static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int offset, nbytes, n;
 	int ret;
 	struct {
@@ -557,113 +437,102 @@
 		u8 init[16];
 	} xts_param;
 
-	ret = blkcipher_walk_virt(desc, walk);
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+		struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+		*subreq = *req;
+		skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+		return (modifier & CPACF_DECRYPT) ?
+			crypto_skcipher_decrypt(subreq) :
+			crypto_skcipher_encrypt(subreq);
+	}
+
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
 	offset = xts_ctx->key_len & 0x10;
 	memset(pcc_param.block, 0, sizeof(pcc_param.block));
 	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
 	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
-	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
 	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
 	cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
 
 	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
 	memcpy(xts_param.init, pcc_param.xts, 16);
 
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
-			 walk->dst.virt.addr, walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			 walk.dst.virt.addr, walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
+	memzero_explicit(&pcc_param, sizeof(pcc_param));
+	memzero_explicit(&xts_param, sizeof(xts_param));
 	return ret;
 }
 
-static int xts_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int xts_aes_encrypt(struct skcipher_request *req)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (!nbytes)
-		return -EINVAL;
-
-	if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
-		return xts_fallback_encrypt(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, 0, &walk);
+	return xts_aes_crypt(req, 0);
 }
 
-static int xts_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int xts_aes_decrypt(struct skcipher_request *req)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (!nbytes)
-		return -EINVAL;
-
-	if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
-		return xts_fallback_decrypt(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
+	return xts_aes_crypt(req, CPACF_DECRYPT);
 }
 
-static int xts_fallback_init(struct crypto_tfm *tfm)
+static int xts_fallback_init(struct crypto_skcipher *tfm)
 {
-	const char *name = tfm->__crt_alg->cra_name;
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 
-	xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
-						  CRYPTO_ALG_NEED_FALLBACK);
+	xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+				CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
 
 	if (IS_ERR(xts_ctx->fallback)) {
 		pr_err("Allocating XTS fallback algorithm %s failed\n",
 		       name);
 		return PTR_ERR(xts_ctx->fallback);
 	}
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+				    crypto_skcipher_reqsize(xts_ctx->fallback));
 	return 0;
 }
 
-static void xts_fallback_exit(struct crypto_tfm *tfm)
+static void xts_fallback_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+	struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_sync_skcipher(xts_ctx->fallback);
+	crypto_free_skcipher(xts_ctx->fallback);
 }
 
-static struct crypto_alg xts_aes_alg = {
-	.cra_name		=	"xts(aes)",
-	.cra_driver_name	=	"xts-aes-s390",
-	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_xts_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	xts_fallback_init,
-	.cra_exit		=	xts_fallback_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	2 * AES_MIN_KEY_SIZE,
-			.max_keysize		=	2 * AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	xts_aes_set_key,
-			.encrypt		=	xts_aes_encrypt,
-			.decrypt		=	xts_aes_decrypt,
-		}
-	}
+static struct skcipher_alg xts_aes_alg = {
+	.base.cra_name		=	"xts(aes)",
+	.base.cra_driver_name	=	"xts-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_xts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	xts_fallback_init,
+	.exit			=	xts_fallback_exit,
+	.min_keysize		=	2 * AES_MIN_KEY_SIZE,
+	.max_keysize		=	2 * AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	xts_aes_set_key,
+	.encrypt		=	xts_aes_encrypt,
+	.decrypt		=	xts_aes_decrypt,
 };
 
-static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
-	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	unsigned long fc;
 
 	/* Pick the correct function code based on the key length */
@@ -674,7 +543,7 @@
 	/* Check if the function code is available */
 	sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
 	if (!sctx->fc)
-		return setkey_fallback_blk(tfm, in_key, key_len);
+		return setkey_fallback_skcipher(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
 	memcpy(sctx->key, in_key, key_len);
@@ -696,30 +565,34 @@
 	return n;
 }
 
-static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			 struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct skcipher_request *req)
 {
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
 	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
 	unsigned int n, nbytes;
 	int ret, locked;
 
+	if (unlikely(!sctx->fc))
+		return fallback_skcipher_crypt(sctx, req, 0);
+
 	locked = mutex_trylock(&ctrblk_lock);
 
-	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		n = AES_BLOCK_SIZE;
+
 		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
-		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
-		cpacf_kmctr(sctx->fc | modifier, sctx->key,
-			    walk->dst.virt.addr, walk->src.virt.addr,
-			    n, ctrptr);
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr,
+			    walk.src.virt.addr, n, ctrptr);
 		if (ctrptr == ctrblk)
-			memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+			memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE,
 			       AES_BLOCK_SIZE);
-		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	if (locked)
 		mutex_unlock(&ctrblk_lock);
@@ -727,67 +600,33 @@
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
-		cpacf_kmctr(sctx->fc | modifier, sctx->key,
-			    buf, walk->src.virt.addr,
-			    AES_BLOCK_SIZE, walk->iv);
-		memcpy(walk->dst.virt.addr, buf, nbytes);
-		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, 0);
+		cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+			    AES_BLOCK_SIZE, walk.iv);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
 	}
 
 	return ret;
 }
 
-static int ctr_aes_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_enc(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, 0, &walk);
-}
-
-static int ctr_aes_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
-{
-	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	struct blkcipher_walk walk;
-
-	if (unlikely(!sctx->fc))
-		return fallback_blk_dec(desc, dst, src, nbytes);
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_aes_alg = {
-	.cra_name		=	"ctr(aes)",
-	.cra_driver_name	=	"ctr-aes-s390",
-	.cra_priority		=	402,	/* ecb-aes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
-					CRYPTO_ALG_NEED_FALLBACK,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_init		=	fallback_init_blk,
-	.cra_exit		=	fallback_exit_blk,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	AES_MIN_KEY_SIZE,
-			.max_keysize		=	AES_MAX_KEY_SIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ctr_aes_set_key,
-			.encrypt		=	ctr_aes_encrypt,
-			.decrypt		=	ctr_aes_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_aes_alg = {
+	.base.cra_name		=	"ctr(aes)",
+	.base.cra_driver_name	=	"ctr-aes-s390",
+	.base.cra_priority	=	402,	/* ecb-aes-s390 + 1 */
+	.base.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_aes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.init			=	fallback_init_skcipher,
+	.exit			=	fallback_exit_skcipher,
+	.min_keysize		=	AES_MIN_KEY_SIZE,
+	.max_keysize		=	AES_MAX_KEY_SIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	ctr_aes_set_key,
+	.encrypt		=	ctr_aes_crypt,
+	.decrypt		=	ctr_aes_crypt,
+	.chunksize		=	AES_BLOCK_SIZE,
 };
 
 static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1116,24 +955,27 @@
 	},
 };
 
-static struct crypto_alg *aes_s390_algs_ptr[5];
-static int aes_s390_algs_num;
+static struct crypto_alg *aes_s390_alg;
+static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static int aes_s390_skciphers_num;
 static struct aead_alg *aes_s390_aead_alg;
 
-static int aes_s390_register_alg(struct crypto_alg *alg)
+static int aes_s390_register_skcipher(struct skcipher_alg *alg)
 {
 	int ret;
 
-	ret = crypto_register_alg(alg);
+	ret = crypto_register_skcipher(alg);
 	if (!ret)
-		aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+		aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg;
 	return ret;
 }
 
 static void aes_s390_fini(void)
 {
-	while (aes_s390_algs_num--)
-		crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+	if (aes_s390_alg)
+		crypto_unregister_alg(aes_s390_alg);
+	while (aes_s390_skciphers_num--)
+		crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]);
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
 
@@ -1154,10 +996,11 @@
 	if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
-		ret = aes_s390_register_alg(&aes_alg);
+		ret = crypto_register_alg(&aes_alg);
 		if (ret)
 			goto out_err;
-		ret = aes_s390_register_alg(&ecb_aes_alg);
+		aes_s390_alg = &aes_alg;
+		ret = aes_s390_register_skcipher(&ecb_aes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -1165,14 +1008,14 @@
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
-		ret = aes_s390_register_alg(&cbc_aes_alg);
+		ret = aes_s390_register_skcipher(&cbc_aes_alg);
 		if (ret)
 			goto out_err;
 	}
 
 	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
-		ret = aes_s390_register_alg(&xts_aes_alg);
+		ret = aes_s390_register_skcipher(&xts_aes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -1185,7 +1028,7 @@
 			ret = -ENOMEM;
 			goto out_err;
 		}
-		ret = aes_s390_register_alg(&ctr_aes_alg);
+		ret = aes_s390_register_skcipher(&ctr_aes_alg);
 		if (ret)
 			goto out_err;
 	}
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 423ee05..fafecad 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -111,10 +111,8 @@
 {
 	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (newkeylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (newkeylen != sizeof(mctx->key))
 		return -EINVAL;
-	}
 	mctx->key = le32_to_cpu(*(__le32 *)newkey);
 	return 0;
 }
@@ -124,10 +122,8 @@
 {
 	struct crc_ctx *mctx = crypto_shash_ctx(tfm);
 
-	if (newkeylen != sizeof(mctx->key)) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (newkeylen != sizeof(mctx->key))
 		return -EINVAL;
-	}
 	mctx->key = be32_to_cpu(*(__be32 *)newkey);
 	return 0;
 }
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 439b100..bfbafd3 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
 #include <asm/cpacf.h>
 
 #define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
@@ -45,6 +46,12 @@
 	return 0;
 }
 
+static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+			       unsigned int key_len)
+{
+	return des_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
 static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -79,28 +86,30 @@
 	}
 };
 
-static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
-			    struct blkcipher_walk *walk)
+static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		cpacf_km(fc, ctx->key, walk->dst.virt.addr,
-			 walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		cpacf_km(fc, ctx->key, walk.dst.virt.addr,
+			 walk.src.virt.addr, n);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	return ret;
 }
 
-static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
-			    struct blkcipher_walk *walk)
+static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n;
 	int ret;
 	struct {
@@ -108,99 +117,69 @@
 		u8 key[DES3_KEY_SIZE];
 	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+	memcpy(param.iv, walk.iv, DES_BLOCK_SIZE);
 	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
-	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		cpacf_kmc(fc, &param, walk->dst.virt.addr,
-			  walk->src.virt.addr, n);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		cpacf_kmc(fc, &param, walk.dst.virt.addr,
+			  walk.src.virt.addr, n);
+		memcpy(walk.iv, param.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
-	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
 	return ret;
 }
 
-static int ecb_des_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_des_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
+	return ecb_desall_crypt(req, CPACF_KM_DEA);
 }
 
-static int ecb_des_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ecb_des_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
+	return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT);
 }
 
-static struct crypto_alg ecb_des_alg = {
-	.cra_name		=	"ecb(des)",
-	.cra_driver_name	=	"ecb-des-s390",
-	.cra_priority		=	400,	/* combo: des + ecb */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES_KEY_SIZE,
-			.max_keysize		=	DES_KEY_SIZE,
-			.setkey			=	des_setkey,
-			.encrypt		=	ecb_des_encrypt,
-			.decrypt		=	ecb_des_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_des_alg = {
+	.base.cra_name		=	"ecb(des)",
+	.base.cra_driver_name	=	"ecb-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + ecb */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	ecb_des_encrypt,
+	.decrypt		=	ecb_des_decrypt,
 };
 
-static int cbc_des_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_des_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
+	return cbc_desall_crypt(req, CPACF_KMC_DEA);
 }
 
-static int cbc_des_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int cbc_des_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
+	return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_des_alg = {
-	.cra_name		=	"cbc(des)",
-	.cra_driver_name	=	"cbc-des-s390",
-	.cra_priority		=	400,	/* combo: des + cbc */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES_KEY_SIZE,
-			.max_keysize		=	DES_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des_setkey,
-			.encrypt		=	cbc_des_encrypt,
-			.decrypt		=	cbc_des_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_des_alg = {
+	.base.cra_name		=	"cbc(des)",
+	.base.cra_driver_name	=	"cbc-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + cbc */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	cbc_des_encrypt,
+	.decrypt		=	cbc_des_decrypt,
 };
 
 /*
@@ -232,6 +211,12 @@
 	return 0;
 }
 
+static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+				unsigned int key_len)
+{
+	return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
 static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -266,87 +251,53 @@
 	}
 };
 
-static int ecb_des3_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_des3_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
+	return ecb_desall_crypt(req, CPACF_KM_TDEA_192);
 }
 
-static int ecb_des3_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_des3_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
-				&walk);
+	return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT);
 }
 
-static struct crypto_alg ecb_des3_alg = {
-	.cra_name		=	"ecb(des3_ede)",
-	.cra_driver_name	=	"ecb-des3_ede-s390",
-	.cra_priority		=	400,	/* combo: des3 + ecb */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_KEY_SIZE,
-			.max_keysize		=	DES3_KEY_SIZE,
-			.setkey			=	des3_setkey,
-			.encrypt		=	ecb_des3_encrypt,
-			.decrypt		=	ecb_des3_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_des3_alg = {
+	.base.cra_name		=	"ecb(des3_ede)",
+	.base.cra_driver_name	=	"ecb-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + ecb */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	ecb_des3_encrypt,
+	.decrypt		=	ecb_des3_decrypt,
 };
 
-static int cbc_des3_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_des3_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
+	return cbc_desall_crypt(req, CPACF_KMC_TDEA_192);
 }
 
-static int cbc_des3_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_des3_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
-				&walk);
+	return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_des3_alg = {
-	.cra_name		=	"cbc(des3_ede)",
-	.cra_driver_name	=	"cbc-des3_ede-s390",
-	.cra_priority		=	400,	/* combo: des3 + cbc */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	DES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_KEY_SIZE,
-			.max_keysize		=	DES3_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des3_setkey,
-			.encrypt		=	cbc_des3_encrypt,
-			.decrypt		=	cbc_des3_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_des3_alg = {
+	.base.cra_name		=	"cbc(des3_ede)",
+	.base.cra_driver_name	=	"cbc-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + cbc */
+	.base.cra_blocksize	=	DES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	cbc_des3_encrypt,
+	.decrypt		=	cbc_des3_decrypt,
 };
 
 static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -364,128 +315,90 @@
 	return n;
 }
 
-static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
-			    struct blkcipher_walk *walk)
+static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 buf[DES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
 	unsigned int n, nbytes;
 	int ret, locked;
 
 	locked = mutex_trylock(&ctrblk_lock);
 
-	ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
-	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) {
 		n = DES_BLOCK_SIZE;
 		if (nbytes >= 2*DES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
-		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
-		cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
-			    walk->src.virt.addr, n, ctrptr);
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr,
+			    walk.src.virt.addr, n, ctrptr);
 		if (ctrptr == ctrblk)
-			memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+			memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE,
 				DES_BLOCK_SIZE);
-		crypto_inc(walk->iv, DES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+		crypto_inc(walk.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes - n);
 	}
 	if (locked)
 		mutex_unlock(&ctrblk_lock);
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
-		cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
-			    DES_BLOCK_SIZE, walk->iv);
-		memcpy(walk->dst.virt.addr, buf, nbytes);
-		crypto_inc(walk->iv, DES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, 0);
+		cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr,
+			    DES_BLOCK_SIZE, walk.iv);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, DES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, 0);
 	}
 	return ret;
 }
 
-static int ctr_des_encrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
+static int ctr_des_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
+	return ctr_desall_crypt(req, CPACF_KMCTR_DEA);
 }
 
-static int ctr_des_decrypt(struct blkcipher_desc *desc,
-			   struct scatterlist *dst, struct scatterlist *src,
-			   unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_des_alg = {
-	.cra_name		=	"ctr(des)",
-	.cra_driver_name	=	"ctr-des-s390",
-	.cra_priority		=	400,	/* combo: des + ctr */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES_KEY_SIZE,
-			.max_keysize		=	DES_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des_setkey,
-			.encrypt		=	ctr_des_encrypt,
-			.decrypt		=	ctr_des_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_des_alg = {
+	.base.cra_name		=	"ctr(des)",
+	.base.cra_driver_name	=	"ctr-des-s390",
+	.base.cra_priority	=	400,	/* combo: des + ctr */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES_KEY_SIZE,
+	.max_keysize		=	DES_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des_setkey_skcipher,
+	.encrypt		=	ctr_des_crypt,
+	.decrypt		=	ctr_des_crypt,
+	.chunksize		=	DES_BLOCK_SIZE,
 };
 
-static int ctr_des3_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ctr_des3_crypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
+	return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192);
 }
 
-static int ctr_des3_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
-				&walk);
-}
-
-static struct crypto_alg ctr_des3_alg = {
-	.cra_name		=	"ctr(des3_ede)",
-	.cra_driver_name	=	"ctr-des3_ede-s390",
-	.cra_priority		=	400,	/* combo: des3 + ede */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_des_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	DES3_KEY_SIZE,
-			.max_keysize		=	DES3_KEY_SIZE,
-			.ivsize			=	DES_BLOCK_SIZE,
-			.setkey			=	des3_setkey,
-			.encrypt		=	ctr_des3_encrypt,
-			.decrypt		=	ctr_des3_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_des3_alg = {
+	.base.cra_name		=	"ctr(des3_ede)",
+	.base.cra_driver_name	=	"ctr-des3_ede-s390",
+	.base.cra_priority	=	400,	/* combo: des3 + ede */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_des_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.min_keysize		=	DES3_KEY_SIZE,
+	.max_keysize		=	DES3_KEY_SIZE,
+	.ivsize			=	DES_BLOCK_SIZE,
+	.setkey			=	des3_setkey_skcipher,
+	.encrypt		=	ctr_des3_crypt,
+	.decrypt		=	ctr_des3_crypt,
+	.chunksize		=	DES_BLOCK_SIZE,
 };
 
-static struct crypto_alg *des_s390_algs_ptr[8];
+static struct crypto_alg *des_s390_algs_ptr[2];
 static int des_s390_algs_num;
+static struct skcipher_alg *des_s390_skciphers_ptr[6];
+static int des_s390_skciphers_num;
 
 static int des_s390_register_alg(struct crypto_alg *alg)
 {
@@ -497,10 +410,22 @@
 	return ret;
 }
 
+static int des_s390_register_skcipher(struct skcipher_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_skcipher(alg);
+	if (!ret)
+		des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg;
+	return ret;
+}
+
 static void des_s390_exit(void)
 {
 	while (des_s390_algs_num--)
 		crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+	while (des_s390_skciphers_num--)
+		crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]);
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
 }
@@ -518,12 +443,12 @@
 		ret = des_s390_register_alg(&des_alg);
 		if (ret)
 			goto out_err;
-		ret = des_s390_register_alg(&ecb_des_alg);
+		ret = des_s390_register_skcipher(&ecb_des_alg);
 		if (ret)
 			goto out_err;
 	}
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
-		ret = des_s390_register_alg(&cbc_des_alg);
+		ret = des_s390_register_skcipher(&cbc_des_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -531,12 +456,12 @@
 		ret = des_s390_register_alg(&des3_alg);
 		if (ret)
 			goto out_err;
-		ret = des_s390_register_alg(&ecb_des3_alg);
+		ret = des_s390_register_skcipher(&ecb_des3_alg);
 		if (ret)
 			goto out_err;
 	}
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
-		ret = des_s390_register_alg(&cbc_des3_alg);
+		ret = des_s390_register_skcipher(&cbc_des3_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -551,12 +476,12 @@
 	}
 
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
-		ret = des_s390_register_alg(&ctr_des_alg);
+		ret = des_s390_register_skcipher(&ctr_des_alg);
 		if (ret)
 			goto out_err;
 	}
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
-		ret = des_s390_register_alg(&ctr_des3_alg);
+		ret = des_s390_register_skcipher(&ctr_des3_alg);
 		if (ret)
 			goto out_err;
 	}
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index a3e7400..6b07a2f 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -43,10 +43,8 @@
 {
 	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
 
-	if (keylen != GHASH_BLOCK_SIZE) {
-		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	if (keylen != GHASH_BLOCK_SIZE)
 		return -EINVAL;
-	}
 
 	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
 
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 6184dce..f3caeb1 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -5,7 +5,7 @@
  * s390 implementation of the AES Cipher Algorithm with protected keys.
  *
  * s390 Version:
- *   Copyright IBM Corp. 2017,2019
+ *   Copyright IBM Corp. 2017,2020
  *   Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *		Harald Freudenberger <freude@de.ibm.com>
  */
@@ -20,7 +20,9 @@
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <linux/init.h>
+#include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <crypto/internal/skcipher.h>
 #include <crypto/xts.h>
 #include <asm/cpacf.h>
 #include <asm/pkey.h>
@@ -31,11 +33,11 @@
  * is called. As paes can handle different kinds of key blobs
  * and padding is also possible, the limits need to be generous.
  */
-#define PAES_MIN_KEYSIZE 64
-#define PAES_MAX_KEYSIZE 256
+#define PAES_MIN_KEYSIZE 16
+#define PAES_MAX_KEYSIZE 320
 
 static u8 *ctrblk;
-static DEFINE_SPINLOCK(ctrblk_lock);
+static DEFINE_MUTEX(ctrblk_lock);
 
 static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
 
@@ -52,19 +54,46 @@
 	unsigned int keylen;
 };
 
-static inline int _copy_key_to_kb(struct key_blob *kb,
-				  const u8 *key,
-				  unsigned int keylen)
+static inline int _key_to_kb(struct key_blob *kb,
+			     const u8 *key,
+			     unsigned int keylen)
 {
-	if (keylen <= sizeof(kb->keybuf))
+	struct clearkey_header {
+		u8  type;
+		u8  res0[3];
+		u8  version;
+		u8  res1[3];
+		u32 keytype;
+		u32 len;
+	} __packed * h;
+
+	switch (keylen) {
+	case 16:
+	case 24:
+	case 32:
+		/* clear key value, prepare pkey clear key token in keybuf */
+		memset(kb->keybuf, 0, sizeof(kb->keybuf));
+		h = (struct clearkey_header *) kb->keybuf;
+		h->version = 0x02; /* TOKVER_CLEAR_KEY */
+		h->keytype = (keylen - 8) >> 3;
+		h->len = keylen;
+		memcpy(kb->keybuf + sizeof(*h), key, keylen);
+		kb->keylen = sizeof(*h) + keylen;
 		kb->key = kb->keybuf;
-	else {
-		kb->key = kmalloc(keylen, GFP_KERNEL);
-		if (!kb->key)
-			return -ENOMEM;
+		break;
+	default:
+		/* other key material, let pkey handle this */
+		if (keylen <= sizeof(kb->keybuf))
+			kb->key = kb->keybuf;
+		else {
+			kb->key = kmalloc(keylen, GFP_KERNEL);
+			if (!kb->key)
+				return -ENOMEM;
+		}
+		memcpy(kb->key, key, keylen);
+		kb->keylen = keylen;
+		break;
 	}
-	memcpy(kb->key, key, keylen);
-	kb->keylen = keylen;
 
 	return 0;
 }
@@ -81,16 +110,18 @@
 struct s390_paes_ctx {
 	struct key_blob kb;
 	struct pkey_protkey pk;
+	spinlock_t pk_lock;
 	unsigned long fc;
 };
 
 struct s390_pxts_ctx {
 	struct key_blob kb[2];
 	struct pkey_protkey pk[2];
+	spinlock_t pk_lock;
 	unsigned long fc;
 };
 
-static inline int __paes_convert_key(struct key_blob *kb,
+static inline int __paes_keyblob2pkey(struct key_blob *kb,
 				     struct pkey_protkey *pk)
 {
 	int i, ret;
@@ -105,11 +136,42 @@
 	return ret;
 }
 
-static int __paes_set_key(struct s390_paes_ctx *ctx)
+static inline int __paes_convert_key(struct s390_paes_ctx *ctx)
+{
+	struct pkey_protkey pkey;
+
+	if (__paes_keyblob2pkey(&ctx->kb, &pkey))
+		return -EINVAL;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(&ctx->pk, &pkey, sizeof(pkey));
+	spin_unlock_bh(&ctx->pk_lock);
+
+	return 0;
+}
+
+static int ecb_paes_init(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->kb.key = NULL;
+	spin_lock_init(&ctx->pk_lock);
+
+	return 0;
+}
+
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
+{
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	_free_kb_keybuf(&ctx->kb);
+}
+
+static inline int __ecb_paes_set_key(struct s390_paes_ctx *ctx)
 {
 	unsigned long fc;
 
-	if (__paes_convert_key(&ctx->kb, &ctx->pk))
+	if (__paes_convert_key(ctx))
 		return -EINVAL;
 
 	/* Pick the correct function code based on the protected key type */
@@ -123,128 +185,106 @@
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int ecb_paes_init(struct crypto_tfm *tfm)
-{
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	ctx->kb.key = NULL;
-
-	return 0;
-}
-
-static void ecb_paes_exit(struct crypto_tfm *tfm)
-{
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	_free_kb_keybuf(&ctx->kb);
-}
-
-static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
 	int rc;
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
-	rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
+	rc = _key_to_kb(&ctx->kb, in_key, key_len);
 	if (rc)
 		return rc;
 
-	if (__paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
+	return __ecb_paes_set_key(ctx);
 }
 
-static int ecb_paes_crypt(struct blkcipher_desc *desc,
-			  unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n, k;
 	int ret;
+	struct {
+		u8 key[MAXPROTKEYSIZE];
+	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+	spin_unlock_bh(&ctx->pk_lock);
+
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
-			     walk->dst.virt.addr, walk->src.virt.addr, n);
+		k = cpacf_km(ctx->fc | modifier, &param,
+			     walk.dst.virt.addr, walk.src.virt.addr, n);
 		if (k)
-			ret = blkcipher_walk_done(desc, walk, nbytes - k);
+			ret = skcipher_walk_done(&walk, nbytes - k);
 		if (k < n) {
-			if (__paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+			if (__paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
 		}
 	}
 	return ret;
 }
 
-static int ecb_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_paes_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+	return ecb_paes_crypt(req, 0);
 }
 
-static int ecb_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int ecb_paes_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+	return ecb_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg ecb_paes_alg = {
-	.cra_name		=	"ecb(paes)",
-	.cra_driver_name	=	"ecb-paes-s390",
-	.cra_priority		=	401,	/* combo: aes + ecb + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.cra_list),
-	.cra_init		=	ecb_paes_init,
-	.cra_exit		=	ecb_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	PAES_MIN_KEYSIZE,
-			.max_keysize		=	PAES_MAX_KEYSIZE,
-			.setkey			=	ecb_paes_set_key,
-			.encrypt		=	ecb_paes_encrypt,
-			.decrypt		=	ecb_paes_decrypt,
-		}
-	}
+static struct skcipher_alg ecb_paes_alg = {
+	.base.cra_name		=	"ecb(paes)",
+	.base.cra_driver_name	=	"ecb-paes-s390",
+	.base.cra_priority	=	401,	/* combo: aes + ecb + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
+	.init			=	ecb_paes_init,
+	.exit			=	ecb_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.setkey			=	ecb_paes_set_key,
+	.encrypt		=	ecb_paes_encrypt,
+	.decrypt		=	ecb_paes_decrypt,
 };
 
-static int cbc_paes_init(struct crypto_tfm *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb.key = NULL;
+	spin_lock_init(&ctx->pk_lock);
 
 	return 0;
 }
 
-static void cbc_paes_exit(struct crypto_tfm *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 }
 
-static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
+static inline int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
 {
 	unsigned long fc;
 
-	if (__paes_convert_key(&ctx->kb, &ctx->pk))
+	if (__paes_convert_key(ctx))
 		return -EINVAL;
 
 	/* Pick the correct function code based on the protected key type */
@@ -258,28 +298,25 @@
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
 	int rc;
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
-	rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
+	rc = _key_to_kb(&ctx->kb, in_key, key_len);
 	if (rc)
 		return rc;
 
-	if (__cbc_paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
+	return __cbc_paes_set_key(ctx);
 }
 
-static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int nbytes, n, k;
 	int ret;
 	struct {
@@ -287,94 +324,103 @@
 		u8 key[MAXPROTKEYSIZE];
 	} param;
 
-	ret = blkcipher_walk_virt(desc, walk);
-	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
+	memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
+	spin_lock_bh(&ctx->pk_lock);
 	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	spin_unlock_bh(&ctx->pk_lock);
+
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		k = cpacf_kmc(ctx->fc | modifier, &param,
-			      walk->dst.virt.addr, walk->src.virt.addr, n);
-		if (k)
-			ret = blkcipher_walk_done(desc, walk, nbytes - k);
+			      walk.dst.virt.addr, walk.src.virt.addr, n);
+		if (k) {
+			memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+			ret = skcipher_walk_done(&walk, nbytes - k);
+		}
 		if (k < n) {
-			if (__cbc_paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+			if (__paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
 			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
 		}
 	}
-	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
 	return ret;
 }
 
-static int cbc_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_paes_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_paes_crypt(desc, 0, &walk);
+	return cbc_paes_crypt(req, 0);
 }
 
-static int cbc_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int cbc_paes_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+	return cbc_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg cbc_paes_alg = {
-	.cra_name		=	"cbc(paes)",
-	.cra_driver_name	=	"cbc-paes-s390",
-	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(cbc_paes_alg.cra_list),
-	.cra_init		=	cbc_paes_init,
-	.cra_exit		=	cbc_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	PAES_MIN_KEYSIZE,
-			.max_keysize		=	PAES_MAX_KEYSIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	cbc_paes_set_key,
-			.encrypt		=	cbc_paes_encrypt,
-			.decrypt		=	cbc_paes_decrypt,
-		}
-	}
+static struct skcipher_alg cbc_paes_alg = {
+	.base.cra_name		=	"cbc(paes)",
+	.base.cra_driver_name	=	"cbc-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
+	.init			=	cbc_paes_init,
+	.exit			=	cbc_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	cbc_paes_set_key,
+	.encrypt		=	cbc_paes_encrypt,
+	.decrypt		=	cbc_paes_decrypt,
 };
 
-static int xts_paes_init(struct crypto_tfm *tfm)
+static int xts_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb[0].key = NULL;
 	ctx->kb[1].key = NULL;
+	spin_lock_init(&ctx->pk_lock);
 
 	return 0;
 }
 
-static void xts_paes_exit(struct crypto_tfm *tfm)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb[0]);
 	_free_kb_keybuf(&ctx->kb[1]);
 }
 
-static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
+static inline int __xts_paes_convert_key(struct s390_pxts_ctx *ctx)
+{
+	struct pkey_protkey pkey0, pkey1;
+
+	if (__paes_keyblob2pkey(&ctx->kb[0], &pkey0) ||
+	    __paes_keyblob2pkey(&ctx->kb[1], &pkey1))
+		return -EINVAL;
+
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(&ctx->pk[0], &pkey0, sizeof(pkey0));
+	memcpy(&ctx->pk[1], &pkey1, sizeof(pkey1));
+	spin_unlock_bh(&ctx->pk_lock);
+
+	return 0;
+}
+
+static inline int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
 {
 	unsigned long fc;
 
-	if (__paes_convert_key(&ctx->kb[0], &ctx->pk[0]) ||
-	    __paes_convert_key(&ctx->kb[1], &ctx->pk[1]))
+	if (__xts_paes_convert_key(ctx))
 		return -EINVAL;
 
 	if (ctx->pk[0].type != ctx->pk[1].type)
@@ -391,11 +437,11 @@
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int xts_key_len)
 {
 	int rc;
-	struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 ckey[2 * AES_MAX_KEY_SIZE];
 	unsigned int ckey_len, key_len;
 
@@ -406,17 +452,16 @@
 
 	_free_kb_keybuf(&ctx->kb[0]);
 	_free_kb_keybuf(&ctx->kb[1]);
-	rc = _copy_key_to_kb(&ctx->kb[0], in_key, key_len);
+	rc = _key_to_kb(&ctx->kb[0], in_key, key_len);
 	if (rc)
 		return rc;
-	rc = _copy_key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
+	rc = _key_to_kb(&ctx->kb[1], in_key + key_len, key_len);
 	if (rc)
 		return rc;
 
-	if (__xts_paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	rc = __xts_paes_set_key(ctx);
+	if (rc)
+		return rc;
 
 	/*
 	 * xts_check_key verifies the key length is not odd and makes
@@ -427,13 +472,14 @@
 		AES_KEYSIZE_128 : AES_KEYSIZE_256;
 	memcpy(ckey, ctx->pk[0].protkey, ckey_len);
 	memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
-	return xts_check_key(tfm, ckey, 2*ckey_len);
+	return xts_verify_key(tfm, ckey, 2*ckey_len);
 }
 
-static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
 {
-	struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_walk walk;
 	unsigned int keylen, offset, nbytes, n, k;
 	int ret;
 	struct {
@@ -448,99 +494,92 @@
 		u8 init[16];
 	} xts_param;
 
-	ret = blkcipher_walk_virt(desc, walk);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
+
 	keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
 	offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
-retry:
-	memset(&pcc_param, 0, sizeof(pcc_param));
-	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
-	memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
-	cpacf_pcc(ctx->fc, pcc_param.key + offset);
 
+	memset(&pcc_param, 0, sizeof(pcc_param));
+	memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
 	memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
+	spin_unlock_bh(&ctx->pk_lock);
+	cpacf_pcc(ctx->fc, pcc_param.key + offset);
 	memcpy(xts_param.init, pcc_param.xts, 16);
 
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	while ((nbytes = walk.nbytes) != 0) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
 		k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
-			     walk->dst.virt.addr, walk->src.virt.addr, n);
+			     walk.dst.virt.addr, walk.src.virt.addr, n);
 		if (k)
-			ret = blkcipher_walk_done(desc, walk, nbytes - k);
+			ret = skcipher_walk_done(&walk, nbytes - k);
 		if (k < n) {
-			if (__xts_paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
-			goto retry;
+			if (__xts_paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(xts_param.key + offset,
+			       ctx->pk[0].protkey, keylen);
+			spin_unlock_bh(&ctx->pk_lock);
 		}
 	}
+
 	return ret;
 }
 
-static int xts_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int xts_paes_encrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_paes_crypt(desc, 0, &walk);
+	return xts_paes_crypt(req, 0);
 }
 
-static int xts_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
+static int xts_paes_decrypt(struct skcipher_request *req)
 {
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+	return xts_paes_crypt(req, CPACF_DECRYPT);
 }
 
-static struct crypto_alg xts_paes_alg = {
-	.cra_name		=	"xts(paes)",
-	.cra_driver_name	=	"xts-paes-s390",
-	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	AES_BLOCK_SIZE,
-	.cra_ctxsize		=	sizeof(struct s390_pxts_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(xts_paes_alg.cra_list),
-	.cra_init		=	xts_paes_init,
-	.cra_exit		=	xts_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	2 * PAES_MIN_KEYSIZE,
-			.max_keysize		=	2 * PAES_MAX_KEYSIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	xts_paes_set_key,
-			.encrypt		=	xts_paes_encrypt,
-			.decrypt		=	xts_paes_decrypt,
-		}
-	}
+static struct skcipher_alg xts_paes_alg = {
+	.base.cra_name		=	"xts(paes)",
+	.base.cra_driver_name	=	"xts-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	AES_BLOCK_SIZE,
+	.base.cra_ctxsize	=	sizeof(struct s390_pxts_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
+	.init			=	xts_paes_init,
+	.exit			=	xts_paes_exit,
+	.min_keysize		=	2 * PAES_MIN_KEYSIZE,
+	.max_keysize		=	2 * PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	xts_paes_set_key,
+	.encrypt		=	xts_paes_encrypt,
+	.decrypt		=	xts_paes_decrypt,
 };
 
-static int ctr_paes_init(struct crypto_tfm *tfm)
+static int ctr_paes_init(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	ctx->kb.key = NULL;
+	spin_lock_init(&ctx->pk_lock);
 
 	return 0;
 }
 
-static void ctr_paes_exit(struct crypto_tfm *tfm)
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
 {
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
 }
 
-static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
+static inline int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
 {
 	unsigned long fc;
 
-	if (__paes_convert_key(&ctx->kb, &ctx->pk))
+	if (__paes_convert_key(ctx))
 		return -EINVAL;
 
 	/* Pick the correct function code based on the protected key type */
@@ -555,22 +594,18 @@
 	return ctx->fc ? 0 : -EINVAL;
 }
 
-static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
 	int rc;
-	struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	_free_kb_keybuf(&ctx->kb);
-	rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
+	rc = _key_to_kb(&ctx->kb, in_key, key_len);
 	if (rc)
 		return rc;
 
-	if (__ctr_paes_set_key(ctx)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
-	return 0;
+	return __ctr_paes_set_key(ctx);
 }
 
 static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -588,121 +623,111 @@
 	return n;
 }
 
-static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
-			  struct blkcipher_walk *walk)
+static int ctr_paes_crypt(struct skcipher_request *req)
 {
-	struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u8 buf[AES_BLOCK_SIZE], *ctrptr;
+	struct skcipher_walk walk;
 	unsigned int nbytes, n, k;
 	int ret, locked;
+	struct {
+		u8 key[MAXPROTKEYSIZE];
+	} param;
 
-	locked = spin_trylock(&ctrblk_lock);
+	ret = skcipher_walk_virt(&walk, req, false);
+	if (ret)
+		return ret;
 
-	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
-	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+	spin_lock_bh(&ctx->pk_lock);
+	memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+	spin_unlock_bh(&ctx->pk_lock);
+
+	locked = mutex_trylock(&ctrblk_lock);
+
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
 		n = AES_BLOCK_SIZE;
 		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
-			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
-		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
-		k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
-				walk->dst.virt.addr, walk->src.virt.addr,
-				n, ctrptr);
+			n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+		k = cpacf_kmctr(ctx->fc, &param, walk.dst.virt.addr,
+				walk.src.virt.addr, n, ctrptr);
 		if (k) {
 			if (ctrptr == ctrblk)
-				memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+				memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
 				       AES_BLOCK_SIZE);
-			crypto_inc(walk->iv, AES_BLOCK_SIZE);
-			ret = blkcipher_walk_done(desc, walk, nbytes - n);
+			crypto_inc(walk.iv, AES_BLOCK_SIZE);
+			ret = skcipher_walk_done(&walk, nbytes - k);
 		}
 		if (k < n) {
-			if (__ctr_paes_set_key(ctx) != 0) {
+			if (__paes_convert_key(ctx)) {
 				if (locked)
-					spin_unlock(&ctrblk_lock);
-				return blkcipher_walk_done(desc, walk, -EIO);
+					mutex_unlock(&ctrblk_lock);
+				return skcipher_walk_done(&walk, -EIO);
 			}
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
 		}
 	}
 	if (locked)
-		spin_unlock(&ctrblk_lock);
+		mutex_unlock(&ctrblk_lock);
 	/*
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
 		while (1) {
-			if (cpacf_kmctr(ctx->fc | modifier,
-					ctx->pk.protkey, buf,
-					walk->src.virt.addr, AES_BLOCK_SIZE,
-					walk->iv) == AES_BLOCK_SIZE)
+			if (cpacf_kmctr(ctx->fc, &param, buf,
+					walk.src.virt.addr, AES_BLOCK_SIZE,
+					walk.iv) == AES_BLOCK_SIZE)
 				break;
-			if (__ctr_paes_set_key(ctx) != 0)
-				return blkcipher_walk_done(desc, walk, -EIO);
+			if (__paes_convert_key(ctx))
+				return skcipher_walk_done(&walk, -EIO);
+			spin_lock_bh(&ctx->pk_lock);
+			memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
+			spin_unlock_bh(&ctx->pk_lock);
 		}
-		memcpy(walk->dst.virt.addr, buf, nbytes);
-		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		ret = blkcipher_walk_done(desc, walk, 0);
+		memcpy(walk.dst.virt.addr, buf, nbytes);
+		crypto_inc(walk.iv, AES_BLOCK_SIZE);
+		ret = skcipher_walk_done(&walk, nbytes);
 	}
 
 	return ret;
 }
 
-static int ctr_paes_encrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_paes_crypt(desc, 0, &walk);
-}
-
-static int ctr_paes_decrypt(struct blkcipher_desc *desc,
-			    struct scatterlist *dst, struct scatterlist *src,
-			    unsigned int nbytes)
-{
-	struct blkcipher_walk walk;
-
-	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_paes_alg = {
-	.cra_name		=	"ctr(paes)",
-	.cra_driver_name	=	"ctr-paes-s390",
-	.cra_priority		=	402,	/* ecb-paes-s390 + 1 */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
-	.cra_blocksize		=	1,
-	.cra_ctxsize		=	sizeof(struct s390_paes_ctx),
-	.cra_type		=	&crypto_blkcipher_type,
-	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(ctr_paes_alg.cra_list),
-	.cra_init		=	ctr_paes_init,
-	.cra_exit		=	ctr_paes_exit,
-	.cra_u			=	{
-		.blkcipher = {
-			.min_keysize		=	PAES_MIN_KEYSIZE,
-			.max_keysize		=	PAES_MAX_KEYSIZE,
-			.ivsize			=	AES_BLOCK_SIZE,
-			.setkey			=	ctr_paes_set_key,
-			.encrypt		=	ctr_paes_encrypt,
-			.decrypt		=	ctr_paes_decrypt,
-		}
-	}
+static struct skcipher_alg ctr_paes_alg = {
+	.base.cra_name		=	"ctr(paes)",
+	.base.cra_driver_name	=	"ctr-paes-s390",
+	.base.cra_priority	=	402,	/* ecb-paes-s390 + 1 */
+	.base.cra_blocksize	=	1,
+	.base.cra_ctxsize	=	sizeof(struct s390_paes_ctx),
+	.base.cra_module	=	THIS_MODULE,
+	.base.cra_list		=	LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
+	.init			=	ctr_paes_init,
+	.exit			=	ctr_paes_exit,
+	.min_keysize		=	PAES_MIN_KEYSIZE,
+	.max_keysize		=	PAES_MAX_KEYSIZE,
+	.ivsize			=	AES_BLOCK_SIZE,
+	.setkey			=	ctr_paes_set_key,
+	.encrypt		=	ctr_paes_crypt,
+	.decrypt		=	ctr_paes_crypt,
+	.chunksize		=	AES_BLOCK_SIZE,
 };
 
-static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
 {
-	if (!list_empty(&alg->cra_list))
-		crypto_unregister_alg(alg);
+	if (!list_empty(&alg->base.cra_list))
+		crypto_unregister_skcipher(alg);
 }
 
 static void paes_s390_fini(void)
 {
+	__crypto_unregister_skcipher(&ctr_paes_alg);
+	__crypto_unregister_skcipher(&xts_paes_alg);
+	__crypto_unregister_skcipher(&cbc_paes_alg);
+	__crypto_unregister_skcipher(&ecb_paes_alg);
 	if (ctrblk)
 		free_page((unsigned long) ctrblk);
-	__crypto_unregister_alg(&ctr_paes_alg);
-	__crypto_unregister_alg(&xts_paes_alg);
-	__crypto_unregister_alg(&cbc_paes_alg);
-	__crypto_unregister_alg(&ecb_paes_alg);
 }
 
 static int __init paes_s390_init(void)
@@ -717,7 +742,7 @@
 	if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
-		ret = crypto_register_alg(&ecb_paes_alg);
+		ret = crypto_register_skcipher(&ecb_paes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -725,14 +750,14 @@
 	if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
 	    cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
-		ret = crypto_register_alg(&cbc_paes_alg);
+		ret = crypto_register_skcipher(&cbc_paes_alg);
 		if (ret)
 			goto out_err;
 	}
 
 	if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
 	    cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
-		ret = crypto_register_alg(&xts_paes_alg);
+		ret = crypto_register_skcipher(&xts_paes_alg);
 		if (ret)
 			goto out_err;
 	}
@@ -740,14 +765,14 @@
 	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
 	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
 	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
-		ret = crypto_register_alg(&ctr_paes_alg);
-		if (ret)
-			goto out_err;
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
 			goto out_err;
 		}
+		ret = crypto_register_skcipher(&ctr_paes_alg);
+		if (ret)
+			goto out_err;
 	}
 
 	return 0;
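
The converted paes algorithms above are reached through the generic skcipher interface rather than the removed blkcipher one. A minimal sketch of a synchronous in-kernel caller follows; the helper name, the in-place buffer and the pre-built pkey key blob (kb/kblen) are illustrative assumptions, and error handling is abbreviated.

#include <crypto/skcipher.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/types.h>

/* Sketch: ECB-encrypt a linear buffer in place with a paes key blob.
 * len must be a multiple of AES_BLOCK_SIZE and buf must not live on
 * the stack, since it is mapped through a scatterlist. */
static int paes_ecb_encrypt_buf(const u8 *kb, unsigned int kblen,
				u8 *buf, unsigned int len)
{
	struct crypto_sync_skcipher *tfm;
	struct scatterlist sg;
	int ret;

	tfm = crypto_alloc_sync_skcipher("ecb(paes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* kb is a pkey key blob (e.g. a CCA secure key), not a clear AES key */
	ret = crypto_sync_skcipher_setkey(tfm, kb, kblen);
	if (!ret) {
		SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);

		sg_init_one(&sg, buf, len);
		skcipher_request_set_sync_tfm(req, tfm);
		skcipher_request_set_callback(req, 0, NULL, NULL);
		skcipher_request_set_crypt(req, &sg, &sg, len, NULL);
		ret = crypto_skcipher_encrypt(req);
		skcipher_request_zero(req);
	}
	crypto_free_sync_skcipher(tfm);
	return ret;
}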
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index d977643..5057773 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -249,7 +249,7 @@
 {
 	pr_debug("The prng module stopped "
 		 "after running in triple DES mode\n");
-	kzfree(prng_data);
+	kfree_sensitive(prng_data);
 }
 
 
@@ -442,7 +442,7 @@
 static void prng_sha512_deinstantiate(void)
 {
 	pr_debug("The prng module stopped after running in SHA-512 mode\n");
-	kzfree(prng_data);
+	kfree_sensitive(prng_data);
 }
 
 
@@ -693,7 +693,7 @@
 				   struct device_attribute *attr,
 				   char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
+	return scnprintf(buf, PAGE_SIZE, "%u\n", prng_chunk_size);
 }
 static DEVICE_ATTR(chunksize, 0444, prng_chunksize_show, NULL);
 
@@ -712,7 +712,7 @@
 		counter = prng_data->prngws.byte_counter;
 	mutex_unlock(&prng_data->mutex);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", counter);
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", counter);
 }
 static DEVICE_ATTR(byte_counter, 0444, prng_counter_show, NULL);
 
@@ -721,7 +721,7 @@
 				   struct device_attribute *attr,
 				   char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", prng_errorflag);
 }
 static DEVICE_ATTR(errorflag, 0444, prng_errorflag_show, NULL);
 
@@ -731,9 +731,9 @@
 			      char *buf)
 {
 	if (prng_mode == PRNG_MODE_TDES)
-		return snprintf(buf, PAGE_SIZE, "TDES\n");
+		return scnprintf(buf, PAGE_SIZE, "TDES\n");
 	else
-		return snprintf(buf, PAGE_SIZE, "SHA512\n");
+		return scnprintf(buf, PAGE_SIZE, "SHA512\n");
 }
 static DEVICE_ATTR(mode, 0444, prng_mode_show, NULL);
 
@@ -756,7 +756,7 @@
 				      struct device_attribute *attr,
 				      char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
+	return scnprintf(buf, PAGE_SIZE, "%u\n", prng_reseed_limit);
 }
 static ssize_t prng_reseed_limit_store(struct device *dev,
 				       struct device_attribute *attr,
@@ -787,7 +787,7 @@
 				  struct device_attribute *attr,
 				  char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "256\n");
+	return scnprintf(buf, PAGE_SIZE, "256\n");
 }
 static DEVICE_ATTR(strength, 0444, prng_strength_show, NULL);
 
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 7c15542..698b1e6 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -27,7 +27,7 @@
 
 #include "sha.h"
 
-static int sha1_init(struct shash_desc *desc)
+static int s390_sha1_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
@@ -42,7 +42,7 @@
 	return 0;
 }
 
-static int sha1_export(struct shash_desc *desc, void *out)
+static int s390_sha1_export(struct shash_desc *desc, void *out)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	struct sha1_state *octx = out;
@@ -53,7 +53,7 @@
 	return 0;
 }
 
-static int sha1_import(struct shash_desc *desc, const void *in)
+static int s390_sha1_import(struct shash_desc *desc, const void *in)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 	const struct sha1_state *ictx = in;
@@ -67,11 +67,11 @@
 
 static struct shash_alg alg = {
 	.digestsize	=	SHA1_DIGEST_SIZE,
-	.init		=	sha1_init,
+	.init		=	s390_sha1_init,
 	.update		=	s390_sha_update,
 	.final		=	s390_sha_final,
-	.export		=	sha1_export,
-	.import		=	sha1_import,
+	.export		=	s390_sha1_export,
+	.import		=	s390_sha1_import,
 	.descsize	=	sizeof(struct s390_sha_ctx),
 	.statesize	=	sizeof(struct sha1_state),
 	.base		=	{
diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c
index e1fcc03..a927adc 100644
--- a/arch/s390/hypfs/hypfs_vm.c
+++ b/arch/s390/hypfs/hypfs_vm.c
@@ -20,6 +20,7 @@
 
 static char local_guest[] = "        ";
 static char all_guests[] = "*       ";
+static char *all_groups = all_guests;
 static char *guest_query;
 
 struct diag2fc_data {
@@ -62,10 +63,11 @@
 
 	memcpy(parm_list.userid, query, NAME_LEN);
 	ASCEBC(parm_list.userid, NAME_LEN);
-	parm_list.addr = (unsigned long) addr ;
+	memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
+	ASCEBC(parm_list.aci_grp, NAME_LEN);
+	parm_list.addr = (unsigned long)addr;
 	parm_list.size = size;
 	parm_list.fmt = 0x02;
-	memset(parm_list.aci_grp, 0x40, NAME_LEN);
 	rc = -1;
 
 	diag_stat_inc(DIAG_STAT_X2FC);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 70139d0..5c97f48 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -209,17 +209,12 @@
 
 enum { Opt_uid, Opt_gid, };
 
-static const struct fs_parameter_spec hypfs_param_specs[] = {
+static const struct fs_parameter_spec hypfs_fs_parameters[] = {
 	fsparam_u32("gid", Opt_gid),
 	fsparam_u32("uid", Opt_uid),
 	{}
 };
 
-static const struct fs_parameter_description hypfs_fs_parameters = {
-	.name		= "hypfs",
-	.specs		= hypfs_param_specs,
-};
-
 static int hypfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct hypfs_sb_info *hypfs_info = fc->s_fs_info;
@@ -228,7 +223,7 @@
 	kgid_t gid;
 	int opt;
 
-	opt = fs_parse(fc, &hypfs_fs_parameters, param, &result);
+	opt = fs_parse(fc, hypfs_fs_parameters, param, &result);
 	if (opt < 0)
 		return opt;
 
@@ -455,7 +450,7 @@
 	.owner		= THIS_MODULE,
 	.name		= "s390_hypfs",
 	.init_fs_context = hypfs_init_fs_context,
-	.parameters	= &hypfs_fs_parameters,
+	.parameters	= hypfs_fs_parameters,
 	.kill_sb	= hypfs_kill_super
 };
 
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 2531f67..1a18d7b 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -5,22 +5,6 @@
 generated-y += unistd_nr.h
 
 generic-y += asm-offsets.h
-generic-y += cacheflush.h
-generic-y += device.h
-generic-y += dma-contiguous.h
-generic-y += dma-mapping.h
-generic-y += div64.h
-generic-y += emergency-restart.h
 generic-y += export.h
-generic-y += fb.h
-generic-y += irq_regs.h
-generic-y += irq_work.h
-generic-y += kmap_types.h
-generic-y += local.h
-generic-y += local64.h
+generic-y += kvm_types.h
 generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
-generic-y += mmiowb.h
-generic-y += trace_clock.h
-generic-y += unaligned.h
-generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h
index c2cf7bc..1c8a38f 100644
--- a/arch/s390/include/asm/alternative.h
+++ b/arch/s390/include/asm/alternative.h
@@ -139,10 +139,10 @@
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, altinstr, facility)			\
-	asm volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
+	asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
 
 #define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
-	asm volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1,	    \
+	asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1,   \
 				   altinstr2, facility2) ::: "memory")
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h
index c67b82d..de61ce5 100644
--- a/arch/s390/include/asm/archrandom.h
+++ b/arch/s390/include/asm/archrandom.h
@@ -21,29 +21,17 @@
 
 bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
 
-static inline bool arch_has_random(void)
+static inline bool __must_check arch_get_random_long(unsigned long *v)
 {
 	return false;
 }
 
-static inline bool arch_has_random_seed(void)
-{
-	if (static_branch_likely(&s390_arch_random_available))
-		return true;
-	return false;
-}
-
-static inline bool arch_get_random_long(unsigned long *v)
+static inline bool __must_check arch_get_random_int(unsigned int *v)
 {
 	return false;
 }
 
-static inline bool arch_get_random_int(unsigned int *v)
-{
-	return false;
-}
-
-static inline bool arch_get_random_seed_long(unsigned long *v)
+static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
 {
 	if (static_branch_likely(&s390_arch_random_available)) {
 		return s390_arch_random_generate((u8 *)v, sizeof(*v));
@@ -51,7 +39,7 @@
 	return false;
 }
 
-static inline bool arch_get_random_seed_int(unsigned int *v)
+static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
 {
 	if (static_branch_likely(&s390_arch_random_available)) {
 		return s390_arch_random_generate((u8 *)v, sizeof(*v));
diff --git a/arch/s390/include/asm/asm-const.h b/arch/s390/include/asm/asm-const.h
new file mode 100644
index 0000000..11f615e
--- /dev/null
+++ b/arch/s390/include/asm/asm-const.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ASM_CONST_H
+#define _ASM_S390_ASM_CONST_H
+
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#else
+/* This version of stringify will deal with commas... */
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#endif
+#endif /* _ASM_S390_ASM_CONST_H */
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 491ad53..11c5952 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -15,8 +15,6 @@
 #include <asm/barrier.h>
 #include <asm/cmpxchg.h>
 
-#define ATOMIC_INIT(i)  { (i) }
-
 static inline int atomic_read(const atomic_t *v)
 {
 	int c;
@@ -47,7 +45,11 @@
 static inline void atomic_add(int i, atomic_t *v)
 {
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-	if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+	/*
+	 * Order of conditions is important to circumvent gcc 10 bug:
+	 * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+	 */
+	if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
 		__atomic_add_const(i, &v->counter);
 		return;
 	}
@@ -114,7 +116,11 @@
 static inline void atomic64_add(s64 i, atomic64_t *v)
 {
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-	if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
+	/*
+	 * Order of conditions is important to circumvent gcc 10 bug:
+	 * https://gcc.gnu.org/pipermail/gcc-patches/2020-July/549318.html
+	 */
+	if ((i > -129) && (i < 128) && __builtin_constant_p(i)) {
 		__atomic64_add_const(i, (long *)&v->counter);
 		return;
 	}
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index eb7eed4..431e208 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -241,7 +241,9 @@
 	arch___clear_bit(nr, ptr);
 }
 
-#include <asm-generic/bitops-instrumented.h>
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-non-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
 
 /*
  * Functions which use MSB0 bit numbering.
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 713fc97..0b25f28 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -2,23 +2,22 @@
 #ifndef _ASM_S390_BUG_H
 #define _ASM_S390_BUG_H
 
-#include <linux/kernel.h>
+#include <linux/compiler.h>
 
 #ifdef CONFIG_BUG
 
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 
 #define __EMIT_BUG(x) do {					\
-	asm volatile(						\
-		"0:	j	0b+2\n"				\
-		"1:\n"						\
+	asm_inline volatile(					\
+		"0:	mc	0,0\n"				\
 		".section .rodata.str,\"aMS\",@progbits,1\n"	\
-		"2:	.asciz	\""__FILE__"\"\n"		\
+		"1:	.asciz	\""__FILE__"\"\n"		\
 		".previous\n"					\
 		".section __bug_table,\"awM\",@progbits,%2\n"	\
-		"3:	.long	1b-3b,2b-3b\n"			\
+		"2:	.long	0b-2b,1b-2b\n"			\
 		"	.short	%0,%1\n"			\
-		"	.org	3b+%2\n"			\
+		"	.org	2b+%2\n"			\
 		".previous\n"					\
 		: : "i" (__LINE__),				\
 		    "i" (x),					\
@@ -28,13 +27,12 @@
 #else /* CONFIG_DEBUG_BUGVERBOSE */
 
 #define __EMIT_BUG(x) do {					\
-	asm volatile(						\
-		"0:	j	0b+2\n"				\
-		"1:\n"						\
+	asm_inline volatile(					\
+		"0:	mc	0,0\n"				\
 		".section __bug_table,\"awM\",@progbits,%1\n"	\
-		"2:	.long	1b-2b\n"			\
+		"1:	.long	0b-1b\n"			\
 		"	.short	%0\n"				\
-		"	.org	2b+%1\n"			\
+		"	.org	1b+%1\n"			\
 		".previous\n"					\
 		: : "i" (x),					\
 		    "i" (sizeof(struct bug_entry)));		\
diff --git a/arch/s390/include/asm/cache.h b/arch/s390/include/asm/cache.h
index d5e22e8..0012817 100644
--- a/arch/s390/include/asm/cache.h
+++ b/arch/s390/include/asm/cache.h
@@ -14,6 +14,6 @@
 #define L1_CACHE_SHIFT     8
 #define NET_SKB_PAD	   32
 
-#define __read_mostly __section(.data..read_mostly)
+#define __read_mostly __section(".data..read_mostly")
 
 #endif
diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h
index 865ce1c..c0be5fe 100644
--- a/arch/s390/include/asm/ccwdev.h
+++ b/arch/s390/include/asm/ccwdev.h
@@ -11,6 +11,7 @@
 
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
+#include <asm/chsc.h>
 #include <asm/fcx.h>
 #include <asm/irq.h>
 #include <asm/schid.h>
@@ -236,4 +237,11 @@
 
 struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int);
 u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx);
+int ccw_device_pnso(struct ccw_device *cdev,
+		    struct chsc_pnso_area *pnso_area, u8 oc,
+		    struct chsc_pnso_resume_token resume_token, int cnc);
+int ccw_device_get_cssid(struct ccw_device *cdev, u8 *cssid);
+int ccw_device_get_iid(struct ccw_device *cdev, u8 *iid);
+int ccw_device_get_chpid(struct ccw_device *cdev, int chp_idx, u8 *chpid);
+int ccw_device_get_chid(struct ccw_device *cdev, int chp_idx, u16 *chid);
 #endif /* _S390_CCWDEV_H_ */
diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 7293c13..ad3acb1 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -36,11 +36,6 @@
  * @set_online: function called when device is set online
  * @set_offline: function called when device is set offline
  * @shutdown: function called when device is shut down
- * @prepare: prepare for pm state transition
- * @complete: undo work done in @prepare
- * @freeze: callback for freezing during hibernation snapshotting
- * @thaw: undo work done in @freeze
- * @restore: callback for restoring after hibernation
  * @driver: embedded driver structure
  * @ccw_driver: supported ccw_driver (optional)
  */
@@ -50,11 +45,6 @@
 	int (*set_online) (struct ccwgroup_device *);
 	int (*set_offline) (struct ccwgroup_device *);
 	void (*shutdown)(struct ccwgroup_device *);
-	int (*prepare) (struct ccwgroup_device *);
-	void (*complete) (struct ccwgroup_device *);
-	int (*freeze)(struct ccwgroup_device *);
-	int (*thaw) (struct ccwgroup_device *);
-	int (*restore)(struct ccwgroup_device *);
 
 	struct device_driver driver;
 	struct ccw_driver *ccw_driver;
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 91e376b..a8c02cf 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -13,21 +13,21 @@
 #define _S390_CHECKSUM_H
 
 #include <linux/uaccess.h>
+#include <linux/in6.h>
 
 /*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
  *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
+ * Returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic.
  *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
+ * This function must be called with even lengths, except
+ * for the last fragment, which may be odd.
  *
- * it's best to have buff aligned on a 32-bit boundary
+ * It's best to have buff aligned on a 32-bit boundary.
  */
-static inline __wsum
-csum_partial(const void *buff, int len, __wsum sum)
+static inline __wsum csum_partial(const void *buff, int len, __wsum sum)
 {
 	register unsigned long reg2 asm("2") = (unsigned long) buff;
 	register unsigned long reg3 asm("3") = (unsigned long) len;
@@ -40,100 +40,91 @@
 }
 
 /*
- * the same as csum_partial_copy, but copies from user space.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- *
- * Copy from userspace and compute checksum.
- */
-static inline __wsum
-csum_partial_copy_from_user(const void __user *src, void *dst,
-                                          int len, __wsum sum,
-                                          int *err_ptr)
-{
-	if (unlikely(copy_from_user(dst, src, len)))
-		*err_ptr = -EFAULT;
-	return csum_partial(dst, len, sum);
-}
-
-
-static inline __wsum
-csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
-{
-        memcpy(dst,src,len);
-	return csum_partial(dst, len, sum);
-}
-
-/*
- *      Fold a partial checksum without adding pseudo headers
+ * Fold a partial checksum without adding pseudo headers.
  */
 static inline __sum16 csum_fold(__wsum sum)
 {
 	u32 csum = (__force u32) sum;
 
-	csum += (csum >> 16) + (csum << 16);
+	csum += (csum >> 16) | (csum << 16);
 	csum >>= 16;
 	return (__force __sum16) ~csum;
 }
 
 /*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- *
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksums on 4 octet boundaries.
  */
 static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-	return csum_fold(csum_partial(iph, ihl*4, 0));
+	__u64 csum = 0;
+	__u32 *ptr = (u32 *)iph;
+
+	csum += *ptr++;
+	csum += *ptr++;
+	csum += *ptr++;
+	csum += *ptr++;
+	ihl -= 4;
+	while (ihl--)
+		csum += *ptr++;
+	csum += (csum >> 32) | (csum << 32);
+	return csum_fold((__force __wsum)(csum >> 32));
 }
 
 /*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 32-bit checksum
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 32-bit checksum.
  */
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
-                   __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
 {
-	__u32 csum = (__force __u32)sum;
+	__u64 csum = (__force __u64)sum;
 
 	csum += (__force __u32)saddr;
-	if (csum < (__force __u32)saddr)
-		csum++;
-
 	csum += (__force __u32)daddr;
-	if (csum < (__force __u32)daddr)
-		csum++;
-
-	csum += len + proto;
-	if (csum < len + proto)
-		csum++;
-
-	return (__force __wsum)csum;
+	csum += len;
+	csum += proto;
+	csum += (csum >> 32) | (csum << 32);
+	return (__force __wsum)(csum >> 32);
 }
 
 /*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
+ * Computes the checksum of the TCP/UDP pseudo-header.
+ * Returns a 16-bit checksum, already complemented.
  */
-
-static inline __sum16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto,
-                  __wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+					__u8 proto, __wsum sum)
 {
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
 
 /*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
+ * Used for miscellaneous IP-like checksums, mainly icmp.
  */
-
 static inline __sum16 ip_compute_csum(const void *buff, int len)
 {
 	return csum_fold(csum_partial(buff, len, 0));
 }
 
+#define _HAVE_ARCH_IPV6_CSUM
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+				      const struct in6_addr *daddr,
+				      __u32 len, __u8 proto, __wsum csum)
+{
+	__u64 sum = (__force __u64)csum;
+
+	sum += (__force __u32)saddr->s6_addr32[0];
+	sum += (__force __u32)saddr->s6_addr32[1];
+	sum += (__force __u32)saddr->s6_addr32[2];
+	sum += (__force __u32)saddr->s6_addr32[3];
+	sum += (__force __u32)daddr->s6_addr32[0];
+	sum += (__force __u32)daddr->s6_addr32[1];
+	sum += (__force __u32)daddr->s6_addr32[2];
+	sum += (__force __u32)daddr->s6_addr32[3];
+	sum += len;
+	sum += proto;
+	sum += (sum >> 32) | (sum << 32);
+	return csum_fold((__force __wsum)(sum >> 32));
+}
+
 #endif /* _S390_CHECKSUM_H */
-
-
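
The rewritten pseudo-header helpers accumulate into a 64-bit value and fold with an end-around carry: sum += (sum >> 32) | (sum << 32) adds the two 32-bit halves plus the carry between them into the upper word, so sum >> 32 is already the folded one's-complement sum. A small user-space sketch (not part of the patch) that checks this folding against a plain 16-bit reference:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Fold a 64-bit one's-complement accumulator down to a 16-bit checksum. */
static uint16_t fold64(uint64_t sum)
{
	uint32_t csum;

	sum += (sum >> 32) | (sum << 32);	/* hi + lo with end-around carry */
	csum = sum >> 32;
	csum += (csum >> 16) | (csum << 16);	/* same trick, 32 -> 16 bits */
	return ~(uint16_t)(csum >> 16);
}

/* Reference: sum native 16-bit words, fold iteratively (RFC 1071 style). */
static uint16_t csum_ref(const uint8_t *buf, size_t len)
{
	uint64_t sum = 0;
	uint16_t w;
	size_t i;

	for (i = 0; i < len; i += 2) {
		memcpy(&w, buf + i, 2);
		sum += w;
	}
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Fast path: sum native 32-bit words into 64 bits, fold once at the end. */
static uint16_t csum_fast(const uint8_t *buf, size_t len)
{
	uint64_t sum = 0;
	uint32_t w;
	size_t i;

	for (i = 0; i < len; i += 4) {
		memcpy(&w, buf + i, 4);
		sum += w;
	}
	return fold64(sum);
}

int main(void)
{
	uint8_t buf[256];
	size_t i;

	for (i = 0; i < sizeof(buf); i++)
		buf[i] = (uint8_t)(i * 37 + 11);
	/* len is a multiple of 4, mirroring ip_fast_csum()'s 4-byte words */
	assert(csum_ref(buf, sizeof(buf)) == csum_fast(buf, sizeof(buf)));
	printf("folded checksum: 0x%04x\n",
	       (unsigned int)csum_ref(buf, sizeof(buf)));
	return 0;
}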
diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h
new file mode 100644
index 0000000..ae4d254
--- /dev/null
+++ b/arch/s390/include/asm/chsc.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Alexandra Winter <wintera@linux.ibm.com>
+ *
+ * Interface for Channel Subsystem Call
+ */
+#ifndef _ASM_S390_CHSC_H
+#define _ASM_S390_CHSC_H
+
+#include <uapi/asm/chsc.h>
+
+/**
+ * Operation codes for CHSC PNSO:
+ *    PNSO_OC_NET_BRIDGE_INFO - only addresses that are visible to a bridgeport
+ *    PNSO_OC_NET_ADDR_INFO   - all addresses
+ */
+#define PNSO_OC_NET_BRIDGE_INFO		0
+#define PNSO_OC_NET_ADDR_INFO		3
+/**
+ * struct chsc_pnso_naid_l2 - network address information descriptor
+ * @nit:  Network interface token
+ * @addr_lnid: network address and logical network id (VLAN ID)
+ */
+struct chsc_pnso_naid_l2 {
+	u64 nit;
+	struct { u8 mac[6]; u16 lnid; } addr_lnid;
+} __packed;
+
+struct chsc_pnso_resume_token {
+	u64 t1;
+	u64 t2;
+} __packed;
+
+struct chsc_pnso_naihdr {
+	struct chsc_pnso_resume_token resume_token;
+	u32:32;
+	u32 instance;
+	u32:24;
+	u8 naids;
+	u32 reserved[3];
+} __packed;
+
+struct chsc_pnso_area {
+	struct chsc_header request;
+	u8:2;
+	u8 m:1;
+	u8:5;
+	u8:2;
+	u8 ssid:2;
+	u8 fmt:4;
+	u16 sch;
+	u8:8;
+	u8 cssid;
+	u16:16;
+	u8 oc;
+	u32:24;
+	struct chsc_pnso_resume_token resume_token;
+	u32 n:1;
+	u32:31;
+	u32 reserved[3];
+	struct chsc_header response;
+	u32:32;
+	struct chsc_pnso_naihdr naihdr;
+	struct chsc_pnso_naid_l2 entries[0];
+} __packed __aligned(PAGE_SIZE);
+
+#endif /* _ASM_S390_CHSC_H */
diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h
index b5bfb31..5c58756 100644
--- a/arch/s390/include/asm/cio.h
+++ b/arch/s390/include/asm/cio.h
@@ -356,7 +356,6 @@
 	return 8 - ffs(mask);
 }
 
-void channel_subsystem_reinit(void);
 extern void css_schedule_reprobe(void);
 
 extern void *cio_dma_zalloc(size_t size);
@@ -372,6 +371,7 @@
 /* Function from drivers/s390/cio/chsc.c */
 int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
 int chsc_sstpi(void *page, void *result, size_t size);
+int chsc_stzi(void *page, void *result, size_t size);
 int chsc_sgib(u32 origin);
 
 #endif
diff --git a/arch/s390/include/asm/clocksource.h b/arch/s390/include/asm/clocksource.h
new file mode 100644
index 0000000..0343436
--- /dev/null
+++ b/arch/s390/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* s390-specific clocksource additions */
+
+#ifndef _ASM_S390_CLOCKSOURCE_H
+#define _ASM_S390_CLOCKSOURCE_H
+
+#endif /* _ASM_S390_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h
index 3925b0f..10919ee 100644
--- a/arch/s390/include/asm/clp.h
+++ b/arch/s390/include/asm/clp.h
@@ -5,6 +5,9 @@
 /* CLP common request & response block size */
 #define CLP_BLK_SIZE			PAGE_SIZE
 
+/* Call Logical Processor - Command Code */
+#define CLP_SLPC		0x0001
+
 #define CLP_LPS_BASE	0
 #define CLP_LPS_PCI	2
 
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 63b46e3..ea5b9c3 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -63,8 +63,6 @@
 typedef u16		compat_ipc_pid_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s64		compat_s64;
-typedef u64		compat_u64;
 
 typedef struct {
 	u32 mask;
@@ -177,11 +175,7 @@
 {
 	return (void __user *)(unsigned long)(uptr & 0x7fffffffUL);
 }
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
-	return (u32)(unsigned long)uptr;
-}
+#define compat_ptr(uptr) compat_ptr(uptr)
 
 #ifdef CONFIG_COMPAT
 
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 819803a..0d90cbe 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -313,7 +313,7 @@
 	return (unsigned long *) ret;
 }
 
-/* Return if the entry in the sample data block table (sdbt)
+/* Return true if the entry in the sample data block table (sdbt)
  * is a link to the next sdbt */
 static inline int is_link_entry(unsigned long *s)
 {
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 480bb02..638137d 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -36,7 +36,9 @@
 	u64 alt_ssi : 1; /* bit 108 */
 	u64 : 1;
 	u64 narf : 1;	 /* bit 110 */
-	u64 : 12;
+	u64 : 5;
+	u64 enarf: 1;	 /* bit 116 */
+	u64 : 6;
 	u64 util_str : 1;/* bit 123 */
 } __packed;
 
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 60f9075..ed5efbb 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -11,6 +11,7 @@
 #include <linux/bits.h>
 
 #define CR0_CLOCK_COMPARATOR_SIGN	BIT(63 - 10)
+#define CR0_LOW_ADDRESS_PROTECTION	BIT(63 - 35)
 #define CR0_EMERGENCY_SIGNAL_SUBMASK	BIT(63 - 49)
 #define CR0_EXTERNAL_CALL_SUBMASK	BIT(63 - 50)
 #define CR0_CLOCK_COMPARATOR_SUBMASK	BIT(63 - 52)
diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h
index 3101340..c1b82bc 100644
--- a/arch/s390/include/asm/debug.h
+++ b/arch/s390/include/asm/debug.h
@@ -2,7 +2,7 @@
 /*
  *   S/390 debug facility
  *
- *    Copyright IBM Corp. 1999, 2000
+ *    Copyright IBM Corp. 1999, 2020
  */
 #ifndef DEBUG_H
 #define DEBUG_H
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/time.h>
 #include <linux/refcount.h>
-#include <uapi/asm/debug.h>
+#include <linux/fs.h>
 
 #define DEBUG_MAX_LEVEL		   6  /* debug levels range from 0 to 6 */
 #define DEBUG_OFF_LEVEL		   -1 /* level where debug is switched off */
@@ -26,6 +26,16 @@
 #define DEBUG_DATA(entry) (char *)(entry + 1) /* data is stored behind */
 					      /* the entry information */
 
+#define __DEBUG_FEATURE_VERSION	   3  /* version of debug feature */
+
+struct __debug_entry {
+	unsigned long clock	: 60;
+	unsigned long exception	:  1;
+	unsigned long level	:  3;
+	void *caller;
+	unsigned short cpu;
+} __packed;
+
 typedef struct __debug_entry debug_entry_t;
 
 struct debug_view;
@@ -82,7 +92,6 @@
 };
 
 extern struct debug_view debug_hex_ascii_view;
-extern struct debug_view debug_raw_view;
 extern struct debug_view debug_sprintf_view;
 
 /* do NOT use the _common functions */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 0036eab..ca8f85b 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -298,10 +298,8 @@
 union diag318_info {
 	unsigned long val;
 	struct {
-		unsigned int cpnc : 8;
-		unsigned int cpvc_linux : 24;
-		unsigned char cpvc_distro[3];
-		unsigned char zero;
+		unsigned long cpnc : 8;
+		unsigned long cpvc : 56;
 	};
 };
 
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index ae27f75..ce0db81 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -1,12 +1,20 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __S390_EXTABLE_H
 #define __S390_EXTABLE_H
+
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of three addresses:
+ *
+ * - Address of an instruction that is allowed to fault.
+ * - Address at which the program should continue.
+ * - Optional address of handler that takes pt_regs * argument and runs in
+ *   interrupt context.
+ *
+ * No registers are modified, so it is entirely up to the continuation code
+ * to figure out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -17,6 +25,7 @@
 struct exception_table_entry
 {
 	int insn, fixup;
+	long handler;
 };
 
 extern struct exception_table_entry *__start_dma_ex_table;
@@ -29,6 +38,44 @@
 	return (unsigned long)&x->fixup + x->fixup;
 }
 
+typedef bool (*ex_handler_t)(const struct exception_table_entry *,
+			     struct pt_regs *);
+
+static inline ex_handler_t
+ex_fixup_handler(const struct exception_table_entry *x)
+{
+	if (likely(!x->handler))
+		return NULL;
+	return (ex_handler_t)((unsigned long)&x->handler + x->handler);
+}
+
+static inline bool ex_handle(const struct exception_table_entry *x,
+			     struct pt_regs *regs)
+{
+	ex_handler_t handler = ex_fixup_handler(x);
+
+	if (unlikely(handler))
+		return handler(x, regs);
+	regs->psw.addr = extable_fixup(x);
+	return true;
+}
+
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+static inline void swap_ex_entry_fixup(struct exception_table_entry *a,
+				       struct exception_table_entry *b,
+				       struct exception_table_entry tmp,
+				       int delta)
+{
+	a->fixup = b->fixup + delta;
+	b->fixup = tmp.fixup - delta;
+	a->handler = b->handler;
+	if (a->handler)
+		a->handler += delta;
+	b->handler = tmp.handler;
+	if (b->handler)
+		b->handler -= delta;
+}
+#define swap_ex_entry_fixup swap_ex_entry_fixup
+
 #endif
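
The extended exception table stores the optional handler as a self-relative offset (the field holds the target address minus the field's own address), which keeps the table position independent: entries can be sorted or copied as long as the offsets are rebased, which is what swap_ex_entry_fixup() above does. A stand-alone sketch of the self-relative pointer idea (rel_entry and its helpers are illustrative names, not kernel API):

#include <stdint.h>
#include <stdio.h>

/* A self-relative "pointer": the field stores target - &field, 0 = unset. */
struct rel_entry {
	long handler;
};

typedef void (*handler_t)(void);

static void set_handler(struct rel_entry *e, handler_t fn)
{
	e->handler = fn ? (long)((uintptr_t)fn - (uintptr_t)&e->handler) : 0;
}

static handler_t get_handler(const struct rel_entry *e)
{
	if (!e->handler)
		return NULL;
	return (handler_t)((uintptr_t)&e->handler + e->handler);
}

static void say_hello(void)
{
	puts("handler called");
}

int main(void)
{
	struct rel_entry e;

	set_handler(&e, say_hello);
	/* Moving or swapping entries only requires adjusting the stored
	 * offset by the distance moved, as swap_ex_entry_fixup() does. */
	get_handler(&e)();
	return 0;
}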
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 5e97a43..26f9144 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -29,7 +29,6 @@
 	mm_segment_t old_fs;
 
 	old_fs = enable_sacf_uaccess();
-	pagefault_disable();
 	switch (op) {
 	case FUTEX_OP_SET:
 		__futex_atomic_op("lr %2,%5\n",
@@ -54,7 +53,6 @@
 	default:
 		ret = -ENOSYS;
 	}
-	pagefault_enable();
 	disable_sacf_uaccess(old_fs);
 
 	if (!ret)
diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index 37f96b6..40264f6 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -9,6 +9,7 @@
 #ifndef _ASM_S390_GMAP_H
 #define _ASM_S390_GMAP_H
 
+#include <linux/radix-tree.h>
 #include <linux/refcount.h>
 
 /* Generic bits for GMAP notification on DAT table entry changes. */
@@ -31,6 +32,7 @@
  * @table: pointer to the page directory
  * @asce: address space control element for gmap page table
  * @pfault_enabled: defines if pfaults are applicable for the guest
+ * @guest_handle: protected virtual machine handle for the ultravisor
  * @host_to_rmap: radix tree with gmap_rmap lists
  * @children: list of shadow gmap structures
  * @pt_list: list of all page tables used in the shadow guest address space
@@ -54,6 +56,8 @@
 	unsigned long asce_end;
 	void *private;
 	bool pfault_enabled;
+	/* only set for protected virtual machines */
+	unsigned long guest_handle;
 	/* Additional data for shadow guest address spaces */
 	struct radix_tree_root host_to_rmap;
 	struct list_head children;
@@ -136,12 +140,12 @@
 
 void gmap_register_pte_notifier(struct gmap_notifier *);
 void gmap_unregister_pte_notifier(struct gmap_notifier *);
-void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
-		     unsigned long bits);
 
 int gmap_mprotect_notify(struct gmap *, unsigned long start,
 			 unsigned long len, int prot);
 
 void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
 			     unsigned long gaddr, unsigned long vmaddr);
+int gmap_mark_unmergeable(void);
+void s390_reset_acc(struct mm_struct *mm);
 #endif /* _ASM_S390_GMAP_H */
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index de8f0bf..60f9241 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -9,8 +9,8 @@
 #ifndef _ASM_S390_HUGETLB_H
 #define _ASM_S390_HUGETLB_H
 
+#include <linux/pgtable.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 
 #define hugetlb_free_pgd_range			free_pgd_range
 #define hugepages_supported()			(MACHINE_HAS_EDAT1)
@@ -21,13 +21,6 @@
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 			      unsigned long addr, pte_t *ptep);
 
-static inline bool is_hugepage_only_range(struct mm_struct *mm,
-					  unsigned long addr,
-					  unsigned long len)
-{
-	return false;
-}
-
 /*
  * If the arch doesn't supply something else, assume that hugepage
  * size aligned regions are ok without further preparation.
@@ -46,6 +39,7 @@
 {
 	clear_bit(PG_arch_1, &page->flags);
 }
+#define arch_clear_hugepage_flags arch_clear_hugepage_flags
 
 static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 				  pte_t *ptep, unsigned long sz)
diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h
index adae176..9078b5b 100644
--- a/arch/s390/include/asm/hw_irq.h
+++ b/arch/s390/include/asm/hw_irq.h
@@ -7,6 +7,5 @@
 
 void __init init_airq_interrupts(void);
 void __init init_cio_interrupts(void);
-void __init init_ext_interrupts(void);
 
 #endif
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index ca42161..28664ee 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -12,6 +12,7 @@
 
 #include <linux/kernel.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 #include <asm/pci_io.h>
 
 #define xlate_dev_mem_ptr xlate_dev_mem_ptr
@@ -26,11 +27,10 @@
 
 #define IO_SPACE_LIMIT 0
 
-#define ioremap_nocache(addr, size)	ioremap(addr, size)
-#define ioremap_wc			ioremap_nocache
-#define ioremap_wt			ioremap_nocache
-
-void __iomem *ioremap(unsigned long offset, unsigned long size);
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
+void __iomem *ioremap(phys_addr_t addr, size_t size);
+void __iomem *ioremap_wc(phys_addr_t addr, size_t size);
+void __iomem *ioremap_wt(phys_addr_t addr, size_t size);
 void iounmap(volatile void __iomem *addr);
 
 static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
@@ -56,6 +56,10 @@
 #define pci_iomap_wc pci_iomap_wc
 #define pci_iomap_wc_range pci_iomap_wc_range
 
+#define ioremap ioremap
+#define ioremap_wt ioremap_wt
+#define ioremap_wc ioremap_wc
+
 #define memcpy_fromio(dst, src, count)	zpci_memcpy_fromio(dst, src, count)
 #define memcpy_toio(dst, src, count)	zpci_memcpy_toio(dst, src, count)
 #define memset_io(dst, val, count)	zpci_memset_io(dst, val, count)
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 084e71b..a9e2c72 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -21,6 +21,7 @@
 		struct ipl_pb0_common common;
 		struct ipl_pb0_fcp fcp;
 		struct ipl_pb0_ccw ccw;
+		struct ipl_pb0_nvme nvme;
 		char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
 	};
 } __packed __aligned(PAGE_SIZE);
@@ -30,6 +31,11 @@
 #define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \
 			      sizeof(struct ipl_pb0_fcp))
 #define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp))
+
+#define IPL_BP_NVME_LEN (sizeof(struct ipl_pl_hdr) + \
+			      sizeof(struct ipl_pb0_nvme))
+#define IPL_BP0_NVME_LEN (sizeof(struct ipl_pb0_nvme))
+
 #define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \
 			      sizeof(struct ipl_pb0_ccw))
 #define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
@@ -59,6 +65,8 @@
 	IPL_TYPE_FCP		= 4,
 	IPL_TYPE_FCP_DUMP	= 8,
 	IPL_TYPE_NSS		= 16,
+	IPL_TYPE_NVME		= 32,
+	IPL_TYPE_NVME_DUMP	= 64,
 };
 
 struct ipl_info
@@ -74,6 +82,10 @@
 			u64 lun;
 		} fcp;
 		struct {
+			u32 fid;
+			u32 nsid;
+		} nvme;
+		struct {
 			char name[NSS_NAME_SIZE + 1];
 		} nss;
 	} data;
@@ -83,6 +95,12 @@
 extern void setup_ipl(void);
 extern void set_os_info_reipl_block(void);
 
+static inline bool is_ipl_type_dump(void)
+{
+	return (ipl_info.type == IPL_TYPE_FCP_DUMP) ||
+		(ipl_info.type == IPL_TYPE_NVME_DUMP);
+}
+
 struct ipl_report {
 	struct ipl_parameter_block *ipib;
 	struct list_head components;
@@ -119,6 +137,7 @@
 	DIAG308_LOAD_NORMAL_DUMP = 4,
 	DIAG308_SET = 5,
 	DIAG308_STORE = 6,
+	DIAG308_LOAD_NORMAL = 7,
 };
 
 enum diag308_rc {
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 39f747d..dcb1bba 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -10,7 +10,9 @@
 #define JUMP_LABEL_NOP_SIZE 6
 #define JUMP_LABEL_NOP_OFFSET 2
 
-#if __GNUC__ < 9
+#ifdef CONFIG_CC_IS_CLANG
+#define JUMP_LABEL_STATIC_KEY_CONSTRAINT "i"
+#elif __GNUC__ < 9
 #define JUMP_LABEL_STATIC_KEY_CONSTRAINT "X"
 #else
 #define JUMP_LABEL_STATIC_KEY_CONSTRAINT "jdd"
diff --git a/arch/s390/include/asm/kasan.h b/arch/s390/include/asm/kasan.h
index 70930fe..e9bf486 100644
--- a/arch/s390/include/asm/kasan.h
+++ b/arch/s390/include/asm/kasan.h
@@ -2,8 +2,6 @@
 #ifndef __ASM_KASAN_H
 #define __ASM_KASAN_H
 
-#include <asm/pgtable.h>
-
 #ifdef CONFIG_KASAN
 
 #define KASAN_SHADOW_SCALE_SHIFT 3
@@ -21,6 +19,7 @@
 extern void kasan_early_init(void);
 extern void kasan_copy_shadow(pgd_t *dst);
 extern void kasan_free_early_identity(void);
+extern unsigned long kasan_vmax;
 #else
 static inline void kasan_early_init(void) { }
 static inline void kasan_copy_shadow(pgd_t *dst) { }
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index ea398a0..7f3c9ac 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -74,6 +74,12 @@
 int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
 			 unsigned long addr);
 
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	void *ipl_buf;
+};
+
 extern const struct kexec_file_ops s390_kexec_image_ops;
 extern const struct kexec_file_ops s390_kexec_elf_ops;
 
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index b106aa2..09cdb63 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -54,7 +54,6 @@
 struct arch_specific_insn {
 	/* copy of original instruction */
 	kprobe_opcode_t *insn;
-	unsigned int is_ftrace_insn : 1;
 };
 
 struct prev_kprobe {
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4d59d11..171913b 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -122,6 +122,17 @@
 	__u32 reserved;
 };
 
+#define CR0_INITIAL_MASK (CR0_UNUSED_56 | CR0_INTERRUPT_KEY_SUBMASK | \
+			  CR0_MEASUREMENT_ALERT_SUBMASK)
+#define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
+			   CR14_EXTERNAL_DAMAGE_SUBMASK)
+
+#define SIDAD_SIZE_MASK		0xff
+#define sida_origin(sie_block) \
+	((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+	((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
 #define CPUSTAT_STOPPED    0x80000000
 #define CPUSTAT_WAIT       0x10000000
 #define CPUSTAT_ECALL_PEND 0x08000000
@@ -155,7 +166,13 @@
 	__u8	reserved08[4];		/* 0x0008 */
 #define PROG_IN_SIE (1<<0)
 	__u32	prog0c;			/* 0x000c */
-	__u8	reserved10[16];		/* 0x0010 */
+	union {
+		__u8	reserved10[16];		/* 0x0010 */
+		struct {
+			__u64	pv_handle_cpu;
+			__u64	pv_handle_config;
+		};
+	};
 #define PROG_BLOCK_SIE	(1<<0)
 #define PROG_REQUEST	(1<<1)
 	atomic_t prog20;		/* 0x0020 */
@@ -204,10 +221,23 @@
 #define ICPT_PARTEXEC	0x38
 #define ICPT_IOINST	0x40
 #define ICPT_KSS	0x5c
+#define ICPT_MCHKREQ	0x60
+#define ICPT_INT_ENABLE	0x64
+#define ICPT_PV_INSTR	0x68
+#define ICPT_PV_NOTIFY	0x6c
+#define ICPT_PV_PREF	0x70
 	__u8	icptcode;		/* 0x0050 */
 	__u8	icptstatus;		/* 0x0051 */
 	__u16	ihcpu;			/* 0x0052 */
-	__u8	reserved54[2];		/* 0x0054 */
+	__u8	reserved54;		/* 0x0054 */
+#define IICTL_CODE_NONE		 0x00
+#define IICTL_CODE_MCHK		 0x01
+#define IICTL_CODE_EXT		 0x02
+#define IICTL_CODE_IO		 0x03
+#define IICTL_CODE_RESTART	 0x04
+#define IICTL_CODE_SPECIFICATION 0x10
+#define IICTL_CODE_OPERAND	 0x11
+	__u8	iictl;			/* 0x0055 */
 	__u16	ipa;			/* 0x0056 */
 	__u32	ipb;			/* 0x0058 */
 	__u32	scaoh;			/* 0x005c */
@@ -228,9 +258,10 @@
 #define ECB3_RI  0x01
 	__u8    ecb3;			/* 0x0063 */
 	__u32	scaol;			/* 0x0064 */
-	__u8	reserved68;		/* 0x0068 */
+	__u8	sdf;			/* 0x0068 */
 	__u8    epdx;			/* 0x0069 */
-	__u8    reserved6a[2];		/* 0x006a */
+	__u8	cpnc;			/* 0x006a */
+	__u8	reserved6b;		/* 0x006b */
 	__u32	todpr;			/* 0x006c */
 #define GISA_FORMAT1 0x00000001
 	__u32	gd;			/* 0x0070 */
@@ -244,31 +275,58 @@
 #define HPID_KVM	0x4
 #define HPID_VSIE	0x5
 	__u8	hpid;			/* 0x00b8 */
-	__u8	reservedb9[11];		/* 0x00b9 */
-	__u16	extcpuaddr;		/* 0x00c4 */
-	__u16	eic;			/* 0x00c6 */
+	__u8	reservedb9[7];		/* 0x00b9 */
+	union {
+		struct {
+			__u32	eiparams;	/* 0x00c0 */
+			__u16	extcpuaddr;	/* 0x00c4 */
+			__u16	eic;		/* 0x00c6 */
+		};
+		__u64	mcic;			/* 0x00c0 */
+	} __packed;
 	__u32	reservedc8;		/* 0x00c8 */
-	__u16	pgmilc;			/* 0x00cc */
-	__u16	iprcc;			/* 0x00ce */
-	__u32	dxc;			/* 0x00d0 */
-	__u16	mcn;			/* 0x00d4 */
-	__u8	perc;			/* 0x00d6 */
-	__u8	peratmid;		/* 0x00d7 */
+	union {
+		struct {
+			__u16	pgmilc;		/* 0x00cc */
+			__u16	iprcc;		/* 0x00ce */
+		};
+		__u32	edc;			/* 0x00cc */
+	} __packed;
+	union {
+		struct {
+			__u32	dxc;		/* 0x00d0 */
+			__u16	mcn;		/* 0x00d4 */
+			__u8	perc;		/* 0x00d6 */
+			__u8	peratmid;	/* 0x00d7 */
+		};
+		__u64	faddr;			/* 0x00d0 */
+	} __packed;
 	__u64	peraddr;		/* 0x00d8 */
 	__u8	eai;			/* 0x00e0 */
 	__u8	peraid;			/* 0x00e1 */
 	__u8	oai;			/* 0x00e2 */
 	__u8	armid;			/* 0x00e3 */
 	__u8	reservede4[4];		/* 0x00e4 */
-	__u64	tecmc;			/* 0x00e8 */
-	__u8	reservedf0[12];		/* 0x00f0 */
+	union {
+		__u64	tecmc;		/* 0x00e8 */
+		struct {
+			__u16	subchannel_id;	/* 0x00e8 */
+			__u16	subchannel_nr;	/* 0x00ea */
+			__u32	io_int_parm;	/* 0x00ec */
+			__u32	io_int_word;	/* 0x00f0 */
+		};
+	} __packed;
+	__u8	reservedf4[8];		/* 0x00f4 */
 #define CRYCB_FORMAT_MASK 0x00000003
 #define CRYCB_FORMAT0 0x00000000
 #define CRYCB_FORMAT1 0x00000001
 #define CRYCB_FORMAT2 0x00000003
 	__u32	crycbd;			/* 0x00fc */
 	__u64	gcr[16];		/* 0x0100 */
-	__u64	gbea;			/* 0x0180 */
+	union {
+		__u64	gbea;		/* 0x0180 */
+		__u64	sidad;
+	};
 	__u8    reserved188[8];		/* 0x0188 */
 	__u64   sdnxo;			/* 0x0190 */
 	__u8    reserved198[8];		/* 0x0198 */
@@ -287,7 +345,7 @@
 	__u64	itdba;			/* 0x01e8 */
 	__u64   riccbd;			/* 0x01f0 */
 	__u64	gvrd;			/* 0x01f8 */
-} __attribute__((packed));
+} __packed __aligned(512);
 
 struct kvm_s390_itdb {
 	__u8	data[256];
@@ -296,7 +354,9 @@
 struct sie_page {
 	struct kvm_s390_sie_block sie_block;
 	struct mcck_volatile_info mcck_info;	/* 0x0200 */
-	__u8 reserved218[1000];		/* 0x0218 */
+	__u8 reserved218[360];		/* 0x0218 */
+	__u64 pv_grregs[16];		/* 0x0380 */
+	__u8 reserved400[512];		/* 0x0400 */
 	struct kvm_s390_itdb itdb;	/* 0x0600 */
 	__u8 reserved700[2304];		/* 0x0700 */
 };
@@ -316,6 +376,8 @@
 	u64 halt_poll_invalid;
 	u64 halt_no_poll_steal;
 	u64 halt_wakeup;
+	u64 halt_poll_success_ns;
+	u64 halt_poll_fail_ns;
 	u64 instruction_lctl;
 	u64 instruction_lctlg;
 	u64 instruction_stctl;
@@ -392,6 +454,7 @@
 	u64 diagnose_10;
 	u64 diagnose_44;
 	u64 diagnose_9c;
+	u64 diagnose_9c_ignored;
 	u64 diagnose_258;
 	u64 diagnose_308;
 	u64 diagnose_500;
@@ -470,6 +533,7 @@
 	IRQ_PEND_PFAULT_INIT,
 	IRQ_PEND_EXT_HOST,
 	IRQ_PEND_EXT_SERVICE,
+	IRQ_PEND_EXT_SERVICE_EV,
 	IRQ_PEND_EXT_TIMING,
 	IRQ_PEND_EXT_CPU_TIMER,
 	IRQ_PEND_EXT_CLOCK_COMP,
@@ -514,6 +578,7 @@
 			   (1UL << IRQ_PEND_EXT_TIMING)     | \
 			   (1UL << IRQ_PEND_EXT_HOST)       | \
 			   (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			   (1UL << IRQ_PEND_EXT_SERVICE_EV) | \
 			   (1UL << IRQ_PEND_VIRTIO)         | \
 			   (1UL << IRQ_PEND_PFAULT_INIT)    | \
 			   (1UL << IRQ_PEND_PFAULT_DONE))
@@ -530,6 +595,13 @@
 #define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
 			    (1UL << IRQ_PEND_MCHK_EX))
 
+#define IRQ_PEND_EXT_II_MASK ((1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			      (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			      (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			      (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			      (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			      (1UL << IRQ_PEND_EXT_SERVICE_EV))
+
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	u64	type;
@@ -588,6 +660,7 @@
 
 struct kvm_s390_float_interrupt {
 	unsigned long pending_irqs;
+	unsigned long masked_irqs;
 	spinlock_t lock;
 	struct list_head lists[FIRQ_LIST_COUNT];
 	int counters[FIRQ_MAX_COUNT];
@@ -639,6 +712,11 @@
 	unsigned long last_bp;
 };
 
+struct kvm_s390_pv_vcpu {
+	u64 handle;
+	unsigned long stor_base;
+};
+
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
 	/* if vsie is active, currently executed shadow sie control block */
@@ -667,6 +745,8 @@
 	__u64 cputm_start;
 	bool gs_enabled;
 	bool skey_enabled;
+	struct kvm_s390_pv_vcpu pv;
+	union diag318_info diag318_info;
 };
 
 struct kvm_vm_stat {
@@ -695,9 +775,6 @@
 	bool masked;
 	bool swap;
 	bool suppressible;
-	struct rw_semaphore maps_lock;
-	struct list_head maps;
-	atomic_t nr_maps;
 };
 
 #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
@@ -840,6 +917,13 @@
 	DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
 };
 
+struct kvm_s390_pv {
+	u64 handle;
+	u64 guest_len;
+	unsigned long stor_base;
+	void *stor_var;
+};
+
 struct kvm_arch{
 	void *sca;
 	int use_esca;
@@ -876,6 +960,7 @@
 	/* indexed by vcpu_idx */
 	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
 	struct kvm_s390_gisa_interrupt gisa_int;
+	struct kvm_s390_pv pv;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
@@ -891,17 +976,19 @@
 	unsigned long pfault_token;
 };
 
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
 
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
 			       struct kvm_async_pf *work);
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 				     struct kvm_async_pf *work);
 
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 				 struct kvm_async_pf *work);
 
+static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {}
+
 void kvm_arch_crypto_clear_masks(struct kvm *kvm);
 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
 			       unsigned long *aqm, unsigned long *adm);
@@ -914,10 +1001,9 @@
 
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
-		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+					 struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 7f22262..a0a7a2c 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -2,38 +2,27 @@
 #ifndef __ASM_LINKAGE_H
 #define __ASM_LINKAGE_H
 
+#include <asm/asm-const.h>
 #include <linux/stringify.h>
 
 #define __ALIGN .align 4, 0x07
 #define __ALIGN_STR __stringify(__ALIGN)
 
-#ifndef __ASSEMBLY__
-
 /*
  * Helper macro for exception table entries
  */
-#define EX_TABLE(_fault, _target)	\
-	".section __ex_table,\"a\"\n"	\
-	".align	4\n"			\
-	".long	(" #_fault ") - .\n"	\
-	".long	(" #_target ") - .\n"	\
-	".previous\n"
 
-#else /* __ASSEMBLY__ */
+#define __EX_TABLE(_section, _fault, _target)				\
+	stringify_in_c(.section	_section,"a";)				\
+	stringify_in_c(.align	8;)					\
+	stringify_in_c(.long	(_fault) - .;)				\
+	stringify_in_c(.long	(_target) - .;)				\
+	stringify_in_c(.quad	0;)					\
+	stringify_in_c(.previous)
 
-#define EX_TABLE(_fault, _target)	\
-	.section __ex_table,"a"	;	\
-	.align	4 ;			\
-	.long	(_fault) - . ;		\
-	.long	(_target) - . ;		\
-	.previous
+#define EX_TABLE(_fault, _target)					\
+	__EX_TABLE(__ex_table, _fault, _target)
+#define EX_TABLE_DMA(_fault, _target)					\
+	__EX_TABLE(.dma.ex_table, _fault, _target)
 
-#define EX_TABLE_DMA(_fault, _target)	\
-	.section .dma.ex_table, "a" ;	\
-	.align	4 ;			\
-	.long	(_fault) - . ;		\
-	.long	(_target) - . ;		\
-	.previous
-
-#endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bcfb637..e12ff0f 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,6 +16,8 @@
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
+	/* The mmu context belongs to a secure guest. */
+	atomic_t is_protected;
 	/*
 	 * The following bitfields need a down_write on the mm
 	 * semaphore when they are written to. As they are only
@@ -32,8 +34,6 @@
 	unsigned int uses_cmm:1;
 	/* The gmaps associated with this context are allowed to use huge pages. */
 	unsigned int allow_gmap_hpage_1m:1;
-	/* The mmu context is for compat task */
-	unsigned int compat_mm:1;
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8d04e6f..c9f3d8a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -18,14 +18,16 @@
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	unsigned long asce_type, init_entry;
+
 	spin_lock_init(&mm->context.lock);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
+	atomic_set(&mm->context.is_protected, 0);
 	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
-	mm->context.compat_mm = test_thread_flag(TIF_31BIT);
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste ||
 		test_thread_flag(TIF_PGSTE) ||
@@ -36,33 +38,34 @@
 	mm->context.allow_gmap_hpage_1m = 0;
 #endif
 	switch (mm->context.asce_limit) {
-	case _REGION2_SIZE:
+	default:
 		/*
-		 * forked 3-level task, fall through to set new asce with new
-		 * mm->pgd
+		 * context created by exec, the value of asce_limit can
+		 * only be zero in this case
 		 */
-	case 0:
-		/* context created by exec, set asce limit to 4TB */
-		mm->context.asce_limit = STACK_TOP_MAX;
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+		VM_BUG_ON(mm->context.asce_limit);
+		/* continue as 3-level task */
+		mm->context.asce_limit = _REGION2_SIZE;
+		fallthrough;
+	case _REGION2_SIZE:
+		/* forked 3-level task */
+		init_entry = _REGION3_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION3;
 		break;
-	case -PAGE_SIZE:
-		/* forked 5-level task, set new asce with new_mm->pgd */
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+	case TASK_SIZE_MAX:
+		/* forked 5-level task */
+		init_entry = _REGION1_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION1;
 		break;
 	case _REGION1_SIZE:
-		/* forked 4-level task, set new asce with new mm->pgd */
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		/* forked 4-level task */
+		init_entry = _REGION2_ENTRY_EMPTY;
+		asce_type = _ASCE_TYPE_REGION2;
 		break;
-	case _REGION3_SIZE:
-		/* forked 2-level compat task, set new asce with new mm->pgd */
-		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
 	}
-	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
+	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			   _ASCE_USER_BITS | asce_type;
+	crst_table_init((unsigned long *) mm->pgd, init_entry);
 	return 0;
 }
 
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b160da8..5afee80 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -99,7 +99,7 @@
 void nmi_free_per_cpu(struct lowcore *lc);
 
 void s390_handle_mcck(void);
-void s390_do_machine_check(struct pt_regs *regs);
+int s390_do_machine_check(struct pt_regs *regs);
 
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_NMI_H */
diff --git a/arch/s390/include/asm/numa.h b/arch/s390/include/asm/numa.h
index c759dcf..23cd5d1 100644
--- a/arch/s390/include/asm/numa.h
+++ b/arch/s390/include/asm/numa.h
@@ -13,23 +13,13 @@
 #ifdef CONFIG_NUMA
 
 #include <linux/numa.h>
-#include <linux/cpumask.h>
 
 void numa_setup(void);
-int numa_pfn_to_nid(unsigned long pfn);
-void numa_update_cpu_topology(void);
-
-extern cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-extern int numa_debug_enabled;
 
 #else
 
 static inline void numa_setup(void) { }
-static inline void numa_update_cpu_topology(void) { }
-static inline int numa_pfn_to_nid(unsigned long pfn)
-{
-	return 0;
-}
 
 #endif /* CONFIG_NUMA */
+
 #endif /* _ASM_S390_NUMA_H */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index e399102..cc98f9b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -153,6 +153,11 @@
 #define HAVE_ARCH_FREE_PAGE
 #define HAVE_ARCH_ALLOC_PAGE
 
+#if IS_ENABLED(CONFIG_PGSTE)
+int arch_make_page_accessible(struct page *page);
+#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #define __PAGE_OFFSET		0x0UL
@@ -161,25 +166,22 @@
 #define __pa(x)			((unsigned long)(x))
 #define __va(x)			((void *)(unsigned long)(x))
 
-#define virt_to_pfn(kaddr)	(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn)	__va((pfn) << PAGE_SHIFT)
+#define phys_to_pfn(phys)	((phys) >> PAGE_SHIFT)
+#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+
+#define phys_to_page(phys)	pfn_to_page(phys_to_pfn(phys))
+#define page_to_phys(page)	pfn_to_phys(page_to_pfn(page))
+
+#define pfn_to_virt(pfn)	__va(pfn_to_phys(pfn))
+#define virt_to_pfn(kaddr)	(phys_to_pfn(__pa(kaddr)))
 #define pfn_to_kaddr(pfn)	pfn_to_virt(pfn)
 
 #define virt_to_page(kaddr)	pfn_to_page(virt_to_pfn(kaddr))
 #define page_to_virt(page)	pfn_to_virt(page_to_pfn(page))
 
-#define phys_to_pfn(kaddr)	((kaddr) >> PAGE_SHIFT)
-#define pfn_to_phys(pfn)	((pfn) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)	pfn_valid(virt_to_pfn(kaddr))
 
-#define phys_to_page(kaddr)	pfn_to_page(phys_to_pfn(kaddr))
-#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
-
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-
-#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | \
-				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define ARCH_ZONE_DMA_BITS	31
+#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_NON_EXEC
 
 #include <asm-generic/memory_model.h>
 #include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 6087a4e..1226971 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -2,12 +2,10 @@
 #ifndef __ASM_S390_PCI_H
 #define __ASM_S390_PCI_H
 
-/* must be set before including pci_clp.h */
-#define PCI_BAR_COUNT	6
-
 #include <linux/pci.h>
 #include <linux/mutex.h>
 #include <linux/iommu.h>
+#include <linux/pci_hotplug.h>
 #include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
@@ -24,10 +22,16 @@
 int pci_proc_domain(struct pci_bus *);
 
 #define ZPCI_BUS_NR			0	/* default bus number */
-#define ZPCI_DEVFN			0	/* default device number */
 
 #define ZPCI_NR_DMA_SPACES		1
 #define ZPCI_NR_DEVICES			CONFIG_PCI_NR_FUNCTIONS
+#define ZPCI_DOMAIN_BITMAP_SIZE		(1 << 16)
+
+#ifdef PCI
+#if (ZPCI_NR_DEVICES > ZPCI_DOMAIN_BITMAP_SIZE)
+# error ZPCI_NR_DEVICES can not be bigger than ZPCI_DOMAIN_BITMAP_SIZE
+#endif
+#endif /* PCI */
 
 /* PCI Function Controls */
 #define ZPCI_FC_FN_ENABLED		0x80
@@ -95,10 +99,27 @@
 
 struct s390_domain;
 
+#define ZPCI_FUNCTIONS_PER_BUS 256
+struct zpci_bus {
+	struct kref		kref;
+	struct pci_bus		*bus;
+	struct zpci_dev		*function[ZPCI_FUNCTIONS_PER_BUS];
+	struct list_head	resources;
+	struct list_head	bus_next;
+	struct resource		bus_resource;
+	int			pchid;
+	int			domain_nr;
+	bool			multifunction;
+	enum pci_bus_speed	max_bus_speed;
+};
+
 /* Private data per function */
 struct zpci_dev {
-	struct pci_bus	*bus;
+	struct zpci_bus *zbus;
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
+	struct list_head bus_next;
+	struct kref kref;
+	struct hotplug_slot hotplug_slot;
 
 	enum zpci_state state;
 	u32		fid;		/* function ID, used by sclp */
@@ -107,7 +128,13 @@
 	u16		pchid;		/* physical channel ID */
 	u8		pfgid;		/* function group ID */
 	u8		pft;		/* pci function type */
-	u16		domain;
+	u8		port;
+	u8		rid_available	: 1;
+	u8		has_hp_slot	: 1;
+	u8		is_physfn	: 1;
+	u8		util_str_avail	: 1;
+	u8		reserved	: 4;
+	unsigned int	devfn;		/* DEVFN part of the RID*/
 
 	struct mutex lock;
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
@@ -138,7 +165,7 @@
 
 	char res_name[16];
 	bool mio_capable;
-	struct zpci_bar_struct bars[PCI_BAR_COUNT];
+	struct zpci_bar_struct bars[PCI_STD_NUM_BARS];
 
 	u64		start_dma;	/* Start of available DMA addresses */
 	u64		end_dma;	/* End of available DMA addresses */
@@ -153,6 +180,7 @@
 	atomic64_t mapped_pages;
 	atomic64_t unmapped_pages;
 
+	u8		version;
 	enum pci_bus_speed max_bus_speed;
 
 	struct dentry	*debugfs_dev;
@@ -167,28 +195,34 @@
 
 extern const struct attribute_group *zpci_attr_groups[];
 extern unsigned int s390_pci_force_floating __initdata;
+extern unsigned int s390_pci_no_rid;
 
 /* -----------------------------------------------------------------------------
   Prototypes
 ----------------------------------------------------------------------------- */
 /* Base stuff */
-int zpci_create_device(struct zpci_dev *);
-void zpci_remove_device(struct zpci_dev *zdev);
+int zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error);
 int zpci_enable_device(struct zpci_dev *);
 int zpci_disable_device(struct zpci_dev *);
+void zpci_device_reserved(struct zpci_dev *zdev);
+bool zpci_is_device_configured(struct zpci_dev *zdev);
+
 int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 void zpci_remove_reserved_devices(void);
 
 /* CLP */
+int clp_setup_writeback_mio(void);
 int clp_scan_pci_devices(void);
-int clp_rescan_pci_devices(void);
-int clp_rescan_pci_devices_simple(u32 *fid);
-int clp_add_pci_device(u32, u32, int);
+int clp_query_pci_fn(struct zpci_dev *zdev);
 int clp_enable_fh(struct zpci_dev *, u8);
 int clp_disable_fh(struct zpci_dev *);
 int clp_get_state(u32 fid, enum zpci_state *state);
 
+/* UID */
+void update_uid_checking(bool new);
+
 /* IOMMU Interface */
 int zpci_init_iommu(struct zpci_dev *zdev);
 void zpci_destroy_iommu(struct zpci_dev *zdev);
@@ -202,12 +236,10 @@
 /* Error handling and recovery */
 void zpci_event_error(void *);
 void zpci_event_availability(void *);
-void zpci_rescan(void);
 bool zpci_is_enabled(void);
 #else /* CONFIG_PCI */
 static inline void zpci_event_error(void *e) {}
 static inline void zpci_event_availability(void *e) {}
-static inline void zpci_rescan(void) {}
 #endif /* CONFIG_PCI */
 
 #ifdef CONFIG_HOTPLUG_PCI_S390
@@ -224,7 +256,14 @@
 /* Helpers */
 static inline struct zpci_dev *to_zpci(struct pci_dev *pdev)
 {
-	return pdev->sysdata;
+	struct zpci_bus *zbus = pdev->sysdata;
+
+	return zbus->function[pdev->devfn];
+}
+
+static inline struct zpci_dev *to_zpci_dev(struct device *dev)
+{
+	return to_zpci(to_pci_dev(dev));
 }
 
 struct zpci_dev *get_zdev_by_fid(u32);
@@ -245,7 +284,6 @@
 void zpci_debug_exit(void);
 void zpci_debug_init_device(struct zpci_dev *, const char *);
 void zpci_debug_exit_device(struct zpci_dev *);
-void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
 /* Error reporting */
 int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index 5035917..1f4b666 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -7,6 +7,7 @@
 /*
  * Call Logical Processor - Command Codes
  */
+#define CLP_SLPC		0x0001
 #define CLP_LIST_PCI		0x0002
 #define CLP_QUERY_PCI_FN	0x0003
 #define CLP_QUERY_PCI_FNGRP	0x0004
@@ -51,6 +52,19 @@
 
 extern bool zpci_unique_uid;
 
+struct clp_rsp_slpc_pci {
+	struct clp_rsp_hdr hdr;
+	u32 reserved2[4];
+	u32 lpif[8];
+	u32 reserved3[4];
+	u32 vwb		:  1;
+	u32		:  1;
+	u32 mio_wb	:  6;
+	u32		: 24;
+	u32 reserved5[3];
+	u32 lpic[8];
+} __packed;
+
 /* List PCI functions request */
 struct clp_req_list_pci {
 	struct clp_req_hdr hdr;
@@ -77,7 +91,7 @@
 	struct {
 		u64 wb;
 		u64 wt;
-	} addr[PCI_BAR_COUNT];
+	} addr[PCI_STD_NUM_BARS];
 	u32 reserved[6];
 } __packed;
 
@@ -93,21 +107,28 @@
 struct clp_rsp_query_pci {
 	struct clp_rsp_hdr hdr;
 	u16 vfn;			/* virtual fn number */
-	u16			:  6;
+	u16			:  3;
+	u16 rid_avail		:  1;
+	u16 is_physfn		:  1;
+	u16 reserved1		:  1;
 	u16 mio_addr_avail	:  1;
 	u16 util_str_avail	:  1;	/* utility string available? */
 	u16 pfgid		:  8;	/* pci function group id */
 	u32 fid;			/* pci function id */
-	u8 bar_size[PCI_BAR_COUNT];
+	u8 bar_size[PCI_STD_NUM_BARS];
 	u16 pchid;
-	__le32 bar[PCI_BAR_COUNT];
+	__le32 bar[PCI_STD_NUM_BARS];
 	u8 pfip[CLP_PFIP_NR_SEGMENTS];	/* pci function internal path */
-	u32			: 16;
+	u16			: 12;
+	u16 port		:  4;
 	u8 fmb_len;
 	u8 pft;				/* pci function type */
 	u64 sdma;			/* start dma as */
 	u64 edma;			/* end dma as */
-	u32 reserved[11];
+#define ZPCI_RID_MASK_DEVFN 0x00ff
+	u16 rid;			/* BUS/DEVFN PCI address */
+	u16 reserved0;
+	u32 reserved[10];
 	u32 uid;			/* user defined id */
 	u8 util_str[CLP_UTIL_STR_LEN];	/* utility string */
 	u32 reserved2[16];
@@ -165,6 +186,11 @@
 } __packed;
 
 /* Combined request/response block structures used by clp insn */
+struct clp_req_rsp_slpc_pci {
+	struct clp_req_slpc request;
+	struct clp_rsp_slpc_pci response;
+} __packed;
+
 struct clp_req_rsp_list_pci {
 	struct clp_req_list_pci request;
 	struct clp_rsp_list_pci response;
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 419fac7..f62cd3e 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -131,12 +131,6 @@
 	*entry |= ZPCI_TABLE_VALID;
 }
 
-static inline void invalidate_table_entry(unsigned long *entry)
-{
-	*entry &= ~ZPCI_TABLE_VALID_MASK;
-	*entry |= ZPCI_TABLE_INVALID;
-}
-
 static inline void invalidate_pt_entry(unsigned long *entry)
 {
 	WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
@@ -173,11 +167,6 @@
 	return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
 }
 
-static inline int entry_isprotected(unsigned long entry)
-{
-	return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
-}
-
 static inline unsigned long *get_rt_sto(unsigned long entry)
 {
 	return ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index e4dc64c..287bb88 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -14,12 +14,13 @@
 
 /* I/O Map */
 #define ZPCI_IOMAP_SHIFT		48
-#define ZPCI_IOMAP_ADDR_BASE		0x8000000000000000UL
+#define ZPCI_IOMAP_ADDR_SHIFT		62
+#define ZPCI_IOMAP_ADDR_BASE		(1UL << ZPCI_IOMAP_ADDR_SHIFT)
 #define ZPCI_IOMAP_ADDR_OFF_MASK	((1UL << ZPCI_IOMAP_SHIFT) - 1)
 #define ZPCI_IOMAP_MAX_ENTRIES							\
-	((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT))
+	(1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT))
 #define ZPCI_IOMAP_ADDR_IDX_MASK						\
-	(~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE)
+	((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK)
 
 struct zpci_iomap_entry {
 	u32 fh;
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index 4652fff..b9da716 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -12,6 +12,7 @@
 
 #include <linux/perf_event.h>
 #include <linux/device.h>
+#include <asm/stacktrace.h>
 
 /* Per-CPU flags for PMU states */
 #define PMU_F_RESERVED			0x1000
@@ -73,4 +74,10 @@
 #define SDB_FULL_BLOCKS(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS)
 #define SAMPLE_FREQ_MODE(hwc)	(SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FREQ_MODE)
 
+#define perf_arch_fetch_caller_regs(regs, __ip) do {			\
+	(regs)->psw.addr = (__ip);					\
+	(regs)->gprs[15] = (unsigned long)__builtin_frame_address(0) -	\
+		offsetof(struct stack_frame, back_chain);		\
+} while (0)
+
 #endif /* _ASM_S390_PERF_EVENT_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 77606c4..d1297d6 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -34,19 +34,21 @@
 	memset64((u64 *)crst, entry, _CRST_ENTRIES);
 }
 
-static inline unsigned long pgd_entry_type(struct mm_struct *mm)
-{
-	if (mm_pmd_folded(mm))
-		return _SEGMENT_ENTRY_EMPTY;
-	if (mm_pud_folded(mm))
-		return _REGION3_ENTRY_EMPTY;
-	if (mm_p4d_folded(mm))
-		return _REGION2_ENTRY_EMPTY;
-	return _REGION1_ENTRY_EMPTY;
-}
-
 int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
-void crst_table_downgrade(struct mm_struct *);
+
+static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
+					     unsigned long len)
+{
+	int rc;
+
+	if (addr + len > mm->context.asce_limit &&
+	    addr + len <= TASK_SIZE) {
+		rc = crst_table_upgrade(mm, addr + len);
+		if (rc)
+			return (unsigned long) rc;
+	}
+	return addr;
+}
 
 static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
 {
@@ -116,24 +118,11 @@
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	unsigned long *table = crst_table_alloc(mm);
-
-	if (!table)
-		return NULL;
-	if (mm->context.asce_limit == _REGION3_SIZE) {
-		/* Forking a compat process with 2 page table levels */
-		if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
-			crst_table_free(mm, table);
-			return NULL;
-		}
-	}
-	return (pgd_t *) table;
+	return (pgd_t *) crst_table_alloc(mm);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	if (mm->context.asce_limit == _REGION3_SIZE)
-		pgtable_pmd_page_dtor(virt_to_page(pgd));
 	crst_table_free(mm, (unsigned long *) pgd);
 }
 
@@ -157,8 +146,6 @@
 #define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
 #define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
 
-extern void rcu_table_freelist_finish(void);
-
 void vmem_map_init(void);
 void *vmem_crst_alloc(unsigned long val);
 pte_t *vmem_pte_alloc(void);
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5ce5869..2338345 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <asm/bug.h>
 #include <asm/page.h>
+#include <asm/uv.h>
 
 extern pgd_t swapper_pg_dir[];
 extern void paging_init(void);
@@ -88,6 +89,7 @@
 extern unsigned long VMALLOC_END;
 #define VMALLOC_DEFAULT_SIZE	((128UL << 30) - MODULES_LEN)
 extern struct page *vmemmap;
+extern unsigned long vmemmap_size;
 
 #define VMEM_MAX_PHYS ((unsigned long) vmemmap)
 
@@ -266,11 +268,9 @@
 #endif
 
 #define _REGION_ENTRY_BITS	 0xfffffffffffff22fUL
-#define _REGION_ENTRY_BITS_LARGE 0xffffffff8000fe2fUL
 
 /* Bits in the segment table entry */
 #define _SEGMENT_ENTRY_BITS			0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE		0xfffffffffff0ff33UL
 #define _SEGMENT_ENTRY_HARDWARE_BITS		0xfffffffffffffe30UL
 #define _SEGMENT_ENTRY_HARDWARE_BITS_LARGE	0xfffffffffff00730UL
 #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address	    */
@@ -522,6 +522,15 @@
 	return 0;
 }
 
+static inline int mm_is_protected(struct mm_struct *mm)
+{
+#ifdef CONFIG_PGSTE
+	if (unlikely(atomic_read(&mm->context.is_protected)))
+		return 1;
+#endif
+	return 0;
+}
+
 static inline int mm_alloc_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
@@ -675,6 +684,7 @@
 	return pud_val(pud) == _REGION3_ENTRY_EMPTY;
 }
 
+#define pud_leaf	pud_large
 static inline int pud_large(pud_t pud)
 {
 	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) != _REGION_ENTRY_TYPE_R3)
@@ -682,16 +692,7 @@
 	return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
 }
 
-static inline unsigned long pud_pfn(pud_t pud)
-{
-	unsigned long origin_mask;
-
-	origin_mask = _REGION_ENTRY_ORIGIN;
-	if (pud_large(pud))
-		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
-	return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
-}
-
+#define pmd_leaf	pmd_large
 static inline int pmd_large(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
@@ -699,10 +700,8 @@
 
 static inline int pmd_bad(pmd_t pmd)
 {
-	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0)
+	if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0 || pmd_large(pmd))
 		return 1;
-	if (pmd_large(pmd))
-		return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
 	return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
 }
 
@@ -710,12 +709,10 @@
 {
 	unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
 
-	if (type > _REGION_ENTRY_TYPE_R3)
+	if (type > _REGION_ENTRY_TYPE_R3 || pud_large(pud))
 		return 1;
 	if (type < _REGION_ENTRY_TYPE_R3)
 		return 0;
-	if (pud_large(pud))
-		return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
 	return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
 }
 
@@ -740,16 +737,6 @@
 	return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
 }
 
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
-	unsigned long origin_mask;
-
-	origin_mask = _SEGMENT_ENTRY_ORIGIN;
-	if (pmd_large(pmd))
-		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
-	return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
-}
-
 #define pmd_write pmd_write
 static inline int pmd_write(pmd_t pmd)
 {
@@ -764,18 +751,12 @@
 
 static inline int pmd_dirty(pmd_t pmd)
 {
-	int dirty = 1;
-	if (pmd_large(pmd))
-		dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
-	return dirty;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
 }
 
 static inline int pmd_young(pmd_t pmd)
 {
-	int young = 1;
-	if (pmd_large(pmd))
-		young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
-	return young;
+	return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
 }
 
 static inline int pte_present(pte_t pte)
@@ -884,6 +865,25 @@
 }
 
 /*
+ * Extract the pgprot value from the given pte while at the same time making it
+ * usable for kernel address space mappings where fault driven dirty and
+ * young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
+ * must not be set.
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+	unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
+
+	if (pte_write(pte))
+		pte_flags |= pgprot_val(PAGE_KERNEL);
+	else
+		pte_flags |= pgprot_val(PAGE_KERNEL_RO);
+	pte_flags |= pte_val(pte) & mio_wb_bit_mask;
+
+	return __pgprot(pte_flags);
+}
+
+/*
  * pgd/pmd/pte modification functions
  */
 
@@ -1077,7 +1077,12 @@
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 				       unsigned long addr, pte_t *ptep)
 {
-	return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+	pte_t res;
+
+	res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+	if (mm_is_protected(mm) && pte_present(res))
+		uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+	return res;
 }
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -1089,7 +1094,12 @@
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
 				     unsigned long addr, pte_t *ptep)
 {
-	return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+	pte_t res;
+
+	res = ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID));
+	if (mm_is_protected(vma->vm_mm) && pte_present(res))
+		uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+	return res;
 }
 
 /*
@@ -1104,12 +1114,17 @@
 					    unsigned long addr,
 					    pte_t *ptep, int full)
 {
+	pte_t res;
+
 	if (full) {
-		pte_t pte = *ptep;
+		res = *ptep;
 		*ptep = __pte(_PAGE_INVALID);
-		return pte;
+	} else {
+		res = ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
 	}
-	return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID));
+	if (mm_is_protected(mm) && pte_present(res))
+		uv_convert_from_secure(pte_val(res) & PAGE_MASK);
+	return res;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -1171,6 +1186,12 @@
 void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr);
 void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
 
+#define pgprot_writecombine	pgprot_writecombine
+pgprot_t pgprot_writecombine(pgprot_t prot);
+
+#define pgprot_writethrough	pgprot_writethrough
+pgprot_t pgprot_writethrough(pgprot_t prot);
+
 /*
  * Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
@@ -1194,7 +1215,8 @@
 static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
 {
 	pte_t __pte;
-	pte_val(__pte) = physpage + pgprot_val(pgprot);
+
+	pte_val(__pte) = physpage | pgprot_val(pgprot);
 	if (!MACHINE_HAS_NX)
 		pte_val(__pte) &= ~_PAGE_NOEXEC;
 	return pte_mkyoung(__pte);
@@ -1214,13 +1236,40 @@
 #define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
 
-#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
-#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
 #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
 #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
 
+static inline unsigned long pmd_deref(pmd_t pmd)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _SEGMENT_ENTRY_ORIGIN;
+	if (pmd_large(pmd))
+		origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+	return pmd_val(pmd) & origin_mask;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	return pmd_deref(pmd) >> PAGE_SHIFT;
+}
+
+static inline unsigned long pud_deref(pud_t pud)
+{
+	unsigned long origin_mask;
+
+	origin_mask = _REGION_ENTRY_ORIGIN;
+	if (pud_large(pud))
+		origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+	return pud_val(pud) & origin_mask;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	return pud_deref(pud) >> PAGE_SHIFT;
+}
+
 /*
  * The pgd_offset function *always* adds the index for the top-level
  * region/segment table. This is done to get a sequence like the
@@ -1245,7 +1294,6 @@
 }
 
 #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
 {
@@ -1288,16 +1336,11 @@
 }
 #define pmd_offset pmd_offset
 
-static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 {
-	return (pte_t *) pmd_deref(*pmd) + pte_index(address);
+	return (unsigned long) pmd_deref(pmd);
 }
 
-#define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
-
-static inline void pte_unmap(pte_t *pte) { }
-
 static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
 {
 	return end <= current->mm->context.asce_limit;
@@ -1323,29 +1366,23 @@
 static inline pmd_t pmd_mkwrite(pmd_t pmd)
 {
 	pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
-	if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
-		return pmd;
-	pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	return pmd;
 }
 
 static inline pmd_t pmd_mkclean(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
-		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
-	}
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
 	return pmd;
 }
 
 static inline pmd_t pmd_mkdirty(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY |
-				_SEGMENT_ENTRY_SOFT_DIRTY;
-		if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
-			pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
-	}
+	pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_SOFT_DIRTY;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
 	return pmd;
 }
 
@@ -1359,29 +1396,23 @@
 static inline pud_t pud_mkwrite(pud_t pud)
 {
 	pud_val(pud) |= _REGION3_ENTRY_WRITE;
-	if (pud_large(pud) && !(pud_val(pud) & _REGION3_ENTRY_DIRTY))
-		return pud;
-	pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
+	if (pud_val(pud) & _REGION3_ENTRY_DIRTY)
+		pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
 	return pud;
 }
 
 static inline pud_t pud_mkclean(pud_t pud)
 {
-	if (pud_large(pud)) {
-		pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
-		pud_val(pud) |= _REGION_ENTRY_PROTECT;
-	}
+	pud_val(pud) &= ~_REGION3_ENTRY_DIRTY;
+	pud_val(pud) |= _REGION_ENTRY_PROTECT;
 	return pud;
 }
 
 static inline pud_t pud_mkdirty(pud_t pud)
 {
-	if (pud_large(pud)) {
-		pud_val(pud) |= _REGION3_ENTRY_DIRTY |
-				_REGION3_ENTRY_SOFT_DIRTY;
-		if (pud_val(pud) & _REGION3_ENTRY_WRITE)
-			pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
-	}
+	pud_val(pud) |= _REGION3_ENTRY_DIRTY | _REGION3_ENTRY_SOFT_DIRTY;
+	if (pud_val(pud) & _REGION3_ENTRY_WRITE)
+		pud_val(pud) &= ~_REGION_ENTRY_PROTECT;
 	return pud;
 }
 
@@ -1405,38 +1436,29 @@
 
 static inline pmd_t pmd_mkyoung(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
-		if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
-			pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
-	}
+	pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+	if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+		pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
 	return pmd;
 }
 
 static inline pmd_t pmd_mkold(pmd_t pmd)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
-		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
-	}
+	pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
 	return pmd;
 }
 
 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
-	if (pmd_large(pmd)) {
-		pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
-			_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
-			_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
-		pmd_val(pmd) |= massage_pgprot_pmd(newprot);
-		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
-			pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
-		if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
-			pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
-		return pmd;
-	}
-	pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
+	pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+		_SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+		_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SOFT_DIRTY;
 	pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+		pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+	if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+		pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
 	return pmd;
 }
 
@@ -1586,7 +1608,7 @@
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
-static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm,
+static inline pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
 						 unsigned long addr,
 						 pmd_t *pmdp, int full)
 {
@@ -1595,7 +1617,7 @@
 		*pmdp = __pmd(_SEGMENT_ENTRY_EMPTY);
 		return pmd;
 	}
-	return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+	return pmdp_xchg_lazy(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
 }
 
 #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH
@@ -1700,7 +1722,7 @@
 #define kern_addr_valid(addr)   (1)
 
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
-extern int vmem_remove_mapping(unsigned long start, unsigned long size);
+extern void vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int s390_enable_sie(void);
 extern int s390_enable_skey(void);
 extern void s390_reset_cmma(struct mm_struct *mm);
@@ -1709,6 +1731,4 @@
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#include <asm-generic/pgtable.h>
-
 #endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index b5ea9e1..b5f545d 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -29,12 +29,6 @@
 				  old, new) != old);
 }
 
-#define init_task_preempt_count(p)	do { } while (0)
-
-#define init_idle_preempt_count(p, cpu)	do { \
-	S390_lowcore.preempt_count = PREEMPT_ENABLED; \
-} while (0)
-
 static inline void set_preempt_need_resched(void)
 {
 	__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
@@ -88,12 +82,6 @@
 	S390_lowcore.preempt_count = pc;
 }
 
-#define init_task_preempt_count(p)	do { } while (0)
-
-#define init_idle_preempt_count(p, cpu)	do { \
-	S390_lowcore.preempt_count = PREEMPT_ENABLED; \
-} while (0)
-
 static inline void set_preempt_need_resched(void)
 {
 }
@@ -130,11 +118,15 @@
 
 #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
 
-#ifdef CONFIG_PREEMPT
+#define init_task_preempt_count(p)	do { } while (0)
+/* Deferred to CPU bringup time */
+#define init_idle_preempt_count(p, cpu)	do { } while (0)
+
+#ifdef CONFIG_PREEMPTION
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 extern asmlinkage void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 48d6ccd..0987c3f 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -14,17 +14,15 @@
 
 #include <linux/bits.h>
 
-#define CIF_MCCK_PENDING	0	/* machine check handling is pending */
-#define CIF_ASCE_PRIMARY	1	/* primary asce needs fixup / uaccess */
-#define CIF_ASCE_SECONDARY	2	/* secondary asce needs fixup / uaccess */
-#define CIF_NOHZ_DELAY		3	/* delay HZ disable for a tick */
-#define CIF_FPU			4	/* restore FPU registers */
-#define CIF_IGNORE_IRQ		5	/* ignore interrupt (for udelay) */
-#define CIF_ENABLED_WAIT	6	/* in enabled wait state */
-#define CIF_MCCK_GUEST		7	/* machine check happening in guest */
-#define CIF_DEDICATED_CPU	8	/* this CPU is dedicated */
+#define CIF_ASCE_PRIMARY	0	/* primary asce needs fixup / uaccess */
+#define CIF_ASCE_SECONDARY	1	/* secondary asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
+#define CIF_FPU			3	/* restore FPU registers */
+#define CIF_IGNORE_IRQ		4	/* ignore interrupt (for udelay) */
+#define CIF_ENABLED_WAIT	5	/* in enabled wait state */
+#define CIF_MCCK_GUEST		6	/* machine check happening in guest */
+#define CIF_DEDICATED_CPU	7	/* this CPU is dedicated */
 
-#define _CIF_MCCK_PENDING	BIT(CIF_MCCK_PENDING)
 #define _CIF_ASCE_PRIMARY	BIT(CIF_ASCE_PRIMARY)
 #define _CIF_ASCE_SECONDARY	BIT(CIF_ASCE_SECONDARY)
 #define _CIF_NOHZ_DELAY		BIT(CIF_NOHZ_DELAY)
@@ -84,7 +82,6 @@
 void cpu_detect_mhz_feature(void);
 
 extern const struct seq_operations cpuinfo_op;
-extern int sysctl_ieee_emulation_warnings;
 extern void execve_tail(void);
 extern void __bpon(void);
 
@@ -93,15 +90,15 @@
  */
 
 #define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_31BIT) ? \
-					(1UL << 31) : -PAGE_SIZE)
+					_REGION3_SIZE : TASK_SIZE_MAX)
 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_31BIT) ? \
-					(1UL << 30) : (1UL << 41))
+					(_REGION3_SIZE >> 1) : (_REGION2_SIZE >> 1))
 #define TASK_SIZE		TASK_SIZE_OF(current)
 #define TASK_SIZE_MAX		(-PAGE_SIZE)
 
 #define STACK_TOP		(test_thread_flag(TIF_31BIT) ? \
-					(1UL << 31) : (1UL << 42))
-#define STACK_TOP_MAX		(1UL << 42)
+					_REGION3_SIZE : _REGION2_SIZE)
+#define STACK_TOP_MAX		_REGION2_SIZE
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
@@ -179,7 +176,6 @@
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
 	regs->psw.addr	= new_psw;					\
 	regs->gprs[15]	= new_stackp;					\
-	crst_table_downgrade(current->mm);				\
 	execve_tail();							\
 } while (0)
 
@@ -207,7 +203,7 @@
 /* Has task runtime instrumentation enabled ? */
 #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
 
-static inline unsigned long current_stack_pointer(void)
+static __always_inline unsigned long current_stack_pointer(void)
 {
 	unsigned long sp;
 
@@ -311,7 +307,7 @@
 /*
  * Function to drop a processor into disabled wait state
  */
-static inline void __noreturn disabled_wait(void)
+static __always_inline void __noreturn disabled_wait(void)
 {
 	psw_t psw;
 
diff --git a/arch/s390/include/asm/ptdump.h b/arch/s390/include/asm/ptdump.h
new file mode 100644
index 0000000..f960b28
--- /dev/null
+++ b/arch/s390/include/asm/ptdump.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_S390_PTDUMP_H
+#define _ASM_S390_PTDUMP_H
+
+void ptdump_check_wx(void);
+
+static inline void debug_checkwx(void)
+{
+	if (IS_ENABLED(CONFIG_DEBUG_WX))
+		ptdump_check_wx();
+}
+
+#endif /* _ASM_S390_PTDUMP_H */
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index f009a13..16b3e43 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -184,5 +184,10 @@
 	return regs->gprs[15];
 }
 
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+	regs->gprs[2] = rc;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _S390_PTRACE_H */
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index 4f35b10..19e84c9 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -26,9 +26,9 @@
 
 /**
  * struct qdesfmt0 - queue descriptor, format 0
- * @sliba: storage list information block address
- * @sla: storage list address
- * @slsba: storage list state block address
+ * @sliba: absolute address of storage list information block
+ * @sla: absolute address of storage list
+ * @slsba: absolute address of storage list state block
  * @akey: access key for SLIB
  * @bkey: access key for SL
  * @ckey: access key for SBALs
@@ -56,7 +56,7 @@
  * @oqdcnt: output queue descriptor count
  * @iqdsz: input queue descriptor size
  * @oqdsz: output queue descriptor size
- * @qiba: queue information block address
+ * @qiba: absolute address of queue information block
  * @qkey: queue information block key
  * @qdf0: queue descriptions
  */
@@ -201,7 +201,7 @@
  * @scount: SBAL count
  * @sflags: whole SBAL flags
  * @length: length
- * @addr: address
+ * @addr: absolute data address
 */
 struct qdio_buffer_element {
 	u8 eflags;
@@ -211,7 +211,7 @@
 	u8 scount;
 	u8 sflags;
 	u32 length;
-	void *addr;
+	u64 addr;
 } __attribute__ ((packed, aligned(16)));
 
 /**
@@ -276,6 +276,7 @@
 #define CHSC_AC2_MULTI_BUFFER_AVAILABLE	0x0080
 #define CHSC_AC2_MULTI_BUFFER_ENABLED	0x0040
 #define CHSC_AC2_DATA_DIV_AVAILABLE	0x0010
+#define CHSC_AC2_SNIFFER_AVAILABLE	0x0008
 #define CHSC_AC2_DATA_DIV_ENABLED	0x0002
 
 #define CHSC_AC3_FORMAT2_CQ_AVAILABLE	0x8000
@@ -324,10 +325,8 @@
 
 /**
  * struct qdio_initialize - qdio initialization data
- * @cdev: associated ccw device
  * @q_format: queue format
  * @qdr_ac: feature flags to set
- * @adapter_name: name for the adapter
  * @qib_param_field_format: format for qib_parm_field
  * @qib_param_field: pointer to 128 bytes or NULL, if no param field
  * @qib_rflags: rflags to set
@@ -337,18 +336,16 @@
  * @no_output_qs: number of output queues
  * @input_handler: handler to be called for input queues
  * @output_handler: handler to be called for output queues
- * @queue_start_poll_array: polling handlers (one per input queue or NULL)
+ * @irq_poll: Data IRQ polling handler (NULL when not supported)
  * @scan_threshold: # of in-use buffers that triggers scan on output queue
  * @int_parm: interruption parameter
- * @input_sbal_addr_array:  address of no_input_qs * 128 pointers
- * @output_sbal_addr_array: address of no_output_qs * 128 pointers
+ * @input_sbal_addr_array:  per-queue array, each element points to 128 SBALs
+ * @output_sbal_addr_array: per-queue array, each element points to 128 SBALs
  * @output_sbal_state_array: no_output_qs * 128 state info (for CQ or NULL)
  */
 struct qdio_initialize {
-	struct ccw_device *cdev;
 	unsigned char q_format;
 	unsigned char qdr_ac;
-	unsigned char adapter_name[8];
 	unsigned int qib_param_field_format;
 	unsigned char *qib_param_field;
 	unsigned char qib_rflags;
@@ -358,43 +355,14 @@
 	unsigned int no_output_qs;
 	qdio_handler_t *input_handler;
 	qdio_handler_t *output_handler;
-	void (**queue_start_poll_array) (struct ccw_device *, int,
-					  unsigned long);
+	void (*irq_poll)(struct ccw_device *cdev, unsigned long data);
 	unsigned int scan_threshold;
 	unsigned long int_parm;
-	struct qdio_buffer **input_sbal_addr_array;
-	struct qdio_buffer **output_sbal_addr_array;
+	struct qdio_buffer ***input_sbal_addr_array;
+	struct qdio_buffer ***output_sbal_addr_array;
 	struct qdio_outbuf_state *output_sbal_state_array;
 };
 
-/**
- * enum qdio_brinfo_entry_type - type of address entry for qdio_brinfo_desc()
- * @l3_ipv6_addr: entry contains IPv6 address
- * @l3_ipv4_addr: entry contains IPv4 address
- * @l2_addr_lnid: entry contains MAC address and VLAN ID
- */
-enum qdio_brinfo_entry_type {l3_ipv6_addr, l3_ipv4_addr, l2_addr_lnid};
-
-/**
- * struct qdio_brinfo_entry_XXX - Address entry for qdio_brinfo_desc()
- * @nit:  Network interface token
- * @addr: Address of one of the three types
- *
- * The struct is passed to the callback function by qdio_brinfo_desc()
- */
-struct qdio_brinfo_entry_l3_ipv6 {
-	u64 nit;
-	struct { unsigned char _s6_addr[16]; } addr;
-} __packed;
-struct qdio_brinfo_entry_l3_ipv4 {
-	u64 nit;
-	struct { uint32_t _s_addr; } addr;
-} __packed;
-struct qdio_brinfo_entry_l2 {
-	u64 nit;
-	struct { u8 mac[6]; u16 lnid; } addr_lnid;
-} __packed;
-
 #define QDIO_STATE_INACTIVE		0x00000002 /* after qdio_cleanup */
 #define QDIO_STATE_ESTABLISHED		0x00000004 /* after qdio_establish */
 #define QDIO_STATE_ACTIVE		0x00000008 /* after qdio_activate */
@@ -408,14 +376,16 @@
 void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
 void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
 
-extern int qdio_allocate(struct qdio_initialize *);
-extern int qdio_establish(struct qdio_initialize *);
+extern int qdio_allocate(struct ccw_device *cdev, unsigned int no_input_qs,
+			 unsigned int no_output_qs);
+extern int qdio_establish(struct ccw_device *cdev,
+			  struct qdio_initialize *init_data);
 extern int qdio_activate(struct ccw_device *);
 extern void qdio_release_aob(struct qaob *);
 extern int do_QDIO(struct ccw_device *, unsigned int, int, unsigned int,
 		   unsigned int);
-extern int qdio_start_irq(struct ccw_device *, int);
-extern int qdio_stop_irq(struct ccw_device *, int);
+extern int qdio_start_irq(struct ccw_device *cdev);
+extern int qdio_stop_irq(struct ccw_device *cdev);
 extern int qdio_get_next_buffers(struct ccw_device *, int, int *, int *);
 extern int qdio_inspect_queue(struct ccw_device *cdev, unsigned int nr,
 			      bool is_input, unsigned int *bufnr,
@@ -423,10 +393,5 @@
 extern int qdio_shutdown(struct ccw_device *, int);
 extern int qdio_free(struct ccw_device *);
 extern int qdio_get_ssqd_desc(struct ccw_device *, struct qdio_ssqd_desc *);
-extern int qdio_pnso_brinfo(struct subchannel_id schid,
-		int cnc, u16 *response,
-		void (*cb)(void *priv, enum qdio_brinfo_entry_type type,
-				void *entry),
-		void *priv);
 
 #endif /* __QDIO_H__ */
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index c563f83..a7bdd12 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -114,8 +114,7 @@
 void sclp_early_get_ipl_info(struct sclp_ipl_info *info);
 void sclp_early_detect(void);
 void sclp_early_printk(const char *s);
-void sclp_early_printk_force(const char *s);
-void __sclp_early_printk(const char *s, unsigned int len, unsigned int force);
+void __sclp_early_printk(const char *s, unsigned int len);
 
 int sclp_early_get_memsize(unsigned long *mem);
 int sclp_early_get_hsa_size(unsigned long *hsa_size);
@@ -129,6 +128,8 @@
 int sclp_chp_read_info(struct sclp_chp_info *info);
 int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
+int sclp_ap_configure(u32 apid);
+int sclp_ap_deconfigure(u32 apid);
 int sclp_pci_report(struct zpci_report_error_header *report, u32 fh, u32 fid);
 int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
 int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 42de04a..0c21514 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -26,14 +26,14 @@
  * final .boot.data section, which should be identical in the decompressor and
  * the decompressed kernel (that is checked during the build).
  */
-#define __bootdata(var) __section(.boot.data.var) var
+#define __bootdata(var) __section(".boot.data." #var) var
 
 /*
  * .boot.preserved.data is similar to .boot.data, but it is not part of the
  * .init section and thus will be preserved for later use in the decompressed
  * kernel.
  */
-#define __bootdata_preserved(var) __section(.boot.preserved.data.var) var
+#define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
 
 extern unsigned long __sdma, __edma;
 extern unsigned long __stext_dma, __etext_dma;
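
The quoted-section form of __bootdata()/__bootdata_preserved() above follows the treewide switch of __section() to a string-literal argument; the variable name is still appended via the preprocessor. Illustration only (the variable name below is made up):

	/* unsigned long __bootdata(fancy_var); now expands, via #var and string
	 * concatenation, to roughly: */
	unsigned long __attribute__((__section__(".boot.data.fancy_var"))) fancy_var;
	/* whereas the old form relied on __section() stringifying the raw token
	 * .boot.data.fancy_var inside the macro. */
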
diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index c59a835..a22a5a8 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -2,6 +2,10 @@
 #ifndef _ASMS390_SET_MEMORY_H
 #define _ASMS390_SET_MEMORY_H
 
+#include <linux/mutex.h>
+
+extern struct mutex cpa_mutex;
+
 #define SET_MEMORY_RO	1UL
 #define SET_MEMORY_RW	2UL
 #define SET_MEMORY_NX	4UL
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index e6a5007..75a2ece 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -28,7 +28,6 @@
 #define MACHINE_FLAG_DIAG9C	BIT(3)
 #define MACHINE_FLAG_ESOP	BIT(4)
 #define MACHINE_FLAG_IDTE	BIT(5)
-#define MACHINE_FLAG_DIAG44	BIT(6)
 #define MACHINE_FLAG_EDAT1	BIT(7)
 #define MACHINE_FLAG_EDAT2	BIT(8)
 #define MACHINE_FLAG_TOPOLOGY	BIT(10)
@@ -82,12 +81,21 @@
 	char command_line[ARCH_COMMAND_LINE_SIZE];	/* 0x10480 */
 };
 
+extern unsigned int zlib_dfltcc_support;
+#define ZLIB_DFLTCC_DISABLED		0
+#define ZLIB_DFLTCC_FULL		1
+#define ZLIB_DFLTCC_DEFLATE_ONLY	2
+#define ZLIB_DFLTCC_INFLATE_ONLY	3
+#define ZLIB_DFLTCC_FULL_DEBUG		4
+
 extern int noexec_disabled;
 extern int memory_end_set;
 extern unsigned long memory_end;
 extern unsigned long vmalloc_size;
 extern unsigned long max_physmem_end;
-extern unsigned long __swsusp_reset_dma;
+
+/* The Write Back bit position in the physaddr is given by the SLPC PCI */
+extern unsigned long mio_wb_bit_mask;
 
 #define MACHINE_IS_VM		(S390_lowcore.machine_flags & MACHINE_FLAG_VM)
 #define MACHINE_IS_KVM		(S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -96,7 +104,6 @@
 #define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
 #define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
 #define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
 #define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
 #define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
@@ -116,9 +123,6 @@
 extern unsigned int console_devno;
 extern unsigned int console_irq;
 
-extern char vmhalt_cmd[];
-extern char vmpoff_cmd[];
-
 #define CONSOLE_IS_UNDEFINED	(console_mode == 0)
 #define CONSOLE_IS_SCLP		(console_mode == 1)
 #define CONSOLE_IS_3215		(console_mode == 2)
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index b157a81..01e3600 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -10,6 +10,7 @@
 
 #include <asm/sigp.h>
 #include <asm/lowcore.h>
+#include <asm/processor.h>
 
 #define raw_smp_processor_id()	(S390_lowcore.cpu_nr)
 
@@ -30,10 +31,10 @@
 extern int smp_find_processor_id(u16 address);
 extern int smp_store_status(int cpu);
 extern void smp_save_dump_cpus(void);
-extern int smp_vcpu_scheduled(int cpu);
 extern void smp_yield_cpu(int cpu);
 extern void smp_cpu_set_polarization(int cpu, int val);
 extern int smp_cpu_get_polarization(int cpu);
+extern int smp_cpu_get_cpu_address(int cpu);
 extern void smp_fill_possible_mask(void);
 extern void smp_detect_cpus(void);
 
@@ -53,9 +54,14 @@
 	return cpu - (cpu % (smp_cpu_mtid + 1));
 }
 
+static inline void smp_cpus_done(unsigned int max_cpus)
+{
+}
+
 extern int smp_rescan_cpus(void);
 extern void __noreturn cpu_die(void);
 extern void __cpu_die(unsigned int cpu);
 extern int __cpu_disable(void);
+extern void schedule_mcck_handler(void);
 
 #endif /* __ASM_SMP_H */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index c02bff3..3a37172 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -85,7 +85,7 @@
 static inline void arch_spin_unlock(arch_spinlock_t *lp)
 {
 	typecheck(int, lp->lock);
-	asm volatile(
+	asm_inline volatile(
 		ALTERNATIVE("", ".long 0xb2fa0070", 49)	/* NIAI 7 */
 		"	sth	%1,%0\n"
 		: "=Q" (((unsigned short *) &lp->lock)[1])
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index e192681..90488b0 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -33,8 +33,8 @@
 	return addr >= info->begin && addr + len <= info->end;
 }
 
-static inline unsigned long get_stack_pointer(struct task_struct *task,
-					      struct pt_regs *regs)
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+						       struct pt_regs *regs)
 {
 	if (regs)
 		return (unsigned long) kernel_stack_pointer(regs);
@@ -62,6 +62,17 @@
 };
 #endif
 
+/*
+ * Unlike current_stack_pointer() which simply returns current value of %r15
+ * current_frame_address() returns function stack frame address, which matches
+ * %r15 upon function invocation. It may differ from %r15 later if function
+ * allocates stack for local variables or new stack frame to call other
+ * functions.
+ */
+#define current_frame_address()						\
+	((unsigned long)__builtin_frame_address(0) -			\
+	 offsetof(struct stack_frame, back_chain))
+
 #define CALL_ARGS_0()							\
 	register unsigned long r2 asm("2")
 #define CALL_ARGS_1(arg1)						\
@@ -99,18 +110,20 @@
 
 #define CALL_ON_STACK(fn, stack, nr, args...)				\
 ({									\
+	unsigned long frame = current_frame_address();			\
 	CALL_ARGS_##nr(args);						\
 	unsigned long prev;						\
 									\
 	asm volatile(							\
 		"	la	%[_prev],0(15)\n"			\
-		"	la	15,0(%[_stack])\n"			\
-		"	stg	%[_prev],%[_bc](15)\n"			\
+		"	lg	15,%[_stack]\n"				\
+		"	stg	%[_frame],%[_bc](15)\n"			\
 		"	brasl	14,%[_fn]\n"				\
 		"	la	15,0(%[_prev])\n"			\
 		: [_prev] "=&a" (prev), CALL_FMT_##nr			\
-		: [_stack] "a" (stack),					\
+		: [_stack] "R" (stack),					\
 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
+		  [_frame] "d" (frame),					\
 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);			\
 	r2;								\
 })
diff --git a/arch/s390/include/asm/stp.h b/arch/s390/include/asm/stp.h
index f0ddefb..ba07463 100644
--- a/arch/s390/include/asm/stp.h
+++ b/arch/s390/include/asm/stp.h
@@ -6,43 +6,89 @@
 #ifndef __S390_STP_H
 #define __S390_STP_H
 
+#include <linux/compiler.h>
+
 /* notifier for syncs */
 extern struct atomic_notifier_head s390_epoch_delta_notifier;
 
 /* STP interruption parameter */
 struct stp_irq_parm {
-	unsigned int _pad0	: 14;
-	unsigned int tsc	: 1;	/* Timing status change */
-	unsigned int lac	: 1;	/* Link availability change */
-	unsigned int tcpc	: 1;	/* Time control parameter change */
-	unsigned int _pad2	: 15;
-} __attribute__ ((packed));
+	u32		: 14;
+	u32 tsc		:  1;	/* Timing status change */
+	u32 lac		:  1;	/* Link availability change */
+	u32 tcpc	:  1;	/* Time control parameter change */
+	u32		: 15;
+} __packed;
 
 #define STP_OP_SYNC	1
 #define STP_OP_CTRL	3
 
 struct stp_sstpi {
-	unsigned int rsvd0;
-	unsigned int rsvd1 : 8;
-	unsigned int stratum : 8;
-	unsigned int vbits : 16;
-	unsigned int leaps : 16;
-	unsigned int tmd : 4;
-	unsigned int ctn : 4;
-	unsigned int rsvd2 : 3;
-	unsigned int c : 1;
-	unsigned int tst : 4;
-	unsigned int tzo : 16;
-	unsigned int dsto : 16;
-	unsigned int ctrl : 16;
-	unsigned int rsvd3 : 16;
-	unsigned int tto;
-	unsigned int rsvd4;
-	unsigned int ctnid[3];
-	unsigned int rsvd5;
-	unsigned int todoff[4];
-	unsigned int rsvd6[48];
-} __attribute__ ((packed));
+	u32		: 32;
+	u32 tu		:  1;
+	u32 lu		:  1;
+	u32		:  6;
+	u32 stratum	:  8;
+	u32 vbits	: 16;
+	u32 leaps	: 16;
+	u32 tmd		:  4;
+	u32 ctn		:  4;
+	u32		:  3;
+	u32 c		:  1;
+	u32 tst		:  4;
+	u32 tzo		: 16;
+	u32 dsto	: 16;
+	u32 ctrl	: 16;
+	u32		: 16;
+	u32 tto;
+	u32		: 32;
+	u32 ctnid[3];
+	u32		: 32;
+	u32 todoff[4];
+	u32 rsvd[48];
+} __packed;
+
+struct stp_tzib {
+	u32 tzan	: 16;
+	u32		: 16;
+	u32 tzo		: 16;
+	u32 dsto	: 16;
+	u32 stn;
+	u32 dstn;
+	u64 dst_on_alg;
+	u64 dst_off_alg;
+} __packed;
+
+struct stp_tcpib {
+	u32 atcode	: 4;
+	u32 ntcode	: 4;
+	u32 d		: 1;
+	u32		: 23;
+	s32 tto;
+	struct stp_tzib atzib;
+	struct stp_tzib ntzib;
+	s32 adst_offset : 16;
+	s32 ndst_offset : 16;
+	u32 rsvd1;
+	u64 ntzib_update;
+	u64 ndsto_update;
+} __packed;
+
+struct stp_lsoib {
+	u32 p		: 1;
+	u32		: 31;
+	s32 also	: 16;
+	s32 nlso	: 16;
+	u64 nlsout;
+} __packed;
+
+struct stp_stzi {
+	u32 rsvd0[3];
+	u64 data_ts;
+	u32 rsvd1[22];
+	struct stp_tcpib tcpib;
+	struct stp_lsoib lsoib;
+} __packed;
 
 /* Functions needed by the machine check handler */
 int stp_sync_check(void);
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 3c3d6fe..1320f42 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -30,7 +30,7 @@
 })
 
 #define __S390_SYS_STUBx(x, name, ...)					\
-	asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
+	asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
 	ALLOW_ERROR_INJECTION(__s390_sys##name, ERRNO);			\
 	asmlinkage long __s390_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
 	{								\
@@ -46,7 +46,7 @@
 #define COMPAT_SYSCALL_DEFINE0(sname)					\
 	SYSCALL_METADATA(_##sname, 0);					\
 	asmlinkage long __s390_compat_sys_##sname(void);		\
-	ALLOW_ERROR_INJECTION(__s390_compat__sys_##sname, ERRNO);	\
+	ALLOW_ERROR_INJECTION(__s390_compat_sys_##sname, ERRNO);	\
 	asmlinkage long __s390_compat_sys_##sname(void)
 
 #define SYSCALL_DEFINE0(sname)						\
@@ -72,7 +72,7 @@
 	asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));	\
 	asmlinkage long __s390_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))	\
 		__attribute__((alias(__stringify(__se_compat_sys##name))));	\
-	ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO);				\
+	ALLOW_ERROR_INJECTION(__s390_compat_sys##name, ERRNO);			\
 	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
 	asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));	\
 	asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__))	\
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index e582fbe..13a04fc 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -24,7 +24,6 @@
 #ifndef __ASSEMBLY__
 #include <asm/lowcore.h>
 #include <asm/page.h>
-#include <asm/processor.h>
 
 #define STACK_INIT_OFFSET \
 	(THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index b6a4ce9..289aaff 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -49,11 +49,6 @@
 	asm volatile("sckc %0" : : "Q" (time));
 }
 
-static inline void store_clock_comparator(__u64 *time)
-{
-	asm volatile("stckc %0" : "=Q" (*time));
-}
-
 void clock_comparator_work(void);
 
 void __init time_early_init(void);
@@ -180,7 +175,6 @@
 
 int get_phys_clock(unsigned long *clock);
 void init_cpu_timer(void);
-unsigned long long monotonic_clock(void);
 
 extern unsigned char tod_clock_base[16] __aligned(8);
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index aa406c0..954fa8c 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -36,7 +36,6 @@
 #define p4d_free_tlb p4d_free_tlb
 #define pud_free_tlb pud_free_tlb
 
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
 
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 82703e0..6448bb5 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -5,8 +5,6 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
 
 /*
  * Flush all TLB entries on the local CPU.
@@ -32,8 +30,6 @@
 		: : "a" (opt), "a" (asce) : "cc");
 }
 
-void smp_ptlb_all(void);
-
 /*
  * Flush all TLB entries on all CPUs.
  */
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index ef9dd25..3a0ac0c 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -16,8 +16,8 @@
 	unsigned short socket_id;
 	unsigned short book_id;
 	unsigned short drawer_id;
-	unsigned short node_id;
 	unsigned short dedicated : 1;
+	int booted_cores;
 	cpumask_t thread_mask;
 	cpumask_t core_mask;
 	cpumask_t book_mask;
@@ -25,7 +25,6 @@
 };
 
 extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
-extern cpumask_t cpus_with_topology;
 
 #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
 #define topology_thread_id(cpu)		  (cpu_topology[cpu].thread_id)
@@ -37,6 +36,7 @@
 #define topology_drawer_id(cpu)		  (cpu_topology[cpu].drawer_id)
 #define topology_drawer_cpumask(cpu)	  (&cpu_topology[cpu].drawer_mask)
 #define topology_cpu_dedicated(cpu)	  (cpu_topology[cpu].dedicated)
+#define topology_booted_cores(cpu)	  (cpu_topology[cpu].booted_cores)
 
 #define mc_capable() 1
 
@@ -45,6 +45,7 @@
 int topology_set_cpu_management(int fc);
 void topology_schedule_update(void);
 void store_topology(struct sysinfo_15_1_x *info);
+void update_cpu_masks(void);
 void topology_expect_change(void);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 
@@ -54,6 +55,8 @@
 static inline void topology_schedule_update(void) { }
 static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
 static inline int topology_cpu_dedicated(int cpu_nr) { return 0; }
+static inline int topology_booted_cores(int cpu_nr) { return 1; }
+static inline void update_cpu_masks(void) { }
 static inline void topology_expect_change(void) { }
 
 #endif /* CONFIG_SCHED_TOPOLOGY */
@@ -71,14 +74,14 @@
 #define cpu_to_node cpu_to_node
 static inline int cpu_to_node(int cpu)
 {
-	return cpu_topology[cpu].node_id;
+	return 0;
 }
 
 /* Returns a pointer to the cpumask of CPUs on node 'node'. */
 #define cpumask_of_node cpumask_of_node
 static inline const struct cpumask *cpumask_of_node(int node)
 {
-	return &node_to_cpumask_map[node];
+	return cpu_possible_mask;
 }
 
 #define pcibus_to_node(bus) __pcibus_to_node(bus)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 3244388..c868e7e 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -32,7 +32,7 @@
 #define USER_DS_SACF	(3)
 
 #define get_fs()        (current->thread.mm_segment)
-#define segment_eq(a,b) (((a) & 2) == ((b) & 2))
+#define uaccess_kernel() ((get_fs() & 2) == KERNEL_DS)
 
 void set_fs(mm_segment_t fs);
 
@@ -60,6 +60,9 @@
 #define INLINE_COPY_TO_USER
 #endif
 
+int __put_user_bad(void) __attribute__((noreturn));
+int __get_user_bad(void) __attribute__((noreturn));
+
 #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
 
 #define __put_get_user_asm(to, from, size, spec)		\
@@ -109,6 +112,9 @@
 					(unsigned long *)x,
 					size, spec);
 		break;
+	default:
+		__put_user_bad();
+		break;
 	}
 	return rc;
 }
@@ -139,6 +145,9 @@
 					(unsigned long __user *)ptr,
 					size, spec);
 		break;
+	default:
+		__get_user_bad();
+		break;
 	}
 	return rc;
 }
@@ -179,7 +188,7 @@
 	default:						\
 		__put_user_bad();				\
 		break;						\
-	 }							\
+	}							\
 	__builtin_expect(__pu_err, 0);				\
 })
 
@@ -190,8 +199,6 @@
 })
 
 
-int __put_user_bad(void) __attribute__((noreturn));
-
 #define __get_user(x, ptr)					\
 ({								\
 	int __gu_err = -EFAULT;					\
@@ -238,8 +245,6 @@
 	__get_user(x, ptr);					\
 })
 
-int __get_user_bad(void) __attribute__((noreturn));
-
 unsigned long __must_check
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n);
 
@@ -278,4 +283,115 @@
 int copy_to_user_real(void __user *dest, void *src, unsigned long count);
 void *s390_kernel_write(void *dst, const void *src, size_t size);
 
+#define HAVE_GET_KERNEL_NOFAULT
+
+int __noreturn __put_kernel_bad(void);
+
+#define __put_kernel_asm(val, to, insn)					\
+({									\
+	int __rc;							\
+									\
+	asm volatile(							\
+		"0:   " insn "  %2,%1\n"				\
+		"1:	xr	%0,%0\n"				\
+		"2:\n"							\
+		".pushsection .fixup, \"ax\"\n"				\
+		"3:	lhi	%0,%3\n"				\
+		"	jg	2b\n"					\
+		".popsection\n"						\
+		EX_TABLE(0b,3b) EX_TABLE(1b,3b)				\
+		: "=d" (__rc), "+Q" (*(to))				\
+		: "d" (val), "K" (-EFAULT)				\
+		: "cc");						\
+	__rc;								\
+})
+
+#define __put_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	u64 __x = (u64)(*((type *)(src)));				\
+	int __pk_err;							\
+									\
+	switch (sizeof(type)) {						\
+	case 1:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "stc"); \
+		break;							\
+	case 2:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "sth"); \
+		break;							\
+	case 4:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "st");	\
+		break;							\
+	case 8:								\
+		__pk_err = __put_kernel_asm(__x, (type *)(dst), "stg"); \
+		break;							\
+	default:							\
+		__pk_err = __put_kernel_bad();				\
+		break;							\
+	}								\
+	if (unlikely(__pk_err))						\
+		goto err_label;						\
+} while (0)
+
+int __noreturn __get_kernel_bad(void);
+
+#define __get_kernel_asm(val, from, insn)				\
+({									\
+	int __rc;							\
+									\
+	asm volatile(							\
+		"0:   " insn "  %1,%2\n"				\
+		"1:	xr	%0,%0\n"				\
+		"2:\n"							\
+		".pushsection .fixup, \"ax\"\n"				\
+		"3:	lhi	%0,%3\n"				\
+		"	jg	2b\n"					\
+		".popsection\n"						\
+		EX_TABLE(0b,3b) EX_TABLE(1b,3b)				\
+		: "=d" (__rc), "+d" (val)				\
+		: "Q" (*(from)), "K" (-EFAULT)				\
+		: "cc");						\
+	__rc;								\
+})
+
+#define __get_kernel_nofault(dst, src, type, err_label)			\
+do {									\
+	int __gk_err;							\
+									\
+	switch (sizeof(type)) {						\
+	case 1: {							\
+		u8 __x = 0;						\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "ic");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	case 2: {							\
+		u16 __x = 0;						\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "lh");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	case 4: {							\
+		u32 __x = 0;						\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "l");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	case 8: {							\
+		u64 __x = 0;						\
+									\
+		__gk_err = __get_kernel_asm(__x, (type *)(src), "lg");	\
+		*((type *)(dst)) = (type)__x;				\
+		break;							\
+	};								\
+	default:							\
+		__gk_err = __get_kernel_bad();				\
+		break;							\
+	}								\
+	if (unlikely(__gk_err))						\
+		goto err_label;						\
+} while (0)
+
 #endif /* __S390_UACCESS_H */
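
The __get_kernel_nofault()/__put_kernel_nofault() blocks added above back the generic HAVE_GET_KERNEL_NOFAULT machinery, so fault-tolerant kernel-space accesses go through the common wrappers instead of the user-access path. A sketch of a typical consumer, assuming the generic get_kernel_nofault() wrapper from <linux/uaccess.h>:

	#include <linux/uaccess.h>

	static long peek_kernel_long(const unsigned long *addr, unsigned long *out)
	{
		unsigned long tmp;
		long rc;

		/* Ends up in the arch __get_kernel_nofault() above; a fault is
		 * turned into an error return via the EX_TABLE fixups instead
		 * of an oops. */
		rc = get_kernel_nofault(tmp, addr);
		if (rc)
			return rc;
		*out = tmp;
		return 0;
	}
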
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index eaaefec..de9006b 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -35,7 +35,6 @@
 	struct task_struct *task;
 	struct pt_regs *regs;
 	unsigned long sp, ip;
-	bool reuse_sp;
 	int graph_idx;
 	bool reliable;
 	bool error;
@@ -59,10 +58,11 @@
 static inline void unwind_start(struct unwind_state *state,
 				struct task_struct *task,
 				struct pt_regs *regs,
-				unsigned long sp)
+				unsigned long first_frame)
 {
-	sp = sp ? : get_stack_pointer(task, regs);
-	__unwind_start(state, task, regs, sp);
+	task = task ?: current;
+	first_frame = first_frame ?: get_stack_pointer(task, regs);
+	__unwind_start(state, task, regs, first_frame);
 }
 
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index ef3c00b..12c5f00 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -14,23 +14,67 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/bug.h>
+#include <linux/sched.h>
 #include <asm/page.h>
+#include <asm/gmap.h>
 
 #define UVC_RC_EXECUTED		0x0001
 #define UVC_RC_INV_CMD		0x0002
 #define UVC_RC_INV_STATE	0x0003
 #define UVC_RC_INV_LEN		0x0005
 #define UVC_RC_NO_RESUME	0x0007
+#define UVC_RC_NEED_DESTROY	0x8000
 
 #define UVC_CMD_QUI			0x0001
+#define UVC_CMD_INIT_UV			0x000f
+#define UVC_CMD_CREATE_SEC_CONF		0x0100
+#define UVC_CMD_DESTROY_SEC_CONF	0x0101
+#define UVC_CMD_CREATE_SEC_CPU		0x0120
+#define UVC_CMD_DESTROY_SEC_CPU		0x0121
+#define UVC_CMD_CONV_TO_SEC_STOR	0x0200
+#define UVC_CMD_CONV_FROM_SEC_STOR	0x0201
+#define UVC_CMD_DESTR_SEC_STOR		0x0202
+#define UVC_CMD_SET_SEC_CONF_PARAMS	0x0300
+#define UVC_CMD_UNPACK_IMG		0x0301
+#define UVC_CMD_VERIFY_IMG		0x0302
+#define UVC_CMD_CPU_RESET		0x0310
+#define UVC_CMD_CPU_RESET_INITIAL	0x0311
+#define UVC_CMD_PREPARE_RESET		0x0320
+#define UVC_CMD_CPU_RESET_CLEAR		0x0321
+#define UVC_CMD_CPU_SET_STATE		0x0330
+#define UVC_CMD_SET_UNSHARE_ALL		0x0340
+#define UVC_CMD_PIN_PAGE_SHARED		0x0341
+#define UVC_CMD_UNPIN_PAGE_SHARED	0x0342
 #define UVC_CMD_SET_SHARED_ACCESS	0x1000
 #define UVC_CMD_REMOVE_SHARED_ACCESS	0x1001
 
 /* Bits in installed uv calls */
 enum uv_cmds_inst {
 	BIT_UVC_CMD_QUI = 0,
+	BIT_UVC_CMD_INIT_UV = 1,
+	BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+	BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+	BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+	BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
+	BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
+	BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
 	BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
 	BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+	BIT_UVC_CMD_SET_SEC_PARMS = 11,
+	BIT_UVC_CMD_UNPACK_IMG = 13,
+	BIT_UVC_CMD_VERIFY_IMG = 14,
+	BIT_UVC_CMD_CPU_RESET = 15,
+	BIT_UVC_CMD_CPU_RESET_INITIAL = 16,
+	BIT_UVC_CMD_CPU_SET_STATE = 17,
+	BIT_UVC_CMD_PREPARE_RESET = 18,
+	BIT_UVC_CMD_CPU_PERFORM_CLEAR_RESET = 19,
+	BIT_UVC_CMD_UNSHARE_ALL = 20,
+	BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
+	BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+};
+
+enum uv_feat_ind {
+	BIT_UV_FEAT_MISC = 0,
 };
 
 struct uv_cb_header {
@@ -40,13 +84,128 @@
 	u16 rrc;	/* Return Reason Code */
 } __packed __aligned(8);
 
+/* Query Ultravisor Information */
 struct uv_cb_qui {
 	struct uv_cb_header header;
 	u64 reserved08;
 	u64 inst_calls_list[4];
-	u64 reserved30[15];
+	u64 reserved30[2];
+	u64 uv_base_stor_len;
+	u64 reserved48;
+	u64 conf_base_phys_stor_len;
+	u64 conf_base_virt_stor_len;
+	u64 conf_virt_var_stor_len;
+	u64 cpu_stor_len;
+	u32 reserved70[3];
+	u32 max_num_sec_conf;
+	u64 max_guest_stor_addr;
+	u8  reserved88[158 - 136];
+	u16 max_guest_cpu_id;
+	u64 uv_feature_indications;
+	u8  reserveda0[200 - 168];
 } __packed __aligned(8);
 
+/* Initialize Ultravisor */
+struct uv_cb_init {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 stor_origin;
+	u64 stor_len;
+	u64 reserved28[4];
+} __packed __aligned(8);
+
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 conf_base_stor_origin;
+	u64 conf_virt_stor_origin;
+	u64 reserved30;
+	u64 guest_stor_origin;
+	u64 guest_stor_len;
+	u64 guest_sca;
+	u64 guest_asce;
+	u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u64 guest_handle;
+	u64 stor_origin;
+	u8  reserved30[6];
+	u16 num;
+	u64 state_origin;
+	u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
+struct uv_cb_cts {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 gaddr;
+} __packed __aligned(8);
+
+/* Convert from Secure / Pin Page Shared */
+struct uv_cb_cfs {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 paddr;
+} __packed __aligned(8);
+
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 sec_header_origin;
+	u32 sec_header_len;
+	u32 reserved2c;
+	u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 gaddr;
+	u64 tweak[2];
+	u64 reserved38[3];
+} __packed __aligned(8);
+
+#define PV_CPU_STATE_OPR	1
+#define PV_CPU_STATE_STP	2
+#define PV_CPU_STATE_CHKSTP	3
+#define PV_CPU_STATE_OPR_LOAD	5
+
+struct uv_cb_cpu_set_state {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u8  reserved20[7];
+	u8  state;
+	u64 reserved28[5];
+};
+
+/*
+ * A common UV call struct for calls that take no payload
+ * Examples:
+ * Destroy cpu/config
+ * Verify
+ */
+struct uv_cb_nodata {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 handle;
+	u64 reserved20[4];
+} __packed __aligned(8);
+
+/* Set Shared Access */
 struct uv_cb_share {
 	struct uv_cb_header header;
 	u64 reserved08[3];
@@ -54,21 +213,77 @@
 	u64 reserved28;
 } __packed __aligned(8);
 
-static inline int uv_call(unsigned long r1, unsigned long r2)
+static inline int __uv_call(unsigned long r1, unsigned long r2)
 {
 	int cc;
 
 	asm volatile(
-		"0:	.insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
-		"		brc	3,0b\n"
-		"		ipm	%[cc]\n"
-		"		srl	%[cc],28\n"
+		"	.insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+		"	ipm	%[cc]\n"
+		"	srl	%[cc],28\n"
 		: [cc] "=d" (cc)
 		: [r1] "a" (r1), [r2] "a" (r2)
 		: "memory", "cc");
 	return cc;
 }
 
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+	int cc;
+
+	do {
+		cc = __uv_call(r1, r2);
+	} while (cc > 1);
+	return cc;
+}
+
+/* Low level uv_call that avoids stalls for long running busy conditions  */
+static inline int uv_call_sched(unsigned long r1, unsigned long r2)
+{
+	int cc;
+
+	do {
+		cc = __uv_call(r1, r2);
+		cond_resched();
+	} while (cc > 1);
+	return cc;
+}
+
+/*
+ * special variant of uv_call that only transports the cpu or guest
+ * handle and the command, like destroy or verify.
+ */
+static inline int uv_cmd_nodata(u64 handle, u16 cmd, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_nodata uvcb = {
+		.header.cmd = cmd,
+		.header.len = sizeof(uvcb),
+		.handle = handle,
+	};
+	int cc;
+
+	WARN(!handle, "No handle provided to Ultravisor call cmd %x\n", cmd);
+	cc = uv_call_sched(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	return cc ? -EINVAL : 0;
+}
+
+struct uv_info {
+	unsigned long inst_calls_list[4];
+	unsigned long uv_base_stor_len;
+	unsigned long guest_base_stor_len;
+	unsigned long guest_virt_base_stor_len;
+	unsigned long guest_virt_var_stor_len;
+	unsigned long guest_cpu_stor_len;
+	unsigned long max_sec_stor_addr;
+	unsigned int max_num_sec_conf;
+	unsigned short max_guest_cpu_id;
+	unsigned long uv_feature_indications;
+};
+
+extern struct uv_info uv_info;
+
 #ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
 extern int prot_virt_guest;
 
@@ -86,7 +301,7 @@
 	};
 
 	if (!is_prot_virt_guest())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	/*
 	 * Sharing is page wise, if we encounter addresses that are
 	 * not page aligned, we assume something went wrong. If
@@ -121,11 +336,46 @@
 	return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
 }
 
-void uv_query_info(void);
 #else
 #define is_prot_virt_guest() 0
 static inline int uv_set_shared(unsigned long addr) { return 0; }
 static inline int uv_remove_shared(unsigned long addr) { return 0; }
+#endif
+
+#if IS_ENABLED(CONFIG_KVM)
+extern int prot_virt_host;
+
+static inline int is_prot_virt_host(void)
+{
+	return prot_virt_host;
+}
+
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
+int uv_destroy_page(unsigned long paddr);
+int uv_convert_from_secure(unsigned long paddr);
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+
+void setup_uv(void);
+void adjust_to_uv_max(unsigned long *vmax);
+#else
+#define is_prot_virt_host() 0
+static inline void setup_uv(void) {}
+static inline void adjust_to_uv_max(unsigned long *vmax) {}
+
+static inline int uv_destroy_page(unsigned long paddr)
+{
+	return 0;
+}
+
+static inline int uv_convert_from_secure(unsigned long paddr)
+{
+	return 0;
+}
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+void uv_query_info(void);
+#else
 static inline void uv_query_info(void) {}
 #endif
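
The uv_call()/uv_call_sched() helpers above wrap the UVC instruction retry loop, and uv_cmd_nodata() covers the handle-only commands such as destroy or verify. A minimal sketch, with a purely illustrative handle value taken from an earlier UVC_CMD_CREATE_SEC_CONF call:

	/* Tear down a secure configuration (illustration only). */
	static int example_destroy_sec_conf(u64 handle)
	{
		u16 rc, rrc;

		return uv_cmd_nodata(handle, UVC_CMD_DESTROY_SEC_CONF, &rc, &rrc);
	}
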
 
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index f3ba84f..29b44a9 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -2,6 +2,8 @@
 #ifndef __S390_VDSO_H__
 #define __S390_VDSO_H__
 
+#include <vdso/datapage.h>
+
 /* Default link addresses for the vDSOs */
 #define VDSO32_LBASE	0
 #define VDSO64_LBASE	0
@@ -18,38 +20,22 @@
  * itself and may change without notice.
  */
 
-struct vdso_data {
-	__u64 tb_update_count;		/* Timebase atomicity ctr	0x00 */
-	__u64 xtime_tod_stamp;		/* TOD clock for xtime		0x08 */
-	__u64 xtime_clock_sec;		/* Kernel time			0x10 */
-	__u64 xtime_clock_nsec;		/*				0x18 */
-	__u64 xtime_coarse_sec;		/* Coarse kernel time		0x20 */
-	__u64 xtime_coarse_nsec;	/*				0x28 */
-	__u64 wtom_clock_sec;		/* Wall to monotonic clock	0x30 */
-	__u64 wtom_clock_nsec;		/*				0x38 */
-	__u64 wtom_coarse_sec;		/* Coarse wall to monotonic	0x40 */
-	__u64 wtom_coarse_nsec;		/*				0x48 */
-	__u32 tz_minuteswest;		/* Minutes west of Greenwich	0x50 */
-	__u32 tz_dsttime;		/* Type of dst correction	0x54 */
-	__u32 ectg_available;		/* ECTG instruction present	0x58 */
-	__u32 tk_mult;			/* Mult. used for xtime_nsec	0x5c */
-	__u32 tk_shift;			/* Shift used for xtime_nsec	0x60 */
-	__u32 ts_dir;			/* TOD steering direction	0x64 */
-	__u64 ts_end;			/* TOD steering end		0x68 */
-	__u32 hrtimer_res;		/* hrtimer resolution		0x70 */
-};
-
 struct vdso_per_cpu_data {
-	__u64 ectg_timer_base;
-	__u64 ectg_user_time;
-	__u32 cpu_nr;
-	__u32 node_id;
+	/*
+	 * Note: node_id and cpu_nr must be at adjacent memory locations.
+	 * VDSO userspace must read both values with a single instruction.
+	 */
+	union {
+		__u64 getcpu_val;
+		struct {
+			__u32 node_id;
+			__u32 cpu_nr;
+		};
+	};
 };
 
 extern struct vdso_data *vdso_data;
-extern struct vdso_data boot_vdso_data;
 
-void vdso_alloc_boot_cpu(struct lowcore *lowcore);
 int vdso_alloc_per_cpu(struct lowcore *lowcore);
 void vdso_free_per_cpu(struct lowcore *lowcore);
 
diff --git a/arch/s390/include/asm/vdso/clocksource.h b/arch/s390/include/asm/vdso/clocksource.h
new file mode 100644
index 0000000..a93eda0
--- /dev/null
+++ b/arch/s390/include/asm/vdso/clocksource.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_CLOCKSOURCE_H
+#define __ASM_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES	\
+	VDSO_CLOCKMODE_TOD
+
+#endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/s390/include/asm/vdso/data.h b/arch/s390/include/asm/vdso/data.h
new file mode 100644
index 0000000..73ee891
--- /dev/null
+++ b/arch/s390/include/asm/vdso/data.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __S390_ASM_VDSO_DATA_H
+#define __S390_ASM_VDSO_DATA_H
+
+#include <linux/types.h>
+#include <vdso/datapage.h>
+
+struct arch_vdso_data {
+	__s64 tod_steering_delta;
+	__u64 tod_steering_end;
+};
+
+#endif /* __S390_ASM_VDSO_DATA_H */
diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h
new file mode 100644
index 0000000..bf12306
--- /dev/null
+++ b/arch/s390/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_VDSO_GETTIMEOFDAY_H
+#define ASM_VDSO_GETTIMEOFDAY_H
+
+#define VDSO_HAS_TIME 1
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#include <asm/timex.h>
+#include <asm/unistd.h>
+#include <asm/vdso.h>
+#include <linux/compiler.h>
+
+#define vdso_calc_delta __arch_vdso_calc_delta
+static __always_inline u64 __arch_vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+{
+	return (cycles - last) * mult;
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+	return _vdso_data;
+}
+
+static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd)
+{
+	const struct vdso_data *vdso = __arch_get_vdso_data();
+	u64 adj, now;
+
+	now = get_tod_clock();
+	adj = vdso->arch_data.tod_steering_end - now;
+	if (unlikely((s64) adj > 0))
+		now += (vdso->arch_data.tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+	return now;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+	register unsigned long r1 __asm__("r1") = __NR_clock_gettime;
+	register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
+	register void *r3 __asm__("r3") = ts;
+
+	asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+	return r2;
+}
+
+static __always_inline
+long gettimeofday_fallback(register struct __kernel_old_timeval *tv,
+			   register struct timezone *tz)
+{
+	register unsigned long r1 __asm__("r1") = __NR_gettimeofday;
+	register unsigned long r2 __asm__("r2") = (unsigned long)tv;
+	register void *r3 __asm__("r3") = tz;
+
+	asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+	return r2;
+}
+
+static __always_inline
+long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts)
+{
+	register unsigned long r1 __asm__("r1") = __NR_clock_getres;
+	register unsigned long r2 __asm__("r2") = (unsigned long)clkid;
+	register void *r3 __asm__("r3") = ts;
+
+	asm ("svc 0\n" : "+d" (r2) : "d" (r1), "d" (r3) : "cc", "memory");
+	return r2;
+}
+
+#endif
diff --git a/arch/s390/include/asm/vdso/processor.h b/arch/s390/include/asm/vdso/processor.h
new file mode 100644
index 0000000..cfcc3e1
--- /dev/null
+++ b/arch/s390/include/asm/vdso/processor.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_VDSO_PROCESSOR_H
+#define __ASM_VDSO_PROCESSOR_H
+
+#define cpu_relax() barrier()
+
+#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h
new file mode 100644
index 0000000..6c67c08
--- /dev/null
+++ b/arch/s390/include/asm/vdso/vsyscall.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_VDSO_VSYSCALL_H
+#define __ASM_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <asm/vdso.h>
+/*
+ * Update the vDSO data page to keep in sync with kernel timekeeping.
+ */
+
+static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
+{
+	return vdso_data;
+}
+#define __arch_get_k_vdso_data __s390_get_k_vdso_data
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h
new file mode 100644
index 0000000..3ba3a6b
--- /dev/null
+++ b/arch/s390/include/asm/vmalloc.h
@@ -0,0 +1,4 @@
+#ifndef _ASM_S390_VMALLOC_H
+#define _ASM_S390_VMALLOC_H
+
+#endif /* _ASM_S390_VMALLOC_H */
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
index 42f707d..e601ada 100644
--- a/arch/s390/include/asm/vtimer.h
+++ b/arch/s390/include/asm/vtimer.h
@@ -25,8 +25,6 @@
 extern int mod_virt_timer(struct vtimer_list *timer, u64 expires);
 extern int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires);
 extern int del_virt_timer(struct vtimer_list *timer);
-
-extern void init_cpu_vtimer(void);
 extern void vtime_init(void);
 
 #endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/uapi/asm/debug.h b/arch/s390/include/uapi/asm/debug.h
deleted file mode 100644
index c7c564d..0000000
--- a/arch/s390/include/uapi/asm/debug.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *   S/390 debug facility
- *
- *    Copyright IBM Corp. 1999, 2000
- */
-
-#ifndef _UAPIDEBUG_H
-#define _UAPIDEBUG_H
-
-#include <linux/fs.h>
-
-/* Note:
- * struct __debug_entry must be defined outside of #ifdef __KERNEL__ 
- * in order to allow a user program to analyze the 'raw'-view.
- */
-
-struct __debug_entry{
-        union {
-                struct {
-                        unsigned long long clock:52;
-                        unsigned long long exception:1;
-                        unsigned long long level:3;
-                        unsigned long long cpuid:8;
-                } fields;
-
-                unsigned long long stck;
-        } id;
-        void* caller;
-} __attribute__((packed));
-
-
-#define __DEBUG_FEATURE_VERSION      2  /* version of debug feature */
-
-#endif /* _UAPIDEBUG_H */
diff --git a/arch/s390/include/uapi/asm/ipcbuf.h b/arch/s390/include/uapi/asm/ipcbuf.h
index 5b1c4f4..1030cd1 100644
--- a/arch/s390/include/uapi/asm/ipcbuf.h
+++ b/arch/s390/include/uapi/asm/ipcbuf.h
@@ -2,6 +2,8 @@
 #ifndef __S390_IPCBUF_H__
 #define __S390_IPCBUF_H__
 
+#include <linux/posix_types.h>
+
 /*
  * The user_ipc_perm structure for S/390 architecture.
  * Note extra padding because this structure is passed back and forth
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index 451ba7d..d1ecd5d 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -27,6 +27,7 @@
 	IPL_PBT_FCP = 0,
 	IPL_PBT_SCP_DATA = 1,
 	IPL_PBT_CCW = 2,
+	IPL_PBT_NVME = 4,
 };
 
 /* IPL Parameter Block 0 with common fields */
@@ -67,6 +68,30 @@
 #define IPL_PB0_FCP_OPT_IPL	0x10
 #define IPL_PB0_FCP_OPT_DUMP	0x20
 
+/* IPL Parameter Block 0 for NVMe */
+struct ipl_pb0_nvme {
+	__u32 len;
+	__u8  pbt;
+	__u8  reserved1[3];
+	__u8  loadparm[8];
+	__u8  reserved2[304];
+	__u8  opt;
+	__u8  reserved3[3];
+	__u32 fid;
+	__u8 reserved4[12];
+	__u32 nsid;
+	__u8 reserved5[4];
+	__u32 bootprog;
+	__u8 reserved6[12];
+	__u64 br_lba;
+	__u32 scp_data_len;
+	__u8  reserved7[260];
+	__u8  scp_data[];
+} __packed;
+
+#define IPL_PB0_NVME_OPT_IPL	0x10
+#define IPL_PB0_NVME_OPT_DUMP	0x20
+
 /* IPL Parameter Block 0 for CCW */
 struct ipl_pb0_ccw {
 	__u32 len;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 436ec76..7a6b148 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@
 #define KVM_SYNC_GSCB   (1UL << 9)
 #define KVM_SYNC_BPBC   (1UL << 10)
 #define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
 
 #define KVM_SYNC_S390_VALID_FIELDS \
 	(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
 	 KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
-	 KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+	 KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+	 KVM_SYNC_DIAG318)
 
 /* length and alignment of the sdnx as a power of two */
 #define SDNXC 8
@@ -264,7 +266,8 @@
 	__u8 reserved2 : 7;
 	__u8 padding1[51];	/* riccb needs to be 64byte aligned */
 	__u8 riccb[64];		/* runtime instrumentation controls block */
-	__u8 padding2[192];	/* sdnx needs to be 256byte aligned */
+	__u64 diag318;		/* diagnose 0x318 info */
+	__u8 padding2[184];	/* sdnx needs to be 256byte aligned */
 	union {
 		__u8 sdnx[SDNXL];  /* state description annex */
 		struct {
diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h
index e22f072..7349e96 100644
--- a/arch/s390/include/uapi/asm/pkey.h
+++ b/arch/s390/include/uapi/asm/pkey.h
@@ -25,20 +25,26 @@
 #define MAXPROTKEYSIZE	64	/* a protected key blob may be up to 64 bytes */
 #define MAXCLRKEYSIZE	32	   /* a clear key value may be up to 32 bytes */
 #define MAXAESCIPHERKEYSIZE 136  /* our aes cipher keys have always 136 bytes */
+#define MINEP11AESKEYBLOBSIZE 256  /* min EP11 AES key blob size  */
+#define MAXEP11AESKEYBLOBSIZE 320  /* max EP11 AES key blob size */
 
-/* Minimum and maximum size of a key blob */
+/* Minimum size of a key blob */
 #define MINKEYBLOBSIZE	SECKEYBLOBSIZE
-#define MAXKEYBLOBSIZE	MAXAESCIPHERKEYSIZE
 
 /* defines for the type field within the pkey_protkey struct */
 #define PKEY_KEYTYPE_AES_128		      1
 #define PKEY_KEYTYPE_AES_192		      2
 #define PKEY_KEYTYPE_AES_256		      3
+#define PKEY_KEYTYPE_ECC		      4
 
 /* the newer ioctls use a pkey_key_type enum for type information */
 enum pkey_key_type {
 	PKEY_TYPE_CCA_DATA   = (__u32) 1,
 	PKEY_TYPE_CCA_CIPHER = (__u32) 2,
+	PKEY_TYPE_EP11	     = (__u32) 3,
+	PKEY_TYPE_CCA_ECC    = (__u32) 0x1f,
+	PKEY_TYPE_EP11_AES   = (__u32) 6,
+	PKEY_TYPE_EP11_ECC   = (__u32) 7,
 };
 
 /* the newer ioctls use a pkey_key_size enum for key size information */
@@ -87,6 +93,20 @@
 };
 
 /*
+ * EP11 key blobs of type PKEY_TYPE_EP11_AES and PKEY_TYPE_EP11_ECC
+ * are ep11 blobs prepended by this header:
+ */
+struct ep11kblob_header {
+	__u8  type;	/* always 0x00 */
+	__u8  hver;	/* header version,  currently needs to be 0x00 */
+	__u16 len;	/* total length in bytes (including this header) */
+	__u8  version;	/* PKEY_TYPE_EP11_AES or PKEY_TYPE_EP11_ECC */
+	__u8  res0;	/* unused */
+	__u16 bitlen;	/* clear key bit len, 0 for unknown */
+	__u8  res1[8];	/* unused */
+} __packed;
+
+/*
  * Generate CCA AES secure key.
  */
 struct pkey_genseck {
@@ -200,7 +220,7 @@
 
 /*
  * Generate secure key, version 2.
- * Generate either a CCA AES secure key or a CCA AES cipher key.
+ * Generate CCA AES secure key, CCA AES cipher key or EP11 AES secure key.
  * There needs to be a list of apqns given with at least one entry in there.
  * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
  * is not supported. The implementation walks through the list of apqns and
@@ -210,10 +230,13 @@
  * (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to
  * generate a list of apqns based on the key type to generate.
  * The keygenflags argument is passed to the low level generation functions
- * individual for the key type and has a key type specific meaning. Currently
- * only CCA AES cipher keys react to this parameter: Use one or more of the
- * PKEY_KEYGEN_* flags to widen the export possibilities. By default a cipher
- * key is only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * individual for the key type and has a key type specific meaning. When
+ * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_*
+ * flags to widen the export possibilities. By default a cipher key is
+ * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * The keygenflag argument for generating an EP11 AES key should either be 0
+ * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and
+ * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags.
  */
 struct pkey_genseck2 {
 	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets*/
@@ -229,8 +252,8 @@
 
 /*
  * Generate secure key from clear key value, version 2.
- * Construct a CCA AES secure key or CCA AES cipher key from a given clear key
- * value.
+ * Construct an CCA AES secure key, CCA AES cipher key or EP11 AES secure
+ * key from a given clear key value.
  * There needs to be a list of apqns given with at least one entry in there.
  * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
  * is not supported. The implementation walks through the list of apqns and
@@ -240,10 +263,13 @@
  * (return -1 with errno ENODEV). You may use the PKEY_APQNS4KT ioctl to
  * generate a list of apqns based on the key type to generate.
  * The keygenflags argument is passed to the low level generation functions
- * individual for the key type and has a key type specific meaning. Currently
- * only CCA AES cipher keys react to this parameter: Use one or more of the
- * PKEY_KEYGEN_* flags to widen the export possibilities. By default a cipher
- * key is only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * individual for the key type and has a key type specific meaning. When
+ * generating CCA cipher keys you can use one or more of the PKEY_KEYGEN_*
+ * flags to widen the export possibilities. By default a cipher key is
+ * only exportable for CPACF (PKEY_KEYGEN_XPRT_CPAC).
+ * The keygenflag argument for generating an EP11 AES key should either be 0
+ * to use the defaults which are XCP_BLOB_ENCRYPT, XCP_BLOB_DECRYPT and
+ * XCP_BLOB_PROTKEY_EXTRACTABLE or a valid combination of XCP_BLOB_* flags.
  */
 struct pkey_clr2seck2 {
 	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
@@ -266,14 +292,19 @@
  * with one apqn able to handle this key.
  * The function also checks for the master key verification patterns
  * of the key matching to the current or alternate mkvp of the apqn.
- * Currently CCA AES secure keys and CCA AES cipher keys are supported.
- * The flags field is updated with some additional info about the apqn mkvp
+ * For CCA AES secure keys and CCA AES cipher keys this means to check
+ * the key's mkvp against the current or old mkvp of the apqns. The flags
+ * field is updated with some additional info about the apqn mkvp
  * match: If the current mkvp matches to the key's mkvp then the
  * PKEY_FLAGS_MATCH_CUR_MKVP bit is set, if the alternate mkvp matches to
  * the key's mkvp the PKEY_FLAGS_MATCH_ALT_MKVP is set. For CCA keys the
  * alternate mkvp is the old master key verification pattern.
  * CCA AES secure keys are also checked to have the CPACF export allowed
  * bit enabled (XPRTCPAC) in the kmf1 field.
+ * EP11 keys are also supported and the wkvp of the key is checked against
+ * the current wkvp of the apqns. There is no alternate for this type of
+ * key and so on a match the flag PKEY_FLAGS_MATCH_CUR_MKVP always is set.
+ * EP11 keys are also checked to have XCP_BLOB_PROTKEY_EXTRACTABLE set.
  * The ioctl returns 0 as long as the given or found apqn matches to
  * matches with the current or alternate mkvp to the key's mkvp. If the given
  * apqn does not match or there is no such apqn found, -1 with errno
@@ -291,7 +322,7 @@
 #define PKEY_VERIFYKEY2 _IOWR(PKEY_IOCTL_MAGIC, 0x17, struct pkey_verifykey2)
 
 /*
- * Transform a key blob (of any type) into a protected key, version 2.
+ * Transform a key blob into a protected key, version 2.
  * There needs to be a list of apqns given with at least one entry in there.
  * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
  * is not supported. The implementation walks through the list of apqns and
@@ -300,6 +331,8 @@
  * list is tried until success (return 0) or the end of the list is reached
  * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
  * generate a list of apqns based on the key.
+ * Deriving ECC protected keys from ECC secure keys is not supported with
+ * this ioctl, use PKEY_KBLOB2PROTK3 for this purpose.
  */
 struct pkey_kblob2pkey2 {
 	__u8 __user *key;	     /* in: pointer to key blob		   */
@@ -313,16 +346,20 @@
 /*
  * Build a list of APQNs based on a key blob given.
  * Is able to find out which type of secure key is given (CCA AES secure
- * key or CCA AES cipher key) and tries to find all matching crypto cards
- * based on the MKVP and maybe other criterias (like CCA AES cipher keys
- * need a CEX5C or higher). The list of APQNs is further filtered by the key's
- * mkvp which needs to match to either the current mkvp or the alternate mkvp
- * (which is the old mkvp on CCA adapters) of the apqns. The flags argument may
- * be used to limit the matching apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is
- * given, only the current mkvp of each apqn is compared. Likewise with the
- * PKEY_FLAGS_MATCH_ALT_MKVP. If both are given, it is assumed to
- * return apqns where either the current or the alternate mkvp
- * matches. At least one of the matching flags needs to be given.
+ * key, CCA AES cipher key, CCA ECC private key, EP11 AES key, EP11 ECC private
+ * key) and tries to find all matching crypto cards based on the MKVP and maybe
+ * other criterias (like CCA AES cipher keys need a CEX5C or higher, EP11 keys
+ * with BLOB_PKEY_EXTRACTABLE need a CEX7 and EP11 api version 4). The list of
+ * APQNs is further filtered by the key's mkvp which needs to match to either
+ * the current mkvp (CCA and EP11) or the alternate mkvp (old mkvp, CCA adapters
+ * only) of the apqns. The flags argument may be used to limit the matching
+ * apqns. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current mkvp of
+ * each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP. If both
+ * are given, it is assumed to return apqns where either the current or the
+ * alternate mkvp matches. At least one of the matching flags needs to be given.
+ * The flags argument for EP11 keys has no further action and is currently
+ * ignored (but needs to be given as PKEY_FLAGS_MATCH_CUR_MKVP) as there is only
+ * the wkvp from the key to match against the apqn's wkvp.
  * The list of matching apqns is stored into the space given by the apqns
  * argument and the number of stored entries goes into apqn_entries. If the list
  * is empty (apqn_entries is 0) the apqn_entries field is updated to the number
@@ -348,14 +385,19 @@
  * restrict the list by given master key verification patterns.
  * For different key types there may be different ways to match the
  * master key verification patterns. For CCA keys (CCA data key and CCA
- * cipher key) the first 8 bytes of cur_mkvp refer to the current mkvp value
- * of the apqn and the first 8 bytes of the alt_mkvp refer to the old mkvp.
- * The flags argument controls if the apqns current and/or alternate mkvp
+ * cipher key) the first 8 bytes of cur_mkvp refer to the current AES mkvp value
+ * of the apqn and the first 8 bytes of the alt_mkvp refer to the old AES mkvp.
+ * For CCA ECC keys it is similar but the match is against the APKA current/old
+ * mkvp. The flags argument controls if the apqns current and/or alternate mkvp
  * should match. If the PKEY_FLAGS_MATCH_CUR_MKVP is given, only the current
  * mkvp of each apqn is compared. Likewise with the PKEY_FLAGS_MATCH_ALT_MKVP.
  * If both are given, it is assumed to return apqns where either the
  * current or the alternate mkvp matches. If no match flag is given
  * (flags is 0) the mkvp values are ignored for the match process.
+ * For EP11 keys there is only the current wkvp. So if the apqns should also
+ * match to a given wkvp, then the PKEY_FLAGS_MATCH_CUR_MKVP flag should be
+ * set. The wkvp value is 32 bytes but only the leftmost 16 bytes are compared
+ * against the leftmost 16 byte of the wkvp of the apqn.
  * The list of matching apqns is stored into the space given by the apqns
  * argument and the number of stored entries goes into apqn_entries. If the list
  * is empty (apqn_entries is 0) the apqn_entries field is updated to the number
@@ -376,4 +418,30 @@
 };
 #define PKEY_APQNS4KT _IOWR(PKEY_IOCTL_MAGIC, 0x1C, struct pkey_apqns4keytype)
 
+/*
+ * Transform a key blob into a protected key, version 3.
+ * The difference to version 2 of this ioctl is that the protected key
+ * buffer is now explicitly and not within a struct pkey_protkey any more.
+ * So this ioctl is also able to handle EP11 and CCA ECC secure keys and
+ * provide ECC protected keys.
+ * There needs to be a list of apqns given with at least one entry in there.
+ * All apqns in the list need to be exact apqns, 0xFFFF as ANY card or domain
+ * is not supported. The implementation walks through the list of apqns and
+ * tries to send the request to each apqn without any further checking (like
+ * card type or online state). If the apqn fails, simple the next one in the
+ * list is tried until success (return 0) or the end of the list is reached
+ * (return -1 with errno ENODEV). You may use the PKEY_APQNS4K ioctl to
+ * generate a list of apqns based on the key.
+ */
+struct pkey_kblob2pkey3 {
+	__u8 __user *key;	     /* in: pointer to key blob		   */
+	__u32 keylen;		     /* in: key blob size		   */
+	struct pkey_apqn __user *apqns; /* in: ptr to list of apqn targets */
+	__u32 apqn_entries;	     /* in: # of apqn target list entries  */
+	__u32 pkeytype;		/* out: prot key type (enum pkey_key_type) */
+	__u32 pkeylen;	 /* in/out: size of pkey buffer/actual len of pkey */
+	__u8 __user *pkey;		 /* in: pkey blob buffer space ptr */
+};
+#define PKEY_KBLOB2PROTK3 _IOWR(PKEY_IOCTL_MAGIC, 0x1D, struct pkey_kblob2pkey3)
+
 #endif /* _UAPI_PKEY_H */
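
For the new PKEY_KBLOB2PROTK3 ioctl above, a userspace sketch follows. The /dev/pkey node, the APQN values and the buffer handling are assumptions for illustration, and error handling is kept minimal:

	#include <fcntl.h>
	#include <stdint.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <asm/pkey.h>

	/* Convert a secure key blob into a protected key via version 3 of the ioctl. */
	int blob_to_protected_key(const uint8_t *key, uint32_t keylen,
				  uint8_t *pkey, uint32_t *pkeylen)
	{
		struct pkey_apqn apqn = { .card = 2, .domain = 7 };	/* illustrative APQN */
		struct pkey_kblob2pkey3 k2p = {
			.key = (uint8_t *)key,
			.keylen = keylen,
			.apqns = &apqn,
			.apqn_entries = 1,
			.pkey = pkey,
			.pkeylen = *pkeylen,	/* in: buffer size, out: actual length */
		};
		int fd, rc;

		fd = open("/dev/pkey", O_RDWR);
		if (fd < 0)
			return -1;
		rc = ioctl(fd, PKEY_KBLOB2PROTK3, &k2p);
		if (!rc)
			*pkeylen = k2p.pkeylen;
		close(fd);
		return rc;
	}
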
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 6ca1e68..ede3186 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -29,7 +29,7 @@
 	{ 0x13, "SIGP conditional emergency signal" },		\
 	{ 0x15, "SIGP sense running" },				\
 	{ 0x16, "SIGP set multithreading"},			\
-	{ 0x17, "SIGP store additional status ait address"}
+	{ 0x17, "SIGP store additional status at address"}
 
 #define icpt_prog_codes						\
 	{ 0x0001, "Prog Operation" },				\
diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h
index f9e5e1f..22fd202 100644
--- a/arch/s390/include/uapi/asm/zcrypt.h
+++ b/arch/s390/include/uapi/asm/zcrypt.h
@@ -36,12 +36,12 @@
  * - length(n_modulus) = inputdatalength
  */
 struct ica_rsa_modexpo {
-	char __user  *inputdata;
-	unsigned int  inputdatalength;
-	char __user  *outputdata;
-	unsigned int  outputdatalength;
-	char __user  *b_key;
-	char __user  *n_modulus;
+	__u8 __user  *inputdata;
+	__u32	      inputdatalength;
+	__u8 __user  *outputdata;
+	__u32	      outputdatalength;
+	__u8 __user  *b_key;
+	__u8 __user  *n_modulus;
 };
 
 /**
@@ -59,15 +59,15 @@
  * - length(u_mult_inv) = inputdatalength/2 + 8
  */
 struct ica_rsa_modexpo_crt {
-	char __user  *inputdata;
-	unsigned int  inputdatalength;
-	char __user  *outputdata;
-	unsigned int  outputdatalength;
-	char __user  *bp_key;
-	char __user  *bq_key;
-	char __user  *np_prime;
-	char __user  *nq_prime;
-	char __user  *u_mult_inv;
+	__u8 __user  *inputdata;
+	__u32	      inputdatalength;
+	__u8 __user  *outputdata;
+	__u32	      outputdatalength;
+	__u8 __user  *bp_key;
+	__u8 __user  *bq_key;
+	__u8 __user  *np_prime;
+	__u8 __user  *nq_prime;
+	__u8 __user  *u_mult_inv;
 };
 
 /**
@@ -83,67 +83,67 @@
  *	    key block
  */
 struct CPRBX {
-	unsigned short	cprb_len;	/* CPRB length	      220	 */
-	unsigned char	cprb_ver_id;	/* CPRB version id.   0x02	 */
-	unsigned char	pad_000[3];	/* Alignment pad bytes		 */
-	unsigned char	func_id[2];	/* function id	      0x5432	 */
-	unsigned char	cprb_flags[4];	/* Flags			 */
-	unsigned int	req_parml;	/* request parameter buffer len	 */
-	unsigned int	req_datal;	/* request data buffer		 */
-	unsigned int	rpl_msgbl;	/* reply  message block length	 */
-	unsigned int	rpld_parml;	/* replied parameter block len	 */
-	unsigned int	rpl_datal;	/* reply data block len		 */
-	unsigned int	rpld_datal;	/* replied data block len	 */
-	unsigned int	req_extbl;	/* request extension block len	 */
-	unsigned char	pad_001[4];	/* reserved			 */
-	unsigned int	rpld_extbl;	/* replied extension block len	 */
-	unsigned char	padx000[16 - sizeof(char *)];
-	unsigned char  *req_parmb;	/* request parm block 'address'	 */
-	unsigned char	padx001[16 - sizeof(char *)];
-	unsigned char  *req_datab;	/* request data block 'address'	 */
-	unsigned char	padx002[16 - sizeof(char *)];
-	unsigned char  *rpl_parmb;	/* reply parm block 'address'	 */
-	unsigned char	padx003[16 - sizeof(char *)];
-	unsigned char  *rpl_datab;	/* reply data block 'address'	 */
-	unsigned char	padx004[16 - sizeof(char *)];
-	unsigned char  *req_extb;	/* request extension block 'addr'*/
-	unsigned char	padx005[16 - sizeof(char *)];
-	unsigned char  *rpl_extb;	/* reply extension block 'address'*/
-	unsigned short	ccp_rtcode;	/* server return code		 */
-	unsigned short	ccp_rscode;	/* server reason code		 */
-	unsigned int	mac_data_len;	/* Mac Data Length		 */
-	unsigned char	logon_id[8];	/* Logon Identifier		 */
-	unsigned char	mac_value[8];	/* Mac Value			 */
-	unsigned char	mac_content_flgs;/* Mac content flag byte	 */
-	unsigned char	pad_002;	/* Alignment			 */
-	unsigned short	domain;		/* Domain			 */
-	unsigned char	usage_domain[4];/* Usage domain			 */
-	unsigned char	cntrl_domain[4];/* Control domain		 */
-	unsigned char	S390enf_mask[4];/* S/390 enforcement mask	 */
-	unsigned char	pad_004[36];	/* reserved			 */
+	__u16	     cprb_len;		/* CPRB length	      220	 */
+	__u8	     cprb_ver_id;	/* CPRB version id.   0x02	 */
+	__u8	     pad_000[3];	/* Alignment pad bytes		 */
+	__u8	     func_id[2];	/* function id	      0x5432	 */
+	__u8	     cprb_flags[4];	/* Flags			 */
+	__u32	     req_parml;		/* request parameter buffer len	 */
+	__u32	     req_datal;		/* request data buffer		 */
+	__u32	     rpl_msgbl;		/* reply  message block length	 */
+	__u32	     rpld_parml;	/* replied parameter block len	 */
+	__u32	     rpl_datal;		/* reply data block len		 */
+	__u32	     rpld_datal;	/* replied data block len	 */
+	__u32	     req_extbl;		/* request extension block len	 */
+	__u8	     pad_001[4];	/* reserved			 */
+	__u32	     rpld_extbl;	/* replied extension block len	 */
+	__u8	     padx000[16 - sizeof(__u8 *)];
+	__u8 __user *req_parmb;		/* request parm block 'address'	 */
+	__u8	     padx001[16 - sizeof(__u8 *)];
+	__u8 __user *req_datab;		/* request data block 'address'	 */
+	__u8	     padx002[16 - sizeof(__u8 *)];
+	__u8 __user *rpl_parmb;		/* reply parm block 'address'	 */
+	__u8	     padx003[16 - sizeof(__u8 *)];
+	__u8 __user *rpl_datab;		/* reply data block 'address'	 */
+	__u8	     padx004[16 - sizeof(__u8 *)];
+	__u8 __user *req_extb;		/* request extension block 'addr'*/
+	__u8	     padx005[16 - sizeof(__u8 *)];
+	__u8 __user *rpl_extb;		/* reply extension block 'address'*/
+	__u16	     ccp_rtcode;	/* server return code		 */
+	__u16	     ccp_rscode;	/* server reason code		 */
+	__u32	     mac_data_len;	/* Mac Data Length		 */
+	__u8	     logon_id[8];	/* Logon Identifier		 */
+	__u8	     mac_value[8];	/* Mac Value			 */
+	__u8	     mac_content_flgs;	/* Mac content flag byte	 */
+	__u8	     pad_002;		/* Alignment			 */
+	__u16	     domain;		/* Domain			 */
+	__u8	     usage_domain[4];	/* Usage domain			 */
+	__u8	     cntrl_domain[4];	/* Control domain		 */
+	__u8	     S390enf_mask[4];	/* S/390 enforcement mask	 */
+	__u8	     pad_004[36];	/* reserved			 */
 } __attribute__((packed));
 
 /**
  * xcRB
  */
 struct ica_xcRB {
-	unsigned short	agent_ID;
-	unsigned int	user_defined;
-	unsigned short	request_ID;
-	unsigned int	request_control_blk_length;
-	unsigned char	padding1[16 - sizeof(char *)];
-	char __user    *request_control_blk_addr;
-	unsigned int	request_data_length;
-	char		padding2[16 - sizeof(char *)];
-	char __user    *request_data_address;
-	unsigned int	reply_control_blk_length;
-	char		padding3[16 - sizeof(char *)];
-	char __user    *reply_control_blk_addr;
-	unsigned int	reply_data_length;
-	char		padding4[16 - sizeof(char *)];
-	char __user    *reply_data_addr;
-	unsigned short	priority_window;
-	unsigned int	status;
+	__u16	      agent_ID;
+	__u32	      user_defined;
+	__u16	      request_ID;
+	__u32	      request_control_blk_length;
+	__u8	      _padding1[16 - sizeof(__u8 *)];
+	__u8 __user  *request_control_blk_addr;
+	__u32	      request_data_length;
+	__u8	      _padding2[16 - sizeof(__u8 *)];
+	__u8 __user  *request_data_address;
+	__u32	      reply_control_blk_length;
+	__u8	      _padding3[16 - sizeof(__u8 *)];
+	__u8 __user  *reply_control_blk_addr;
+	__u32	      reply_data_length;
+	__u8	      __padding4[16 - sizeof(__u8 *)];
+	__u8 __user  *reply_data_addr;
+	__u16	      priority_window;
+	__u32	      status;
 } __attribute__((packed));
 
 /**
@@ -161,17 +161,17 @@
  * @payload_len:	Payload length
  */
 struct ep11_cprb {
-	__u16		cprb_len;
-	unsigned char	cprb_ver_id;
-	unsigned char	pad_000[2];
-	unsigned char	flags;
-	unsigned char	func_id[2];
-	__u32		source_id;
-	__u32		target_id;
-	__u32		ret_code;
-	__u32		reserved1;
-	__u32		reserved2;
-	__u32		payload_len;
+	__u16	cprb_len;
+	__u8	cprb_ver_id;
+	__u8	pad_000[2];
+	__u8	flags;
+	__u8	func_id[2];
+	__u32	source_id;
+	__u32	target_id;
+	__u32	ret_code;
+	__u32	reserved1;
+	__u32	reserved2;
+	__u32	payload_len;
 } __attribute__((packed));
 
 /**
@@ -197,13 +197,13 @@
  */
 struct ep11_urb {
 	__u16		targets_num;
-	__u64		targets;
+	__u8 __user    *targets;
 	__u64		weight;
 	__u64		req_no;
 	__u64		req_len;
-	__u64		req;
+	__u8 __user    *req;
 	__u64		resp_len;
-	__u64		resp;
+	__u8 __user    *resp;
 } __attribute__((packed));
 
 /**
@@ -237,7 +237,9 @@
 	struct zcrypt_device_status_ext device[MAX_ZDEV_ENTRIES_EXT];
 };
 
-#define AUTOSELECT 0xFFFFFFFF
+#define AUTOSELECT  0xFFFFFFFF
+#define AUTOSEL_AP  ((__u16) 0xFFFF)
+#define AUTOSEL_DOM ((__u16) 0xFFFF)
 
 #define ZCRYPT_IOCTL_MAGIC 'z'
 
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore
index c5f676c..bbb90f9 100644
--- a/arch/s390/kernel/.gitignore
+++ b/arch/s390/kernel/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 vmlinux.lds
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7edbbcd..dd73b7f 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -33,11 +33,6 @@
 CFLAGS_dumpstack.o	+= -fno-optimize-sibling-calls
 CFLAGS_unwind_bc.o	+= -fno-optimize-sibling-calls
 
-#
-# Pass UTS_MACHINE for user_regset definition
-#
-CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y	+= debug.o irq.o ipl.o dis.o diag.o vdso.o
@@ -54,7 +49,7 @@
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_SCHED_TOPOLOGY)	+= topology.o
-obj-$(CONFIG_HIBERNATION)	+= suspend.o swsusp.o
+obj-$(CONFIG_NUMA)		+= numa.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
 obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o
@@ -62,6 +57,7 @@
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
+obj-$(CONFIG_KPROBES)		+= kprobes_insn_page.o
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o
@@ -70,7 +66,7 @@
 obj-$(CONFIG_KEXEC_FILE)	+= machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_KEXEC_FILE)	+= kexec_elf.o
 
-obj-$(CONFIG_IMA)		+= ima_arch.o
+obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
 
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_cpum_cf_common.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf.o perf_cpum_sf.o
@@ -78,7 +74,7 @@
 obj-$(CONFIG_PERF_EVENTS)	+= perf_cpum_cf_diag.o
 
 obj-$(CONFIG_TRACEPOINTS)	+= trace.o
+obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE))	+= uv.o
 
 # vdso
 obj-y				+= vdso64/
-obj-$(CONFIG_COMPAT_VDSO)	+= vdso32/
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index a65cb49..483051e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -11,9 +11,9 @@
 #include <linux/kvm_host.h>
 #include <linux/sched.h>
 #include <linux/purgatory.h>
+#include <linux/pgtable.h>
 #include <asm/idle.h>
 #include <asm/vdso.h>
-#include <asm/pgtable.h>
 #include <asm/gmap.h>
 #include <asm/nmi.h>
 #include <asm/stacktrace.h>
@@ -53,42 +53,13 @@
 	/* stack_frame offsets */
 	OFFSET(__SF_BACKCHAIN, stack_frame, back_chain);
 	OFFSET(__SF_GPRS, stack_frame, gprs);
-	OFFSET(__SF_EMPTY, stack_frame, empty1);
-	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]);
-	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]);
-	OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]);
-	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]);
+	OFFSET(__SF_EMPTY, stack_frame, empty1[0]);
+	OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]);
+	OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]);
+	OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]);
+	OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]);
 	BLANK();
-	/* timeval/timezone offsets for use by vdso */
-	OFFSET(__VDSO_UPD_COUNT, vdso_data, tb_update_count);
-	OFFSET(__VDSO_XTIME_STAMP, vdso_data, xtime_tod_stamp);
-	OFFSET(__VDSO_XTIME_SEC, vdso_data, xtime_clock_sec);
-	OFFSET(__VDSO_XTIME_NSEC, vdso_data, xtime_clock_nsec);
-	OFFSET(__VDSO_XTIME_CRS_SEC, vdso_data, xtime_coarse_sec);
-	OFFSET(__VDSO_XTIME_CRS_NSEC, vdso_data, xtime_coarse_nsec);
-	OFFSET(__VDSO_WTOM_SEC, vdso_data, wtom_clock_sec);
-	OFFSET(__VDSO_WTOM_NSEC, vdso_data, wtom_clock_nsec);
-	OFFSET(__VDSO_WTOM_CRS_SEC, vdso_data, wtom_coarse_sec);
-	OFFSET(__VDSO_WTOM_CRS_NSEC, vdso_data, wtom_coarse_nsec);
-	OFFSET(__VDSO_TIMEZONE, vdso_data, tz_minuteswest);
-	OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
-	OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
-	OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
-	OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
-	OFFSET(__VDSO_TS_END, vdso_data, ts_end);
-	OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
-	OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
-	OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
-	OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
-	OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
-	BLANK();
-	/* constants used by the vdso */
-	DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
-	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
-	DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
-	DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
-	DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
-	DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
+	OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
 	BLANK();
 	/* idle data offsets */
 	OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a585..205b2e2 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -141,7 +141,7 @@
 	while (count) {
 		from = __pa(src);
 		if (!OLDMEM_BASE && from < sclp.hsa_size) {
-			/* Copy from zfcpdump HSA area */
+			/* Copy from zfcp/nvme dump HSA area */
 			len = min(count, sclp.hsa_size - from);
 			rc = memcpy_hsa_kernel(dst, from, len);
 			if (rc)
@@ -184,7 +184,7 @@
 	while (count) {
 		from = __pa(src);
 		if (!OLDMEM_BASE && from < sclp.hsa_size) {
-			/* Copy from zfcpdump HSA area */
+			/* Copy from zfcp/nvme dump HSA area */
 			len = min(count, sclp.hsa_size - from);
 			rc = memcpy_hsa_user(dst, from, len);
 			if (rc)
@@ -258,7 +258,7 @@
 }
 
 /*
- * Remap "oldmem" for zfcpdump
+ * Remap "oldmem" for zfcp/nvme dump
  *
  * We only map available memory above HSA size. Memory below HSA size
  * is read on demand using the copy_oldmem_page() function.
@@ -283,7 +283,7 @@
 }
 
 /*
- * Remap "oldmem" for kdump or zfcpdump
+ * Remap "oldmem" for kdump or zfcp/nvme dump
  */
 int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
 			   unsigned long pfn, unsigned long size, pgprot_t prot)
@@ -549,8 +549,7 @@
 	int cnt = 0;
 	u64 idx;
 
-	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, NULL, NULL, NULL)
+	for_each_physmem_range(idx, &oldmem_type, NULL, NULL)
 		cnt++;
 	return cnt;
 }
@@ -563,8 +562,7 @@
 	phys_addr_t start, end;
 	u64 idx;
 
-	for_each_mem_range(idx, &memblock.physmem, &oldmem_type, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, &start, &end, NULL) {
+	for_each_physmem_range(idx, &oldmem_type, &start, &end) {
 		phdr->p_filesz = end - start;
 		phdr->p_type = PT_LOAD;
 		phdr->p_offset = start;
@@ -634,11 +632,11 @@
 	u32 alloc_size;
 	u64 hdr_off;
 
-	/* If we are not in kdump or zfcpdump mode return */
-	if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+	/* If we are not in kdump or zfcp/nvme dump mode return */
+	if (!OLDMEM_BASE && !is_ipl_type_dump())
 		return 0;
-	/* If we cannot get HSA size for zfcpdump return error */
-	if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size)
+	/* If we cannot get HSA size for zfcp/nvme dump return error */
+	if (is_ipl_type_dump() && !sclp.hsa_size)
 		return -ENODEV;
 
 	/* For kdump, exclude previous crashkernel memory */
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index b1aadc3..89fbfb3 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -2,7 +2,7 @@
 /*
  *   S/390 debug facility
  *
- *    Copyright IBM Corp. 1999, 2012
+ *    Copyright IBM Corp. 1999, 2020
  *
  *    Author(s): Michael Holzheu (holzheu@de.ibm.com),
  *		 Holger Smolinski (Holger.Smolinski@de.ibm.com)
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/minmax.h>
 #include <linux/debugfs.h>
 
 #include <asm/debug.h>
@@ -90,27 +91,13 @@
 				size_t user_buf_size, loff_t *offset);
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
 				     char *out_buf, const char *in_buf);
-static int debug_raw_format_fn(debug_info_t *id,
-			       struct debug_view *view, char *out_buf,
-			       const char *in_buf);
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
-			       int area, debug_entry_t *entry, char *out_buf);
-
 static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view,
 				   char *out_buf, debug_sprintf_entry_t *curr_event);
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b);
+static void debug_events_append(debug_info_t *dest, debug_info_t *src);
 
 /* globals */
 
-struct debug_view debug_raw_view = {
-	"raw",
-	NULL,
-	&debug_raw_header_fn,
-	&debug_raw_format_fn,
-	NULL,
-	NULL
-};
-EXPORT_SYMBOL(debug_raw_view);
-
 struct debug_view debug_hex_ascii_view = {
 	"hex_ascii",
 	NULL,
@@ -414,7 +401,7 @@
 	act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area]
 				       [p_info->act_page] + p_info->act_entry);
 
-	if (act_entry->id.stck == 0LL)
+	if (act_entry->clock == 0LL)
 		goto out; /* empty entry */
 	if (view->header_proc)
 		len += view->header_proc(id_snap, view, p_info->act_area,
@@ -752,35 +739,28 @@
  */
 static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area)
 {
-	debug_entry_t ***new_areas;
+	debug_info_t *new_id;
 	unsigned long flags;
-	int rc = 0;
 
 	if (!id || (nr_areas <= 0) || (pages_per_area < 0))
 		return -EINVAL;
-	if (pages_per_area > 0) {
-		new_areas = debug_areas_alloc(pages_per_area, nr_areas);
-		if (!new_areas) {
-			pr_info("Allocating memory for %i pages failed\n",
-				pages_per_area);
-			rc = -ENOMEM;
-			goto out;
-		}
-	} else {
-		new_areas = NULL;
+
+	new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size,
+				  id->level, ALL_AREAS);
+	if (!new_id) {
+		pr_info("Allocating memory for %i pages failed\n",
+			pages_per_area);
+		return -ENOMEM;
 	}
+
 	spin_lock_irqsave(&id->lock, flags);
-	debug_areas_free(id);
-	id->areas = new_areas;
-	id->nr_areas = nr_areas;
-	id->pages_per_area = pages_per_area;
-	id->active_area = 0;
-	memset(id->active_entries, 0, sizeof(int)*id->nr_areas);
-	memset(id->active_pages, 0, sizeof(int)*id->nr_areas);
+	debug_events_append(new_id, id);
+	debug_areas_swap(new_id, id);
+	debug_info_free(new_id);
 	spin_unlock_irqrestore(&id->lock, flags);
 	pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area);
-out:
-	return rc;
+
+	return 0;
 }
 
 /**
@@ -847,6 +827,42 @@
 				  id->active_entries[id->active_area]);
 }
 
+/* Swap debug areas of a and b. */
+static void debug_areas_swap(debug_info_t *a, debug_info_t *b)
+{
+	swap(a->nr_areas, b->nr_areas);
+	swap(a->pages_per_area, b->pages_per_area);
+	swap(a->areas, b->areas);
+	swap(a->active_area, b->active_area);
+	swap(a->active_pages, b->active_pages);
+	swap(a->active_entries, b->active_entries);
+}
+
+/* Append all debug events in active area from source to destination log. */
+static void debug_events_append(debug_info_t *dest, debug_info_t *src)
+{
+	debug_entry_t *from, *to, *last;
+
+	if (!src->areas || !dest->areas)
+		return;
+
+	/* Loop over all entries in src, starting with oldest. */
+	from = get_active_entry(src);
+	last = from;
+	do {
+		if (from->clock != 0LL) {
+			to = get_active_entry(dest);
+			memset(to, 0, dest->entry_size);
+			memcpy(to, from, min(src->entry_size,
+					     dest->entry_size));
+			proceed_active_entry(dest);
+		}
+
+		proceed_active_entry(src);
+		from = get_active_entry(src);
+	} while (from != last);
+}
+
 /*
  * debug_finish_entry:
  * - set timestamp, caller address, cpu number etc.
@@ -855,12 +871,17 @@
 static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active,
 				      int level, int exception)
 {
-	active->id.stck = get_tod_clock_fast() -
-		*(unsigned long long *) &tod_clock_base[1];
-	active->id.fields.cpuid = smp_processor_id();
+	unsigned char clk[STORE_CLOCK_EXT_SIZE];
+	unsigned long timestamp;
+
+	get_tod_clock_ext(clk);
+	timestamp = *(unsigned long *) &clk[0] >> 4;
+	timestamp -= TOD_UNIX_EPOCH >> 12;
+	active->clock = timestamp;
+	active->cpu = smp_processor_id();
 	active->caller = __builtin_return_address(0);
-	active->id.fields.exception = exception;
-	active->id.fields.level = level;
+	active->exception = exception;
+	active->level = level;
 	proceed_active_entry(id);
 	if (exception)
 		proceed_active_area(id);
@@ -878,7 +899,7 @@
  * if debug_active is already off
  */
 static int s390dbf_procactive(struct ctl_table *table, int write,
-			      void __user *buffer, size_t *lenp, loff_t *ppos)
+			      void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!write || debug_stoppable || !debug_active)
 		return proc_dointvec(table, write, buffer, lenp, ppos);
@@ -1396,32 +1417,6 @@
 }
 
 /*
- * prints debug header in raw format
- */
-static int debug_raw_header_fn(debug_info_t *id, struct debug_view *view,
-			       int area, debug_entry_t *entry, char *out_buf)
-{
-	int rc;
-
-	rc = sizeof(debug_entry_t);
-	memcpy(out_buf, entry, sizeof(debug_entry_t));
-	return rc;
-}
-
-/*
- * prints debug data in raw format
- */
-static int debug_raw_format_fn(debug_info_t *id, struct debug_view *view,
-			       char *out_buf, const char *in_buf)
-{
-	int rc;
-
-	rc = id->buf_size;
-	memcpy(out_buf, in_buf, id->buf_size);
-	return rc;
-}
-
-/*
  * prints debug data in hex/ascii format
  */
 static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view,
@@ -1450,25 +1445,24 @@
 int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view,
 			 int area, debug_entry_t *entry, char *out_buf)
 {
-	unsigned long base, sec, usec;
+	unsigned long sec, usec;
 	unsigned long caller;
 	unsigned int level;
 	char *except_str;
 	int rc = 0;
 
-	level = entry->id.fields.level;
-	base = (*(unsigned long *) &tod_clock_base[0]) >> 4;
-	sec = (entry->id.stck >> 12) + base - (TOD_UNIX_EPOCH >> 12);
+	level = entry->level;
+	sec = entry->clock;
 	usec = do_div(sec, USEC_PER_SEC);
 
-	if (entry->id.fields.exception)
+	if (entry->exception)
 		except_str = "*";
 	else
 		except_str = "-";
 	caller = (unsigned long) entry->caller;
-	rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK  ",
+	rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %pK  ",
 		      area, sec, usec, level, except_str,
-		      entry->id.fields.cpuid, (void *)caller);
+		      entry->cpu, (void *)caller);
 	return rc;
 }
 EXPORT_SYMBOL(debug_dflt_header_fn);
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index ccba63a..b8b0cd7 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -104,18 +104,7 @@
 	.show	= show_diag_stat,
 };
 
-static int show_diag_stat_open(struct inode *inode, struct file *file)
-{
-	return seq_open(file, &show_diag_stat_sops);
-}
-
-static const struct file_operations show_diag_stat_fops = {
-	.open		= show_diag_stat_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release,
-};
-
+DEFINE_SEQ_ATTRIBUTE(show_diag_stat);
 
 static int __init show_diag_stat_init(void)
 {
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 03e35b3..5412efe 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -482,31 +482,37 @@
 	return (int) (ptr - buffer);
 }
 
+static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len)
+{
+	if (user_mode(regs)) {
+		if (copy_from_user(dst, (char __user *)src, len))
+			return -EFAULT;
+	} else {
+		if (copy_from_kernel_nofault(dst, src, len))
+			return -EFAULT;
+	}
+	return 0;
+}
+
 void show_code(struct pt_regs *regs)
 {
 	char *mode = user_mode(regs) ? "User" : "Krnl";
 	unsigned char code[64];
 	char buffer[128], *ptr;
-	mm_segment_t old_fs;
 	unsigned long addr;
 	int start, end, opsize, hops, i;
 
 	/* Get a snapshot of the 64 bytes surrounding the fault address. */
-	old_fs = get_fs();
-	set_fs(user_mode(regs) ? USER_DS : KERNEL_DS);
 	for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) {
 		addr = regs->psw.addr - 34 + start;
-		if (__copy_from_user(code + start - 2,
-				     (char __user *) addr, 2))
+		if (copy_from_regs(regs, code + start - 2, (void *)addr, 2))
 			break;
 	}
 	for (end = 32; end < 64; end += 2) {
 		addr = regs->psw.addr + end - 32;
-		if (__copy_from_user(code + end,
-				     (char __user *) addr, 2))
+		if (copy_from_regs(regs, code + end, (void *)addr, 2))
 			break;
 	}
-	set_fs(old_fs);
 	/* Code snapshot useable ? */
 	if ((regs->psw.addr & 1) || start >= end) {
 		printk("%s Code: Bad PSW.\n", mode);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 34bdc60..0dc4b25 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,6 +38,7 @@
 		return "unknown";
 	}
 }
+EXPORT_SYMBOL_GPL(stack_type_name);
 
 static inline bool in_stack(unsigned long sp, struct stack_info *info,
 			    enum stack_type type, unsigned long low,
@@ -93,7 +94,9 @@
 	if (!sp)
 		goto unknown;
 
-	task = task ? : current;
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto unknown;
 
 	/* Check per-task stack */
 	if (in_task_stack(sp, task, info))
@@ -123,17 +126,16 @@
 	return -EINVAL;
 }
 
-void show_stack(struct task_struct *task, unsigned long *stack)
+void show_stack(struct task_struct *task, unsigned long *stack,
+		       const char *loglvl)
 {
 	struct unwind_state state;
 
-	printk("Call Trace:\n");
-	if (!task)
-		task = current;
+	printk("%sCall Trace:\n", loglvl);
 	unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
-		printk(state.reliable ? " [<%016lx>] %pSR \n" :
-					"([<%016lx>] %pSR)\n",
-		       state.ip, (void *) state.ip);
+		printk(state.reliable ? "%s [<%016lx>] %pSR \n" :
+					"%s([<%016lx>] %pSR)\n",
+		       loglvl, state.ip, (void *) state.ip);
 	debug_show_held_locks(task ? : current);
 }
 
@@ -174,7 +176,7 @@
 	show_registers(regs);
 	/* Show stack backtrace if pt_regs is from kernel mode */
 	if (!user_mode(regs))
-		show_stack(NULL, (unsigned long *) regs->gprs[15]);
+		show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT);
 	show_last_breaking_event(regs);
 }
 
@@ -194,6 +196,8 @@
 	       regs->int_code >> 17, ++die_counter);
 #ifdef CONFIG_PREEMPT
 	pr_cont("PREEMPT ");
+#elif defined(CONFIG_PREEMPT_RT)
+	pr_cont("PREEMPT_RT ");
 #endif
 	pr_cont("SMP ");
 	if (debug_pagealloc_enabled())
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index eb89cb0..985e1e7 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,6 +30,7 @@
 #include <asm/sclp.h>
 #include <asm/facility.h>
 #include <asm/boot_data.h>
+#include <asm/switch_to.h>
 #include "entry.h"
 
 static void __init reset_tod_clock(void)
@@ -205,21 +206,6 @@
 		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
 }
 
-static __init void detect_diag44(void)
-{
-	int rc;
-
-	diag_stat_inc(DIAG_STAT_X044);
-	asm volatile(
-		"	diag	0,0,0x44\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-}
-
 static __init void detect_machine_facilities(void)
 {
 	if (test_facility(8)) {
@@ -240,7 +226,7 @@
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
 		__ctl_set_bit(0, 17);
 	}
-	if (test_facility(130)) {
+	if (test_facility(130) && !noexec_disabled) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
 		__ctl_set_bit(0, 20);
 	}
@@ -266,6 +252,24 @@
 #endif
 }
 
+static inline void setup_control_registers(void)
+{
+	unsigned long reg;
+
+	__ctl_store(reg, 0, 0);
+	reg |= CR0_LOW_ADDRESS_PROTECTION;
+	reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
+	reg |= CR0_EXTERNAL_CALL_SUBMASK;
+	__ctl_load(reg, 0, 0);
+}
+
+static inline void setup_access_registers(void)
+{
+	unsigned int acrs[NUM_ACRS] = { 0 };
+
+	restore_access_regs(acrs);
+}
+
 static int __init disable_vector_extension(char *str)
 {
 	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
@@ -274,34 +278,6 @@
 }
 early_param("novx", disable_vector_extension);
 
-static int __init noexec_setup(char *str)
-{
-	bool enabled;
-	int rc;
-
-	rc = kstrtobool(str, &enabled);
-	if (!rc && !enabled) {
-		/* Disable no-execute support */
-		S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
-		__ctl_clear_bit(0, 20);
-	}
-	return rc;
-}
-early_param("noexec", noexec_setup);
-
-static int __init cad_setup(char *str)
-{
-	bool enabled;
-	int rc;
-
-	rc = kstrtobool(str, &enabled);
-	if (!rc && enabled && test_facility(128))
-		/* Enable problem state CAD. */
-		__ctl_set_bit(2, 3);
-	return rc;
-}
-early_param("cad", cad_setup);
-
 char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
 static void __init setup_boot_command_line(void)
 {
@@ -333,10 +309,11 @@
 	setup_arch_string();
 	setup_boot_command_line();
 	detect_diag9c();
-	detect_diag44();
 	detect_machine_facilities();
 	save_vector_registers();
 	setup_topology();
 	sclp_early_detect();
+	setup_control_registers();
+	setup_access_registers();
 	lockdep_on();
 }
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index 6f24d83..d9d53f4 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -10,7 +10,7 @@
 
 static void sclp_early_write(struct console *con, const char *s, unsigned int len)
 {
-	__sclp_early_printk(s, len, 0);
+	__sclp_early_printk(s, len);
 }
 
 static struct console sclp_early_console = {
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 5cba181..963e8cb 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -55,14 +55,11 @@
 		   _TIF_UPROBE | _TIF_GUARDED_STORAGE | _TIF_PATCH_PENDING)
 _TIF_TRACE	= (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		   _TIF_SYSCALL_TRACEPOINT)
-_CIF_WORK	= (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
-		   _CIF_ASCE_SECONDARY | _CIF_FPU)
+_CIF_WORK	= (_CIF_ASCE_PRIMARY | _CIF_ASCE_SECONDARY | _CIF_FPU)
 _PIF_WORK	= (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART)
 
 _LPP_OFFSET	= __LC_LPP
 
-#define BASED(name) name-cleanup_critical(%r13)
-
 	.macro	TRACE_IRQS_ON
 #ifdef CONFIG_TRACE_IRQFLAGS
 	basr	%r2,%r0
@@ -113,31 +110,69 @@
 #endif
 	.endm
 
-	.macro	SWITCH_ASYNC savearea,timer
+	.macro	SWITCH_ASYNC savearea,timer,clock
 	tmhh	%r8,0x0001		# interrupting from user ?
-	jnz	2f
+	jnz	4f
+#if IS_ENABLED(CONFIG_KVM)
 	lgr	%r14,%r9
-	cghi	%r14,__LC_RETURN_LPSWE
-	je	0f
-	slg	%r14,BASED(.Lcritical_start)
-	clg	%r14,BASED(.Lcritical_length)
-	jhe	1f
-0:
+	larl	%r13,.Lsie_gmap
+	slgr	%r14,%r13
+	lghi	%r13,.Lsie_done - .Lsie_gmap
+	clgr	%r14,%r13
+	jhe	0f
 	lghi	%r11,\savearea		# inside critical section, do cleanup
-	brasl	%r14,cleanup_critical
-	tmhh	%r8,0x0001		# retest problem state after cleanup
-	jnz	2f
-1:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
+	brasl	%r14,.Lcleanup_sie
+#endif
+0:	larl	%r13,.Lpsw_idle_exit
+	cgr	%r13,%r9
+	jne	3f
+
+	larl	%r1,smp_cpu_mtid
+	llgf	%r1,0(%r1)
+	ltgr	%r1,%r1
+	jz	2f			# no SMT, skip mt_cycles calculation
+	.insn	rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
+	larl	%r3,mt_cycles
+	ag	%r3,__LC_PERCPU_OFFSET
+	la	%r4,__SF_EMPTY+16(%r15)
+1:	lg	%r0,0(%r3)
+	slg	%r0,0(%r4)
+	alg	%r0,64(%r4)
+	stg	%r0,0(%r3)
+	la	%r3,8(%r3)
+	la	%r4,8(%r4)
+	brct	%r1,1b
+
+2:	mvc	__CLOCK_IDLE_EXIT(8,%r2), \clock
+	mvc	__TIMER_IDLE_EXIT(8,%r2), \timer
+	# account system time going idle
+	ni	__LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
+
+	lg	%r13,__LC_STEAL_TIMER
+	alg	%r13,__CLOCK_IDLE_ENTER(%r2)
+	slg	%r13,__LC_LAST_UPDATE_CLOCK
+	stg	%r13,__LC_STEAL_TIMER
+
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
+
+	lg	%r13,__LC_SYSTEM_TIMER
+	alg	%r13,__LC_LAST_UPDATE_TIMER
+	slg	%r13,__TIMER_IDLE_ENTER(%r2)
+	stg	%r13,__LC_SYSTEM_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
+
+	nihh	%r8,0xfcfd		# clear wait state and irq bits
+3:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
 	slgr	%r14,%r15
 	srag	%r14,%r14,STACK_SHIFT
-	jnz	3f
+	jnz	5f
 	CHECK_STACK \savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	4f
-2:	UPDATE_VTIME %r14,%r15,\timer
+	j	6f
+4:	UPDATE_VTIME %r14,%r15,\timer
 	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-3:	lg	%r15,__LC_ASYNC_STACK	# load async stack
-4:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
+5:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+6:	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	.endm
 
 	.macro UPDATE_VTIME w1,w2,enter_timer
@@ -152,12 +187,30 @@
 	mvc	__LC_LAST_UPDATE_TIMER(8),\enter_timer
 	.endm
 
-	.macro REENABLE_IRQS
+	.macro RESTORE_SM_CLEAR_PER
 	stg	%r8,__LC_RETURN_PSW
 	ni	__LC_RETURN_PSW,0xbf
 	ssm	__LC_RETURN_PSW
 	.endm
 
+	.macro ENABLE_INTS
+	stosm	__SF_EMPTY(%r15),3
+	.endm
+
+	.macro ENABLE_INTS_TRACE
+	TRACE_IRQS_ON
+	ENABLE_INTS
+	.endm
+
+	.macro DISABLE_INTS
+	stnsm	__SF_EMPTY(%r15),0xfc
+	.endm
+
+	.macro DISABLE_INTS_TRACE
+	DISABLE_INTS
+	TRACE_IRQS_OFF
+	.endm
+
 	.macro STCK savearea
 #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
 	.insn	s,0xb27c0000,\savearea		# store clock fast
@@ -254,8 +307,6 @@
 	BR_EX	%r14
 ENDPROC(__switch_to)
 
-.L__critical_start:
-
 #if IS_ENABLED(CONFIG_KVM)
 /*
  * sie64a calling convention:
@@ -288,7 +339,6 @@
 	BPEXIT	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 .Lsie_entry:
 	sie	0(%r14)
-.Lsie_exit:
 	BPOFF
 	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 .Lsie_skip:
@@ -336,21 +386,19 @@
 
 /*
  * SVC interrupt handler routine. System calls are synchronous events and
- * are executed with interrupts enabled.
+ * are entered with interrupts disabled.
  */
 
 ENTRY(system_call)
 	stpt	__LC_SYNC_ENTER_TIMER
-.Lsysc_stmg:
 	stmg	%r8,%r15,__LC_SAVE_AREA_SYNC
 	BPOFF
 	lg	%r12,__LC_CURRENT
-	lghi	%r13,__TASK_thread
 	lghi	%r14,_PIF_SYSCALL
 .Lsysc_per:
+	lghi	%r13,__TASK_thread
 	lg	%r15,__LC_KERNEL_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)	# pointer to pt_regs
-.Lsysc_vtime:
 	UPDATE_VTIME %r8,%r9,__LC_SYNC_ENTER_TIMER
 	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -358,6 +406,8 @@
 	mvc	__PT_PSW(16,%r11),__LC_SVC_OLD_PSW
 	mvc	__PT_INT_CODE(4,%r11),__LC_SVC_ILC
 	stg	%r14,__PT_FLAGS(%r11)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	ENABLE_INTS
 .Lsysc_do_svc:
 	# clear user controlled register to prevent speculative use
 	xgr	%r0,%r0
@@ -373,7 +423,6 @@
 	jnl	.Lsysc_nr_ok
 	slag	%r8,%r1,3
 .Lsysc_nr_ok:
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stg	%r2,__PT_ORIG_GPR2(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r9,0(%r8,%r10)			# get system call add.
@@ -389,30 +438,30 @@
 #endif
 	LOCKDEP_SYS_EXIT
 .Lsysc_tif:
+	DISABLE_INTS
 	TSTMSK	__PT_FLAGS(%r11),_PIF_WORK
 	jnz	.Lsysc_work
 	TSTMSK	__TI_flags(%r12),_TIF_WORK
 	jnz	.Lsysc_work			# check for work
-	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
+	TSTMSK	__LC_CPU_FLAGS,(_CIF_WORK-_CIF_FPU)
 	jnz	.Lsysc_work
 	BPEXIT	__TI_flags(%r12),_TIF_ISOLATE_BP
 .Lsysc_restore:
-	lg	%r14,__LC_VDSO_PER_CPU
-	lmg	%r0,%r10,__PT_R0(%r11)
+	DISABLE_INTS
+	TSTMSK	__LC_CPU_FLAGS, _CIF_FPU
+	jz	.Lsysc_skip_fpu
+	brasl	%r14,load_fpu_regs
+.Lsysc_skip_fpu:
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
-.Lsysc_exit_timer:
 	stpt	__LC_EXIT_TIMER
-	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
-	lmg	%r11,%r15,__PT_R11(%r11)
-	b	__LC_RETURN_LPSWE(%r0)
-.Lsysc_done:
+	lmg	%r0,%r15,__PT_R0(%r11)
+	b	__LC_RETURN_LPSWE
 
 #
 # One of the work bits is on. Find out which one.
 #
 .Lsysc_work:
-	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
-	jo	.Lsysc_mcck_pending
+	ENABLE_INTS
 	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lsysc_reschedule
 	TSTMSK	__PT_FLAGS(%r11),_PIF_SYSCALL_RESTART
@@ -436,11 +485,9 @@
 	jo	.Lsysc_sigpending
 	TSTMSK	__TI_flags(%r12),_TIF_NOTIFY_RESUME
 	jo	.Lsysc_notify_resume
-	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
-	jo	.Lsysc_vxrs
 	TSTMSK	__LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
 	jnz	.Lsysc_asce
-	j	.Lsysc_return		# beware of critical section cleanup
+	j	.Lsysc_return
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
@@ -450,13 +497,6 @@
 	jg	schedule
 
 #
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lsysc_mcck_pending:
-	larl	%r14,.Lsysc_return
-	jg	s390_handle_mcck	# TIF bit will be cleared by handler
-
-#
 # _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce
 #
 .Lsysc_asce:
@@ -475,12 +515,6 @@
 	larl	%r14,.Lsysc_return
 	jg	set_fs_fixup
 
-#
-# CIF_FPU is set, restore floating-point controls and floating-point registers.
-#
-.Lsysc_vxrs:
-	larl	%r14,.Lsysc_return
-	jg	load_fpu_regs
 
 #
 # _TIF_SIGPENDING is set, call do_signal
@@ -564,7 +598,6 @@
 	jnh	.Lsysc_tracenogo
 	sllg	%r8,%r2,3
 	lg	%r9,0(%r8,%r10)
-.Lsysc_tracego:
 	lmg	%r3,%r7,__PT_R3(%r11)
 	stg	%r7,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r2,__PT_ORIG_GPR2(%r11)
@@ -585,8 +618,6 @@
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r12,__LC_CURRENT
 	brasl	%r14,schedule_tail
-	TRACE_IRQS_ON
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
 	jne	.Lsysc_tracenogo
 	# it's a kernel thread
@@ -620,15 +651,16 @@
 	lghi	%r10,1
 0:	lg	%r12,__LC_CURRENT
 	lghi	%r11,0
-	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_PGM_OLD_PSW
 	tmhh	%r8,0x0001		# test problem state bit
 	jnz	3f			# -> fault in user space
 #if IS_ENABLED(CONFIG_KVM)
 	# cleanup critical section for program checks in sie64a
 	lgr	%r14,%r9
-	slg	%r14,BASED(.Lsie_critical_start)
-	clg	%r14,BASED(.Lsie_critical_length)
+	larl	%r13,.Lsie_gmap
+	slgr	%r14,%r13
+	lghi	%r13,.Lsie_done - .Lsie_gmap
+	clgr	%r14,%r13
 	jhe	1f
 	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
@@ -680,8 +712,8 @@
 	mvc	__THREAD_per_address(8,%r14),__LC_PER_ADDRESS
 	mvc	__THREAD_per_cause(2,%r14),__LC_PER_CODE
 	mvc	__THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-6:	REENABLE_IRQS
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+6:	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	RESTORE_SM_CLEAR_PER
 	larl	%r1,pgm_check_table
 	llgh	%r10,__PT_INT_CODE+2(%r11)
 	nill	%r10,0x007f
@@ -702,8 +734,8 @@
 # PER event in supervisor state, must be kprobes
 #
 .Lpgm_kprobe:
-	REENABLE_IRQS
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+	RESTORE_SM_CLEAR_PER
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_per_trap
 	j	.Lpgm_return
@@ -713,11 +745,10 @@
 #
 .Lpgm_svcper:
 	mvc	__LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
-	lghi	%r13,__TASK_thread
 	larl	%r14,.Lsysc_per
 	stg	%r14,__LC_RETURN_PSW+8
 	lghi	%r14,_PIF_SYSCALL | _PIF_PER_TRAP
-	lpswe	__LC_RETURN_PSW		# branch to .Lsysc_per and enable irqs
+	lpswe	__LC_RETURN_PSW		# branch to .Lsysc_per
 ENDPROC(pgm_check_handler)
 
 /*
@@ -729,9 +760,8 @@
 	BPOFF
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r12,__LC_CURRENT
-	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_IO_OLD_PSW
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK
 	stmg	%r0,%r7,__PT_R0(%r11)
 	# clear user controlled registers to prevent speculative use
 	xgr	%r0,%r0
@@ -747,10 +777,10 @@
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	mvc	__PT_INT_CODE(12,%r11),__LC_SUBCHANNEL_ID
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
 	jo	.Lio_restore
 	TRACE_IRQS_OFF
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 .Lio_loop:
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lghi	%r3,IO_INTERRUPT
@@ -767,25 +797,20 @@
 	j	.Lio_loop
 .Lio_return:
 	LOCKDEP_SYS_EXIT
-	TRACE_IRQS_ON
-.Lio_tif:
 	TSTMSK	__TI_flags(%r12),_TIF_WORK
 	jnz	.Lio_work		# there is work to do (signals etc.)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_WORK
 	jnz	.Lio_work
 .Lio_restore:
-	lg	%r14,__LC_VDSO_PER_CPU
-	lmg	%r0,%r10,__PT_R0(%r11)
+	TRACE_IRQS_ON
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jno	.Lio_exit_kernel
 	BPEXIT	__TI_flags(%r12),_TIF_ISOLATE_BP
-.Lio_exit_timer:
 	stpt	__LC_EXIT_TIMER
-	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 .Lio_exit_kernel:
-	lmg	%r11,%r15,__PT_R11(%r11)
-	b	__LC_RETURN_LPSWE(%r0)
+	lmg	%r0,%r15,__PT_R0(%r11)
+	b	__LC_RETURN_LPSWE
 .Lio_done:
 
 #
@@ -800,7 +825,7 @@
 .Lio_work:
 	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
 	jo	.Lio_work_user		# yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	# check for preemptive scheduling
 	icm	%r0,15,__LC_PREEMPT_COUNT
 	jnz	.Lio_restore		# preemption is disabled
@@ -813,9 +838,6 @@
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
-	# TRACE_IRQS_ON already done at .Lio_return, call
-	# TRACE_IRQS_OFF to keep things symmetrical
-	TRACE_IRQS_OFF
 	brasl	%r14,preempt_schedule_irq
 	j	.Lio_return
 #else
@@ -835,9 +857,6 @@
 #
 # One of the work bits is on. Find out which one.
 #
-.Lio_work_tif:
-	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
-	jo	.Lio_mcck_pending
 	TSTMSK	__TI_flags(%r12),_TIF_NEED_RESCHED
 	jo	.Lio_reschedule
 #ifdef CONFIG_LIVEPATCH
@@ -854,15 +873,6 @@
 	jo	.Lio_vxrs
 	TSTMSK	__LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
 	jnz	.Lio_asce
-	j	.Lio_return		# beware of critical section cleanup
-
-#
-# _CIF_MCCK_PENDING is set, call handler
-#
-.Lio_mcck_pending:
-	# TRACE_IRQS_ON already done at .Lio_return
-	brasl	%r14,s390_handle_mcck	# TIF bit will be cleared by handler
-	TRACE_IRQS_OFF
 	j	.Lio_return
 
 #
@@ -895,23 +905,19 @@
 # _TIF_GUARDED_STORAGE is set, call guarded_storage_load
 #
 .Lio_guarded_storage:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	ENABLE_INTS_TRACE
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,gs_load_bc_cb
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
+	DISABLE_INTS_TRACE
 	j	.Lio_return
 
 #
 # _TIF_NEED_RESCHED is set, call schedule
 #
 .Lio_reschedule:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	ENABLE_INTS_TRACE
 	brasl	%r14,schedule		# call scheduler
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
+	DISABLE_INTS_TRACE
 	j	.Lio_return
 
 #
@@ -928,24 +934,20 @@
 # _TIF_SIGPENDING or is set, call do_signal
 #
 .Lio_sigpending:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	ENABLE_INTS_TRACE
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_signal
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
+	DISABLE_INTS_TRACE
 	j	.Lio_return
 
 #
 # _TIF_NOTIFY_RESUME or is set, call do_notify_resume
 #
 .Lio_notify_resume:
-	# TRACE_IRQS_ON already done at .Lio_return
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
+	ENABLE_INTS_TRACE
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,do_notify_resume
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	TRACE_IRQS_OFF
+	DISABLE_INTS_TRACE
 	j	.Lio_return
 ENDPROC(io_int_handler)
 
@@ -958,9 +960,8 @@
 	BPOFF
 	stmg	%r8,%r15,__LC_SAVE_AREA_ASYNC
 	lg	%r12,__LC_CURRENT
-	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_EXT_OLD_PSW
-	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
+	SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER,__LC_INT_CLOCK
 	stmg	%r0,%r7,__PT_R0(%r11)
 	# clear user controlled registers to prevent speculative use
 	xgr	%r0,%r0
@@ -979,10 +980,10 @@
 	mvc	__PT_INT_PARM(4,%r11),__LC_EXT_PARAMS
 	mvc	__PT_INT_PARM_LONG(8,%r11),0(%r1)
 	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	TSTMSK	__LC_CPU_FLAGS,_CIF_IGNORE_IRQ
 	jo	.Lio_restore
 	TRACE_IRQS_OFF
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	lghi	%r3,EXT_INTERRUPT
 	brasl	%r14,do_IRQ
@@ -990,12 +991,12 @@
 ENDPROC(ext_int_handler)
 
 /*
- * Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
+ * Load idle PSW.
  */
 ENTRY(psw_idle)
 	stg	%r14,(__SF_GPRS+8*8)(%r15)
 	stg	%r3,__SF_EMPTY(%r15)
-	larl	%r1,.Lpsw_idle_lpsw+4
+	larl	%r1,.Lpsw_idle_exit
 	stg	%r1,__SF_EMPTY+8(%r15)
 	larl	%r1,smp_cpu_mtid
 	llgf	%r1,0(%r1)
@@ -1007,10 +1008,9 @@
 	BPON
 	STCK	__CLOCK_IDLE_ENTER(%r2)
 	stpt	__TIMER_IDLE_ENTER(%r2)
-.Lpsw_idle_lpsw:
 	lpswe	__SF_EMPTY(%r15)
+.Lpsw_idle_exit:
 	BR_EX	%r14
-.Lpsw_idle_end:
 ENDPROC(psw_idle)
 
 /*
@@ -1021,6 +1021,7 @@
  * of the register contents at return from io or a system call.
  */
 ENTRY(save_fpu_regs)
+	stnsm	__SF_EMPTY(%r15),0xfc
 	lg	%r2,__LC_CURRENT
 	aghi	%r2,__TASK_thread
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
@@ -1052,6 +1053,7 @@
 .Lsave_fpu_regs_done:
 	oi	__LC_CPU_FLAGS+7,_CIF_FPU
 .Lsave_fpu_regs_exit:
+	ssm	__SF_EMPTY(%r15)
 	BR_EX	%r14
 .Lsave_fpu_regs_end:
 ENDPROC(save_fpu_regs)
@@ -1068,6 +1070,7 @@
  *	%r4
  */
 load_fpu_regs:
+	stnsm	__SF_EMPTY(%r15),0xfc
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
@@ -1099,12 +1102,11 @@
 .Lload_fpu_regs_done:
 	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
 .Lload_fpu_regs_exit:
+	ssm	__SF_EMPTY(%r15)
 	BR_EX	%r14
 .Lload_fpu_regs_end:
 ENDPROC(load_fpu_regs)
 
-.L__critical_end:
-
 /*
  * Machine check handler routines
  */
@@ -1117,7 +1119,6 @@
 	lam	%a0,%a15,__LC_AREGS_SAVE_AREA-4095(%r1) # validate acrs
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
 	lg	%r12,__LC_CURRENT
-	larl	%r13,cleanup_critical
 	lmg	%r8,%r9,__LC_MCK_OLD_PSW
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
 	jo	.Lmcck_panic		# yes -> rest of mcck code invalid
@@ -1183,7 +1184,7 @@
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
 	jno	.Lmcck_panic
 4:	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
-	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER,__LC_MCCK_CLOCK
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -1203,27 +1204,23 @@
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	lgr	%r2,%r11		# pass pointer to pt_regs
 	brasl	%r14,s390_do_machine_check
-	tm	__PT_PSW+1(%r11),0x01	# returning to user ?
-	jno	.Lmcck_return
+	cghi	%r2,0
+	je	.Lmcck_return
 	lg	%r1,__LC_KERNEL_STACK	# switch to kernel stack
 	mvc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r1),0(%r11)
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
-	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
-	jno	.Lmcck_return
 	TRACE_IRQS_OFF
 	brasl	%r14,s390_handle_mcck
 	TRACE_IRQS_ON
 .Lmcck_return:
-	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
 	BPEXIT	__TI_flags(%r12),_TIF_ISOLATE_BP
 	stpt	__LC_EXIT_TIMER
-	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 0:	lmg	%r11,%r15,__PT_R11(%r11)
 	b	__LC_RETURN_MCCK_LPSWE
 
@@ -1281,265 +1278,23 @@
 ENDPROC(stack_overflow)
 #endif
 
-ENTRY(cleanup_critical)
-	cghi	%r9,__LC_RETURN_LPSWE
-	je	.Lcleanup_lpswe
 #if IS_ENABLED(CONFIG_KVM)
-	clg	%r9,BASED(.Lcleanup_table_sie)	# .Lsie_gmap
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table_sie+8)# .Lsie_done
-	jl	.Lcleanup_sie
-#endif
-	clg	%r9,BASED(.Lcleanup_table)	# system_call
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+8)	# .Lsysc_do_svc
-	jl	.Lcleanup_system_call
-	clg	%r9,BASED(.Lcleanup_table+16)	# .Lsysc_tif
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+24)	# .Lsysc_restore
-	jl	.Lcleanup_sysc_tif
-	clg	%r9,BASED(.Lcleanup_table+32)	# .Lsysc_done
-	jl	.Lcleanup_sysc_restore
-	clg	%r9,BASED(.Lcleanup_table+40)	# .Lio_tif
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+48)	# .Lio_restore
-	jl	.Lcleanup_io_tif
-	clg	%r9,BASED(.Lcleanup_table+56)	# .Lio_done
-	jl	.Lcleanup_io_restore
-	clg	%r9,BASED(.Lcleanup_table+64)	# psw_idle
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+72)	# .Lpsw_idle_end
-	jl	.Lcleanup_idle
-	clg	%r9,BASED(.Lcleanup_table+80)	# save_fpu_regs
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+88)	# .Lsave_fpu_regs_end
-	jl	.Lcleanup_save_fpu_regs
-	clg	%r9,BASED(.Lcleanup_table+96)	# load_fpu_regs
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
-	jl	.Lcleanup_load_fpu_regs
-0:	BR_EX	%r14,%r11
-ENDPROC(cleanup_critical)
-
-	.align	8
-.Lcleanup_table:
-	.quad	system_call
-	.quad	.Lsysc_do_svc
-	.quad	.Lsysc_tif
-	.quad	.Lsysc_restore
-	.quad	.Lsysc_done
-	.quad	.Lio_tif
-	.quad	.Lio_restore
-	.quad	.Lio_done
-	.quad	psw_idle
-	.quad	.Lpsw_idle_end
-	.quad	save_fpu_regs
-	.quad	.Lsave_fpu_regs_end
-	.quad	load_fpu_regs
-	.quad	.Lload_fpu_regs_end
-
-#if IS_ENABLED(CONFIG_KVM)
-.Lcleanup_table_sie:
-	.quad	.Lsie_gmap
-	.quad	.Lsie_done
-
 .Lcleanup_sie:
-	cghi    %r11,__LC_SAVE_AREA_ASYNC 	#Is this in normal interrupt?
-	je      1f
-	slg     %r9,BASED(.Lsie_crit_mcck_start)
-	clg     %r9,BASED(.Lsie_crit_mcck_length)
-	jh      1f
-	oi      __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-1:	BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
+	cghi	%r11,__LC_SAVE_AREA_ASYNC	#Is this in normal interrupt?
+	je	1f
+	larl	%r13,.Lsie_entry
+	slgr	%r9,%r13
+	lghi	%r13,.Lsie_skip - .Lsie_entry
+	clgr	%r9,%r13
+	jh	1f
+	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
+1:	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 	lg	%r9,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r9),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_USER_ASCE		# load primary asce
 	larl	%r9,sie_exit			# skip forward to sie_exit
 	BR_EX	%r14,%r11
-#endif
 
-.Lcleanup_system_call:
-	# check if stpt has been executed
-	clg	%r9,BASED(.Lcleanup_system_call_insn)
-	jh	0f
-	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	cghi	%r11,__LC_SAVE_AREA_ASYNC
-	je	0f
-	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_MCCK_ENTER_TIMER
-0:	# check if stmg has been executed
-	clg	%r9,BASED(.Lcleanup_system_call_insn+8)
-	jh	0f
-	mvc	__LC_SAVE_AREA_SYNC(64),0(%r11)
-0:	# check if base register setup + TIF bit load has been done
-	clg	%r9,BASED(.Lcleanup_system_call_insn+16)
-	jhe	0f
-	# set up saved register r12 task struct pointer
-	stg	%r12,32(%r11)
-	# set up saved register r13 __TASK_thread offset
-	mvc	40(8,%r11),BASED(.Lcleanup_system_call_const)
-0:	# check if the user time update has been done
-	clg	%r9,BASED(.Lcleanup_system_call_insn+24)
-	jh	0f
-	lg	%r15,__LC_EXIT_TIMER
-	slg	%r15,__LC_SYNC_ENTER_TIMER
-	alg	%r15,__LC_USER_TIMER
-	stg	%r15,__LC_USER_TIMER
-0:	# check if the system time update has been done
-	clg	%r9,BASED(.Lcleanup_system_call_insn+32)
-	jh	0f
-	lg	%r15,__LC_LAST_UPDATE_TIMER
-	slg	%r15,__LC_EXIT_TIMER
-	alg	%r15,__LC_SYSTEM_TIMER
-	stg	%r15,__LC_SYSTEM_TIMER
-0:	# update accounting time stamp
-	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-	# set up saved register r11
-	lg	%r15,__LC_KERNEL_STACK
-	la	%r9,STACK_FRAME_OVERHEAD(%r15)
-	stg	%r9,24(%r11)		# r11 pt_regs pointer
-	# fill pt_regs
-	mvc	__PT_R8(64,%r9),__LC_SAVE_AREA_SYNC
-	stmg	%r0,%r7,__PT_R0(%r9)
-	mvc	__PT_PSW(16,%r9),__LC_SVC_OLD_PSW
-	mvc	__PT_INT_CODE(4,%r9),__LC_SVC_ILC
-	xc	__PT_FLAGS(8,%r9),__PT_FLAGS(%r9)
-	mvi	__PT_FLAGS+7(%r9),_PIF_SYSCALL
-	# setup saved register r15
-	stg	%r15,56(%r11)		# r15 stack pointer
-	# set new psw address and exit
-	larl	%r9,.Lsysc_do_svc
-	BR_EX	%r14,%r11
-.Lcleanup_system_call_insn:
-	.quad	system_call
-	.quad	.Lsysc_stmg
-	.quad	.Lsysc_per
-	.quad	.Lsysc_vtime+36
-	.quad	.Lsysc_vtime+42
-.Lcleanup_system_call_const:
-	.quad	__TASK_thread
-
-.Lcleanup_sysc_tif:
-	larl	%r9,.Lsysc_tif
-	BR_EX	%r14,%r11
-
-.Lcleanup_sysc_restore:
-	# check if stpt has been executed
-	clg	%r9,BASED(.Lcleanup_sysc_restore_insn)
-	jh	0f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
-	cghi	%r11,__LC_SAVE_AREA_ASYNC
-	je	0f
-	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
-0:	clg	%r9,BASED(.Lcleanup_sysc_restore_insn+8)
-	je	1f
-	lg	%r9,24(%r11)		# get saved pointer to pt_regs
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
-	mvc	0(64,%r11),__PT_R8(%r9)
-	lmg	%r0,%r7,__PT_R0(%r9)
-.Lcleanup_lpswe:
-1:	lmg	%r8,%r9,__LC_RETURN_PSW
-	BR_EX	%r14,%r11
-.Lcleanup_sysc_restore_insn:
-	.quad	.Lsysc_exit_timer
-	.quad	.Lsysc_done - 4
-
-.Lcleanup_io_tif:
-	larl	%r9,.Lio_tif
-	BR_EX	%r14,%r11
-
-.Lcleanup_io_restore:
-	# check if stpt has been executed
-	clg	%r9,BASED(.Lcleanup_io_restore_insn)
-	jh	0f
-	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
-0:	clg	%r9,BASED(.Lcleanup_io_restore_insn+8)
-	je	1f
-	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
-	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
-	mvc	0(64,%r11),__PT_R8(%r9)
-	lmg	%r0,%r7,__PT_R0(%r9)
-1:	lmg	%r8,%r9,__LC_RETURN_PSW
-	BR_EX	%r14,%r11
-.Lcleanup_io_restore_insn:
-	.quad	.Lio_exit_timer
-	.quad	.Lio_done - 4
-
-.Lcleanup_idle:
-	ni	__LC_CPU_FLAGS+7,255-_CIF_ENABLED_WAIT
-	# copy interrupt clock & cpu timer
-	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_INT_CLOCK
-	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_ASYNC_ENTER_TIMER
-	cghi	%r11,__LC_SAVE_AREA_ASYNC
-	je	0f
-	mvc	__CLOCK_IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
-	mvc	__TIMER_IDLE_EXIT(8,%r2),__LC_MCCK_ENTER_TIMER
-0:	# check if stck & stpt have been executed
-	clg	%r9,BASED(.Lcleanup_idle_insn)
-	jhe	1f
-	mvc	__CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
-	mvc	__TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
-1:	# calculate idle cycles
-	clg	%r9,BASED(.Lcleanup_idle_insn)
-	jl	3f
-	larl	%r1,smp_cpu_mtid
-	llgf	%r1,0(%r1)
-	ltgr	%r1,%r1
-	jz	3f
-	.insn	rsy,0xeb0000000017,%r1,5,__SF_EMPTY+80(%r15)
-	larl	%r3,mt_cycles
-	ag	%r3,__LC_PERCPU_OFFSET
-	la	%r4,__SF_EMPTY+16(%r15)
-2:	lg	%r0,0(%r3)
-	slg	%r0,0(%r4)
-	alg	%r0,64(%r4)
-	stg	%r0,0(%r3)
-	la	%r3,8(%r3)
-	la	%r4,8(%r4)
-	brct	%r1,2b
-3:	# account system time going idle
-	lg	%r9,__LC_STEAL_TIMER
-	alg	%r9,__CLOCK_IDLE_ENTER(%r2)
-	slg	%r9,__LC_LAST_UPDATE_CLOCK
-	stg	%r9,__LC_STEAL_TIMER
-	mvc	__LC_LAST_UPDATE_CLOCK(8),__CLOCK_IDLE_EXIT(%r2)
-	lg	%r9,__LC_SYSTEM_TIMER
-	alg	%r9,__LC_LAST_UPDATE_TIMER
-	slg	%r9,__TIMER_IDLE_ENTER(%r2)
-	stg	%r9,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2)
-	# prepare return psw
-	nihh	%r8,0xfcfd		# clear irq & wait state bits
-	lg	%r9,48(%r11)		# return from psw_idle
-	BR_EX	%r14,%r11
-.Lcleanup_idle_insn:
-	.quad	.Lpsw_idle_lpsw
-
-.Lcleanup_save_fpu_regs:
-	larl	%r9,save_fpu_regs
-	BR_EX	%r14,%r11
-
-.Lcleanup_load_fpu_regs:
-	larl	%r9,load_fpu_regs
-	BR_EX	%r14,%r11
-
-/*
- * Integer constants
- */
-	.align	8
-.Lcritical_start:
-	.quad	.L__critical_start
-.Lcritical_length:
-	.quad	.L__critical_end - .L__critical_start
-#if IS_ENABLED(CONFIG_KVM)
-.Lsie_critical_start:
-	.quad	.Lsie_gmap
-.Lsie_critical_length:
-	.quad	.Lsie_done - .Lsie_gmap
-.Lsie_crit_mcck_start:
-	.quad   .Lsie_entry
-.Lsie_crit_mcck_length:
-	.quad   .Lsie_skip - .Lsie_entry
 #endif
 	.section .rodata, "a"
 #define SYSCALL(esame,emu)	.quad __s390x_ ## esame
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index b2956d4..d2ca3fe 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -9,7 +9,6 @@
 #include <asm/idle.h>
 
 extern void *restart_stack;
-extern unsigned long suspend_zero_pages;
 
 void system_call(void);
 void pgm_check_handler(void);
@@ -17,13 +16,15 @@
 void io_int_handler(void);
 void mcck_int_handler(void);
 void restart_int_handler(void);
-void restart_call_handler(void);
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
 
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
+void do_secure_storage_access(struct pt_regs *regs);
+void do_non_secure_storage_access(struct pt_regs *regs);
+void do_secure_storage_violation(struct pt_regs *regs);
 
 void addressing_exception(struct pt_regs *regs);
 void data_exception(struct pt_regs *regs);
@@ -45,6 +46,7 @@
 void transaction_exception(struct pt_regs *regs);
 void translation_exception(struct pt_regs *regs);
 void vector_exception(struct pt_regs *regs);
+void monitor_event_exception(struct pt_regs *regs);
 
 void do_per_trap(struct pt_regs *regs);
 void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
@@ -58,12 +60,10 @@
 void __init init_IRQ(void);
 void do_IRQ(struct pt_regs *regs, int irq);
 void do_restart(void);
-void __init startup_init_nobss(void);
 void __init startup_init(void);
 void die(struct pt_regs *regs, const char *str);
 int setup_profiling_timer(unsigned int multiplier);
 void __init time_init(void);
-void s390_early_resume(void);
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
 
 struct s390_mmap_arg_struct;
@@ -88,4 +88,6 @@
 unsigned long stack_alloc(void);
 void stack_free(unsigned long stack);
 
+extern char kprobes_insn_page[];
+
 #endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 7df6b26..923eccc 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -73,31 +73,6 @@
 #endif
 }
 
-static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
-	if (insn->opc == BREAKPOINT_INSTRUCTION)
-		return 1;
-#endif
-	return 0;
-}
-
-static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
-	insn->opc = BREAKPOINT_INSTRUCTION;
-	insn->disp = KPROBE_ON_FTRACE_NOP;
-#endif
-}
-
-static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
-	insn->opc = BREAKPOINT_INSTRUCTION;
-	insn->disp = KPROBE_ON_FTRACE_CALL;
-#endif
-}
-
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 		       unsigned long addr)
 {
@@ -109,22 +84,12 @@
 {
 	struct ftrace_insn orig, new, old;
 
-	if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+	if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
 		return -EFAULT;
 	if (addr == MCOUNT_ADDR) {
 		/* Initial code replacement */
 		ftrace_generate_orig_insn(&orig);
 		ftrace_generate_nop_insn(&new);
-	} else if (is_kprobe_on_ftrace(&old)) {
-		/*
-		 * If we find a breakpoint instruction, a kprobe has been
-		 * placed at the beginning of the function. We write the
-		 * constant KPROBE_ON_FTRACE_NOP into the remaining four
-		 * bytes of the original instruction so that the kprobes
-		 * handler can execute a nop, if it reaches this breakpoint.
-		 */
-		ftrace_generate_kprobe_call_insn(&orig);
-		ftrace_generate_kprobe_nop_insn(&new);
 	} else {
 		/* Replace ftrace call with a nop. */
 		ftrace_generate_call_insn(&orig, rec->ip);
@@ -141,23 +106,12 @@
 {
 	struct ftrace_insn orig, new, old;
 
-	if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
+	if (copy_from_kernel_nofault(&old, (void *) rec->ip, sizeof(old)))
 		return -EFAULT;
-	if (is_kprobe_on_ftrace(&old)) {
-		/*
-		 * If we find a breakpoint instruction, a kprobe has been
-		 * placed at the beginning of the function. We write the
-		 * constant KPROBE_ON_FTRACE_CALL into the remaining four
-		 * bytes of the original instruction so that the kprobes
-		 * handler can execute a brasl if it reaches this breakpoint.
-		 */
-		ftrace_generate_kprobe_nop_insn(&orig);
-		ftrace_generate_kprobe_call_insn(&new);
-	} else {
-		/* Replace nop with an ftrace call. */
-		ftrace_generate_nop_insn(&orig);
-		ftrace_generate_call_insn(&new, rec->ip);
-	}
+	/* Replace nop with an ftrace call. */
+	ftrace_generate_nop_insn(&orig);
+	ftrace_generate_call_insn(&new, rec->ip);
+
 	/* Verify that the to be replaced code matches what we expect. */
 	if (memcmp(&orig, &old, sizeof(old)))
 		return -EINVAL;
@@ -243,3 +197,45 @@
 }
 
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+		struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe_ctlblk *kcb;
+	struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
+
+	if (unlikely(!p) || kprobe_disabled(p))
+		return;
+
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+		return;
+	}
+
+	__this_cpu_write(current_kprobe, p);
+
+	kcb = get_kprobe_ctlblk();
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+	instruction_pointer_set(regs, ip);
+
+	if (!p->pre_handler || !p->pre_handler(p, regs)) {
+
+		instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE);
+
+		if (unlikely(p->post_handler)) {
+			kcb->kprobe_status = KPROBE_HIT_SSDONE;
+			p->post_handler(p, regs, 0);
+		}
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	return 0;
+}
+#endif
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 0d9ee19..8b88dbb 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,30 +26,17 @@
 0:	larl	%r1,tod_clock_base
 	mvc	0(16,%r1),__LC_BOOT_CLOCK
 	larl	%r13,.LPG1		# get base
-	larl	%r0,boot_vdso_data
-	stg	%r0,__LC_VDSO_PER_CPU
 #
 # Setup stack
 #
 	larl	%r14,init_task
 	stg	%r14,__LC_CURRENT
-	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
+	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
 #ifdef CONFIG_KASAN
 	brasl	%r14,kasan_early_init
 #endif
-#
-# Early machine initialization and detection functions.
-#
-	brasl	%r14,startup_init
-
-# check control registers
-	stctg	%c0,%c15,0(%r15)
-	oi	6(%r15),0x60		# enable sigp emergency & external call
-	oi	4(%r15),0x10		# switch on low address proctection
-	lctlg	%c0,%c15,0(%r15)
-
-	lam	0,15,.Laregs-.LPG1(%r13)	# load acrs needed by uaccess
-	brasl	%r14,start_kernel		# go to C code
+	brasl	%r14,startup_init		# s390 specific early init
+	brasl	%r14,start_kernel		# common init code
 #
 # We returned from start_kernel ?!? PANIK
 #
@@ -59,4 +46,3 @@
 	.align	16
 .LPG1:
 .Ldw:	.quad	0x0002000180000000,0x0000000000000000
-.Laregs:.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index 8f84568..2b85096 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/sched/cputime.h>
+#include <trace/events/power.h>
 #include <asm/nmi.h>
 #include <asm/smp.h>
 #include "entry.h"
@@ -24,28 +25,27 @@
 {
 	struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
 	unsigned long long idle_time;
-	unsigned long psw_mask;
+	unsigned long psw_mask, flags;
 
-	trace_hardirqs_on();
 
 	/* Wait for external, I/O or machine check interrupt. */
 	psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
 		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	clear_cpu_flag(CIF_NOHZ_DELAY);
 
+	raw_local_irq_save(flags);
 	/* Call the assembler magic in entry.S */
 	psw_idle(idle, psw_mask);
-
-	trace_hardirqs_off();
+	raw_local_irq_restore(flags);
 
 	/* Account time spent with enabled wait psw loaded as idle time. */
-	write_seqcount_begin(&idle->seqcount);
+	raw_write_seqcount_begin(&idle->seqcount);
 	idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
 	idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
 	idle->idle_time += idle_time;
 	idle->idle_count++;
 	account_idle_time(cputime_to_nsecs(idle_time));
-	write_seqcount_end(&idle->seqcount);
+	raw_write_seqcount_end(&idle->seqcount);
 }
 NOKPROBE_SYMBOL(enabled_wait);
 
@@ -118,22 +118,16 @@
 
 void arch_cpu_idle_enter(void)
 {
-	local_mcck_disable();
 }
 
 void arch_cpu_idle(void)
 {
-	if (!test_cpu_flag(CIF_MCCK_PENDING))
-		/* Halt the cpu and keep track of cpu time accounting. */
-		enabled_wait();
-	local_irq_enable();
+	enabled_wait();
+	raw_local_irq_enable();
 }
 
 void arch_cpu_idle_exit(void)
 {
-	local_mcck_enable();
-	if (test_cpu_flag(CIF_MCCK_PENDING))
-		s390_handle_mcck();
 }
 
 void arch_cpu_idle_dead(void)
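
The statistics updated in enabled_wait() are still published through idle->seqcount, now via the raw writer primitives. A reader-side sketch, assuming the struct s390_idle_data layout from asm/idle.h:

/* Sketch: take a consistent snapshot of the idle time accounted in
 * enabled_wait(); pairs with raw_write_seqcount_begin/end above. */
static u64 s390_read_idle_time(struct s390_idle_data *idle)
{
	unsigned int seq;
	u64 idle_time;

	do {
		seq = read_seqcount_begin(&idle->seqcount);
		idle_time = idle->idle_time;
	} while (read_seqcount_retry(&idle->seqcount, seq));

	return idle_time;
}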
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6837aff..6da0690 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -39,10 +39,13 @@
 #define IPL_CCW_STR		"ccw"
 #define IPL_FCP_STR		"fcp"
 #define IPL_FCP_DUMP_STR	"fcp_dump"
+#define IPL_NVME_STR		"nvme"
+#define IPL_NVME_DUMP_STR	"nvme_dump"
 #define IPL_NSS_STR		"nss"
 
 #define DUMP_CCW_STR		"ccw"
 #define DUMP_FCP_STR		"fcp"
+#define DUMP_NVME_STR		"nvme"
 #define DUMP_NONE_STR		"none"
 
 /*
@@ -93,6 +96,10 @@
 		return IPL_FCP_DUMP_STR;
 	case IPL_TYPE_NSS:
 		return IPL_NSS_STR;
+	case IPL_TYPE_NVME:
+		return IPL_NVME_STR;
+	case IPL_TYPE_NVME_DUMP:
+		return IPL_NVME_DUMP_STR;
 	case IPL_TYPE_UNKNOWN:
 	default:
 		return IPL_UNKNOWN_STR;
@@ -103,6 +110,7 @@
 	DUMP_TYPE_NONE	= 1,
 	DUMP_TYPE_CCW	= 2,
 	DUMP_TYPE_FCP	= 4,
+	DUMP_TYPE_NVME	= 8,
 };
 
 static char *dump_type_str(enum dump_type type)
@@ -114,6 +122,8 @@
 		return DUMP_CCW_STR;
 	case DUMP_TYPE_FCP:
 		return DUMP_FCP_STR;
+	case DUMP_TYPE_NVME:
+		return DUMP_NVME_STR;
 	default:
 		return NULL;
 	}
@@ -133,6 +143,7 @@
 
 static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
 static struct ipl_parameter_block *reipl_block_fcp;
+static struct ipl_parameter_block *reipl_block_nvme;
 static struct ipl_parameter_block *reipl_block_ccw;
 static struct ipl_parameter_block *reipl_block_nss;
 static struct ipl_parameter_block *reipl_block_actual;
@@ -140,10 +151,15 @@
 static int dump_capabilities = DUMP_TYPE_NONE;
 static enum dump_type dump_type = DUMP_TYPE_NONE;
 static struct ipl_parameter_block *dump_block_fcp;
+static struct ipl_parameter_block *dump_block_nvme;
 static struct ipl_parameter_block *dump_block_ccw;
 
 static struct sclp_ipl_info sclp_ipl_info;
 
+static bool reipl_nvme_clear;
+static bool reipl_fcp_clear;
+static bool reipl_ccw_clear;
+
 static inline int __diag308(unsigned long subcode, void *addr)
 {
 	register unsigned long _addr asm("0") = (unsigned long) addr;
@@ -174,7 +190,7 @@
 		struct kobj_attribute *attr,				\
 		char *page)						\
 {									\
-	return snprintf(page, PAGE_SIZE, _format, ##args);		\
+	return scnprintf(page, PAGE_SIZE, _format, ##args);		\
 }
 
 #define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk)			\
@@ -258,6 +274,11 @@
 			return IPL_TYPE_FCP_DUMP;
 		else
 			return IPL_TYPE_FCP;
+	case IPL_PBT_NVME:
+		if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP)
+			return IPL_TYPE_NVME_DUMP;
+		else
+			return IPL_TYPE_NVME;
 	}
 	return IPL_TYPE_UNKNOWN;
 }
@@ -314,6 +335,9 @@
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
 		return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
+	case IPL_TYPE_NVME:
+	case IPL_TYPE_NVME_DUMP:
+		return sprintf(page, "%08ux\n", ipl_block.nvme.fid);
 	default:
 		return 0;
 	}
@@ -342,15 +366,35 @@
 
 	return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
+
+static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	unsigned int size = ipl_block.nvme.scp_data_len;
+	void *scp_data = &ipl_block.nvme.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
 static struct bin_attribute ipl_scp_data_attr =
 	__BIN_ATTR(scp_data, S_IRUGO, ipl_scp_data_read, NULL, PAGE_SIZE);
 
+static struct bin_attribute ipl_nvme_scp_data_attr =
+	__BIN_ATTR(scp_data, S_IRUGO, ipl_nvme_scp_data_read, NULL, PAGE_SIZE);
+
 static struct bin_attribute *ipl_fcp_bin_attrs[] = {
 	&ipl_parameter_attr,
 	&ipl_scp_data_attr,
 	NULL,
 };
 
+static struct bin_attribute *ipl_nvme_bin_attrs[] = {
+	&ipl_parameter_attr,
+	&ipl_nvme_scp_data_attr,
+	NULL,
+};
+
 /* FCP ipl device attributes */
 
 DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
@@ -362,6 +406,16 @@
 DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
 		   (unsigned long long)ipl_block.fcp.br_lba);
 
+/* NVMe ipl device attributes */
+DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n",
+		   (unsigned long long)ipl_block.nvme.fid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n",
+		   (unsigned long long)ipl_block.nvme.nsid);
+DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n",
+		   (unsigned long long)ipl_block.nvme.bootprog);
+DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n",
+		   (unsigned long long)ipl_block.nvme.br_lba);
+
 static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
 				     struct kobj_attribute *attr, char *page)
 {
@@ -396,6 +450,24 @@
 	.bin_attrs = ipl_fcp_bin_attrs,
 };
 
+static struct attribute *ipl_nvme_attrs[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_nvme_fid_attr.attr,
+	&sys_ipl_nvme_nsid_attr.attr,
+	&sys_ipl_nvme_bootprog_attr.attr,
+	&sys_ipl_nvme_br_lba_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_secure_attr.attr,
+	&sys_ipl_has_secure_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_nvme_attr_group = {
+	.attrs = ipl_nvme_attrs,
+	.bin_attrs = ipl_nvme_bin_attrs,
+};
+
+
 /* CCW ipl device attributes */
 
 static struct attribute *ipl_ccw_attrs_vm[] = {
@@ -471,6 +543,10 @@
 	case IPL_TYPE_FCP_DUMP:
 		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
 		break;
+	case IPL_TYPE_NVME:
+	case IPL_TYPE_NVME_DUMP:
+		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
+		break;
 	default:
 		rc = sysfs_create_group(&ipl_kset->kobj,
 					&ipl_unknown_attr_group);
@@ -691,6 +767,21 @@
 	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
 					    reipl_fcp_loadparm_store);
 
+static ssize_t reipl_fcp_clear_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_fcp_clear);
+}
+
+static ssize_t reipl_fcp_clear_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t len)
+{
+	if (strtobool(buf, &reipl_fcp_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
 static struct attribute *reipl_fcp_attrs[] = {
 	&sys_reipl_fcp_device_attr.attr,
 	&sys_reipl_fcp_wwpn_attr.attr,
@@ -706,6 +797,114 @@
 	.bin_attrs = reipl_fcp_bin_attrs,
 };
 
+static struct kobj_attribute sys_reipl_fcp_clear_attr =
+	__ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store);
+
+/* NVME reipl device attributes */
+
+static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj,
+				      struct bin_attribute *attr,
+				      char *buf, loff_t off, size_t count)
+{
+	size_t size = reipl_block_nvme->nvme.scp_data_len;
+	void *scp_data = reipl_block_nvme->nvme.scp_data;
+
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
+}
+
+static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj,
+				       struct bin_attribute *attr,
+				       char *buf, loff_t off, size_t count)
+{
+	size_t scpdata_len = count;
+	size_t padding;
+
+	if (off)
+		return -EINVAL;
+
+	memcpy(reipl_block_nvme->nvme.scp_data, buf, count);
+	if (scpdata_len % 8) {
+		padding = 8 - (scpdata_len % 8);
+		memset(reipl_block_nvme->nvme.scp_data + scpdata_len,
+		       0, padding);
+		scpdata_len += padding;
+	}
+
+	reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+	reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len;
+	reipl_block_nvme->nvme.scp_data_len = scpdata_len;
+
+	return count;
+}
+
+static struct bin_attribute sys_reipl_nvme_scp_data_attr =
+	__BIN_ATTR(scp_data, (S_IRUGO | S_IWUSR), reipl_nvme_scpdata_read,
+		   reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE);
+
+static struct bin_attribute *reipl_nvme_bin_attrs[] = {
+	&sys_reipl_nvme_scp_data_attr,
+	NULL,
+};
+
+DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n",
+		   reipl_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n",
+		   reipl_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n",
+		   reipl_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n",
+		   reipl_block_nvme->nvme.br_lba);
+
+/* nvme wrapper */
+static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_loadparm_show(reipl_block_nvme, page);
+}
+
+static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t len)
+{
+	return reipl_generic_loadparm_store(reipl_block_nvme, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nvme_loadparm_attr =
+	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nvme_loadparm_show,
+					    reipl_nvme_loadparm_store);
+
+static struct attribute *reipl_nvme_attrs[] = {
+	&sys_reipl_nvme_fid_attr.attr,
+	&sys_reipl_nvme_nsid_attr.attr,
+	&sys_reipl_nvme_bootprog_attr.attr,
+	&sys_reipl_nvme_br_lba_attr.attr,
+	&sys_reipl_nvme_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_nvme_attr_group = {
+	.attrs = reipl_nvme_attrs,
+	.bin_attrs = reipl_nvme_bin_attrs
+};
+
+static ssize_t reipl_nvme_clear_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_nvme_clear);
+}
+
+static ssize_t reipl_nvme_clear_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	if (strtobool(buf, &reipl_nvme_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_nvme_clear_attr =
+	__ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store);
+
 /* CCW reipl device attributes */
 DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
 
@@ -741,16 +940,36 @@
 	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
 					    reipl_ccw_loadparm_store);
 
+static ssize_t reipl_ccw_clear_show(struct kobject *kobj,
+				    struct kobj_attribute *attr, char *page)
+{
+	return sprintf(page, "%u\n", reipl_ccw_clear);
+}
+
+static ssize_t reipl_ccw_clear_store(struct kobject *kobj,
+				     struct kobj_attribute *attr,
+				     const char *buf, size_t len)
+{
+	if (strtobool(buf, &reipl_ccw_clear) < 0)
+		return -EINVAL;
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_ccw_clear_attr =
+	__ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store);
+
 static struct attribute *reipl_ccw_attrs_vm[] = {
 	&sys_reipl_ccw_device_attr.attr,
 	&sys_reipl_ccw_loadparm_attr.attr,
 	&sys_reipl_ccw_vmparm_attr.attr,
+	&sys_reipl_ccw_clear_attr.attr,
 	NULL,
 };
 
 static struct attribute *reipl_ccw_attrs_lpar[] = {
 	&sys_reipl_ccw_device_attr.attr,
 	&sys_reipl_ccw_loadparm_attr.attr,
+	&sys_reipl_ccw_clear_attr.attr,
 	NULL,
 };
 
@@ -850,6 +1069,9 @@
 	case IPL_TYPE_FCP:
 		reipl_block_actual = reipl_block_fcp;
 		break;
+	case IPL_TYPE_NVME:
+		reipl_block_actual = reipl_block_nvme;
+		break;
 	case IPL_TYPE_NSS:
 		reipl_block_actual = reipl_block_nss;
 		break;
@@ -876,6 +1098,8 @@
 		rc = reipl_set_type(IPL_TYPE_CCW);
 	else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0)
 		rc = reipl_set_type(IPL_TYPE_FCP);
+	else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0)
+		rc = reipl_set_type(IPL_TYPE_NVME);
 	else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0)
 		rc = reipl_set_type(IPL_TYPE_NSS);
 	return (rc != 0) ? rc : len;
@@ -886,17 +1110,31 @@
 
 static struct kset *reipl_kset;
 static struct kset *reipl_fcp_kset;
+static struct kset *reipl_nvme_kset;
 
 static void __reipl_run(void *unused)
 {
 	switch (reipl_type) {
 	case IPL_TYPE_CCW:
 		diag308(DIAG308_SET, reipl_block_ccw);
-		diag308(DIAG308_LOAD_CLEAR, NULL);
+		if (reipl_ccw_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL_DUMP, NULL);
 		break;
 	case IPL_TYPE_FCP:
 		diag308(DIAG308_SET, reipl_block_fcp);
-		diag308(DIAG308_LOAD_CLEAR, NULL);
+		if (reipl_fcp_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
+		break;
+	case IPL_TYPE_NVME:
+		diag308(DIAG308_SET, reipl_block_nvme);
+		if (reipl_nvme_clear)
+			diag308(DIAG308_LOAD_CLEAR, NULL);
+		else
+			diag308(DIAG308_LOAD_NORMAL, NULL);
 		break;
 	case IPL_TYPE_NSS:
 		diag308(DIAG308_SET, reipl_block_nss);
@@ -906,6 +1144,7 @@
 		diag308(DIAG308_LOAD_CLEAR, NULL);
 		break;
 	case IPL_TYPE_FCP_DUMP:
+	case IPL_TYPE_NVME_DUMP:
 		break;
 	}
 	disabled_wait();
@@ -1008,10 +1247,16 @@
 	}
 
 	rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
-	if (rc) {
-		kset_unregister(reipl_fcp_kset);
-		free_page((unsigned long) reipl_block_fcp);
-		return rc;
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_fcp_kset->kobj,
+				       &sys_reipl_fcp_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else {
+		reipl_fcp_clear = true;
 	}
 
 	if (ipl_info.type == IPL_TYPE_FCP) {
@@ -1032,6 +1277,69 @@
 	}
 	reipl_capabilities |= IPL_TYPE_FCP;
 	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group);
+out1:
+	kset_unregister(reipl_fcp_kset);
+	free_page((unsigned long) reipl_block_fcp);
+	return rc;
+}
+
+static int __init reipl_nvme_init(void)
+{
+	int rc;
+
+	reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_nvme)
+		return -ENOMEM;
+
+	/* sysfs: create kset for mixing attr group and bin attrs */
+	reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL,
+					     &reipl_kset->kobj);
+	if (!reipl_nvme_kset) {
+		free_page((unsigned long) reipl_block_nvme);
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+	if (rc)
+		goto out1;
+
+	if (test_facility(141)) {
+		rc = sysfs_create_file(&reipl_nvme_kset->kobj,
+				       &sys_reipl_nvme_clear_attr.attr);
+		if (rc)
+			goto out2;
+	} else {
+		reipl_nvme_clear = true;
+	}
+
+	if (ipl_info.type == IPL_TYPE_NVME) {
+		memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block));
+		/*
+		 * Fix loadparm: There are systems where the (SCSI) LOADPARM
+		 * is invalid in the IPL parameter block, so take it
+		 * always from sclp_ipl_info.
+		 */
+		memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm,
+		       LOADPARM_LEN);
+	} else {
+		reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+		reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+		reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN;
+		reipl_block_nvme->nvme.pbt = IPL_PBT_NVME;
+		reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL;
+	}
+	reipl_capabilities |= IPL_TYPE_NVME;
+	return 0;
+
+out2:
+	sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group);
+out1:
+	kset_unregister(reipl_nvme_kset);
+	free_page((unsigned long) reipl_block_nvme);
+	return rc;
 }
 
 static int __init reipl_type_init(void)
@@ -1049,6 +1357,9 @@
 	if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
 		memcpy(reipl_block_fcp, reipl_block, size);
 		reipl_type = IPL_TYPE_FCP;
+	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) {
+		memcpy(reipl_block_nvme, reipl_block, size);
+		reipl_type = IPL_TYPE_NVME;
 	} else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
 		memcpy(reipl_block_ccw, reipl_block, size);
 		reipl_type = IPL_TYPE_CCW;
@@ -1075,6 +1386,9 @@
 	rc = reipl_fcp_init();
 	if (rc)
 		return rc;
+	rc = reipl_nvme_init();
+	if (rc)
+		return rc;
 	rc = reipl_nss_init();
 	if (rc)
 		return rc;
@@ -1118,6 +1432,29 @@
 	.attrs = dump_fcp_attrs,
 };
 
+/* NVME dump device attributes */
+DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n",
+		   dump_block_nvme->nvme.fid);
+DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n",
+		   dump_block_nvme->nvme.nsid);
+DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n",
+		   dump_block_nvme->nvme.bootprog);
+DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n",
+		   dump_block_nvme->nvme.br_lba);
+
+static struct attribute *dump_nvme_attrs[] = {
+	&sys_dump_nvme_fid_attr.attr,
+	&sys_dump_nvme_nsid_attr.attr,
+	&sys_dump_nvme_bootprog_attr.attr,
+	&sys_dump_nvme_br_lba_attr.attr,
+	NULL,
+};
+
+static struct attribute_group dump_nvme_attr_group = {
+	.name  = IPL_NVME_STR,
+	.attrs = dump_nvme_attrs,
+};
+
 /* CCW dump device attributes */
 DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
 
@@ -1159,6 +1496,8 @@
 		rc = dump_set_type(DUMP_TYPE_CCW);
 	else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
 		rc = dump_set_type(DUMP_TYPE_FCP);
+	else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0)
+		rc = dump_set_type(DUMP_TYPE_NVME);
 	return (rc != 0) ? rc : len;
 }
 
@@ -1186,6 +1525,9 @@
 	case DUMP_TYPE_FCP:
 		diag308_dump(dump_block_fcp);
 		break;
+	case DUMP_TYPE_NVME:
+		diag308_dump(dump_block_nvme);
+		break;
 	default:
 		break;
 	}
@@ -1242,6 +1584,29 @@
 	return 0;
 }
 
+static int __init dump_nvme_init(void)
+{
+	int rc;
+
+	if (!sclp_ipl_info.has_dump)
+		return 0; /* LDIPL DUMP is not installed */
+	dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!dump_block_nvme)
+		return -ENOMEM;
+	rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group);
+	if (rc) {
+		free_page((unsigned long)dump_block_nvme);
+		return rc;
+	}
+	dump_block_nvme->hdr.len = IPL_BP_NVME_LEN;
+	dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION;
+	dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN;
+	dump_block_nvme->fcp.pbt = IPL_PBT_NVME;
+	dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP;
+	dump_capabilities |= DUMP_TYPE_NVME;
+	return 0;
+}
+
 static int __init dump_init(void)
 {
 	int rc;
@@ -1260,6 +1625,9 @@
 	rc = dump_fcp_init();
 	if (rc)
 		return rc;
+	rc = dump_nvme_init();
+	if (rc)
+		return rc;
 	dump_set_type(DUMP_TYPE_NONE);
 	return 0;
 }
@@ -1691,6 +2059,11 @@
 		ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
 		ipl_info.data.fcp.lun = ipl_block.fcp.lun;
 		break;
+	case IPL_TYPE_NVME:
+	case IPL_TYPE_NVME_DUMP:
+		ipl_info.data.nvme.fid = ipl_block.nvme.fid;
+		ipl_info.data.nvme.nsid = ipl_block.nvme.nsid;
+		break;
 	case IPL_TYPE_NSS:
 	case IPL_TYPE_UNKNOWN:
 		/* We have no info to copy */
@@ -1783,7 +2156,7 @@
 
 	buf = vzalloc(report->size);
 	if (!buf)
-		return ERR_PTR(-ENOMEM);
+		goto out;
 	ptr = buf;
 
 	memcpy(ptr, report->ipib, report->ipib->hdr.len);
@@ -1822,6 +2195,7 @@
 	}
 
 	BUG_ON(ptr > buf + report->size);
+out:
 	return buf;
 }
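
The re-IPL "clear" toggles added earlier in this file show up as sysfs attributes. A userspace sketch for setting one of them, assuming the usual s390 layout under /sys/firmware/reipl (substitute nvme or ccw for fcp as appropriate):

/* Sketch: enable the LOAD CLEAR variant for FCP re-IPL via sysfs. */
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
	const char *path = "/sys/firmware/reipl/fcp/clear";
	const char *val = argc > 1 ? argv[1] : "1";	/* strtobool(): 0/1/y/n */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	fprintf(f, "%s\n", val);
	return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
}

Note that the attribute is only created when facility 141 is installed; otherwise the init code above forces the clearing variant unconditionally.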
 
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index da550cb..3514420 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -95,14 +95,6 @@
 	{.irq = CPU_RST,    .name = "RST", .desc = "[CPU] CPU Restart"},
 };
 
-void __init init_IRQ(void)
-{
-	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
-	init_cio_interrupts();
-	init_airq_interrupts();
-	init_ext_interrupts();
-}
-
 void do_IRQ(struct pt_regs *regs, int irq)
 {
 	struct pt_regs *old_regs;
@@ -294,7 +286,7 @@
 	return IRQ_HANDLED;
 }
 
-void __init init_ext_interrupts(void)
+static void __init init_ext_interrupts(void)
 {
 	int idx;
 
@@ -307,6 +299,14 @@
 		panic("Failed to register EXT interrupt\n");
 }
 
+void __init init_IRQ(void)
+{
+	BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS);
+	init_cio_interrupts();
+	init_airq_interrupts();
+	init_ext_interrupts();
+}
+
 static DEFINE_SPINLOCK(irq_subclass_lock);
 static unsigned char irq_subclass_refcount[64];
 
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 6f13883..aae24dc 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -7,6 +7,7 @@
  * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
  */
 
+#include <linux/moduleloader.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
@@ -21,6 +22,7 @@
 #include <asm/set_memory.h>
 #include <asm/sections.h>
 #include <asm/dis.h>
+#include "entry.h"
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe);
 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
@@ -30,19 +32,32 @@
 DEFINE_INSN_CACHE_OPS(s390_insn);
 
 static int insn_page_in_use;
-static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE);
+
+void *alloc_insn_page(void)
+{
+	void *page;
+
+	page = module_alloc(PAGE_SIZE);
+	if (!page)
+		return NULL;
+	__set_memory((unsigned long) page, 1, SET_MEMORY_RO | SET_MEMORY_X);
+	return page;
+}
+
+void free_insn_page(void *page)
+{
+	module_memfree(page);
+}
 
 static void *alloc_s390_insn_page(void)
 {
 	if (xchg(&insn_page_in_use, 1) == 1)
 		return NULL;
-	set_memory_x((unsigned long) &insn_page, 1);
-	return &insn_page;
+	return &kprobes_insn_page;
 }
 
 static void free_s390_insn_page(void *page)
 {
-	set_memory_nx((unsigned long) page, 1);
 	xchg(&insn_page_in_use, 0);
 }
 
@@ -56,36 +71,29 @@
 
 static void copy_instruction(struct kprobe *p)
 {
-	unsigned long ip = (unsigned long) p->addr;
+	kprobe_opcode_t insn[MAX_INSN_SIZE];
 	s64 disp, new_disp;
 	u64 addr, new_addr;
+	unsigned int len;
 
-	if (ftrace_location(ip) == ip) {
+	len = insn_length(*p->addr >> 8);
+	memcpy(&insn, p->addr, len);
+	p->opcode = insn[0];
+	if (probe_is_insn_relative_long(&insn[0])) {
 		/*
-		 * If kprobes patches the instruction that is morphed by
-		 * ftrace make sure that kprobes always sees the branch
-		 * "jg .+24" that skips the mcount block or the "brcl 0,0"
-		 * in case of hotpatch.
+		 * For pc-relative instructions in RIL-b or RIL-c format patch
+		 * the RI2 displacement field. We have already made sure that
+		 * the insn slot for the patched instruction is within the same
+		 * 2GB area as the original instruction (either kernel image or
+		 * module area). Therefore the new displacement will always fit.
 		 */
-		ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
-		p->ainsn.is_ftrace_insn = 1;
-	} else
-		memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
-	p->opcode = p->ainsn.insn[0];
-	if (!probe_is_insn_relative_long(p->ainsn.insn))
-		return;
-	/*
-	 * For pc-relative instructions in RIL-b or RIL-c format patch the
-	 * RI2 displacement field. We have already made sure that the insn
-	 * slot for the patched instruction is within the same 2GB area
-	 * as the original instruction (either kernel image or module area).
-	 * Therefore the new displacement will always fit.
-	 */
-	disp = *(s32 *)&p->ainsn.insn[1];
-	addr = (u64)(unsigned long)p->addr;
-	new_addr = (u64)(unsigned long)p->ainsn.insn;
-	new_disp = ((addr + (disp * 2)) - new_addr) / 2;
-	*(s32 *)&p->ainsn.insn[1] = new_disp;
+		disp = *(s32 *)&insn[1];
+		addr = (u64)(unsigned long)p->addr;
+		new_addr = (u64)(unsigned long)p->ainsn.insn;
+		new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+		*(s32 *)&insn[1] = new_disp;
+	}
+	s390_kernel_write(p->ainsn.insn, &insn, len);
 }
 NOKPROBE_SYMBOL(copy_instruction);
 
@@ -136,11 +144,6 @@
 }
 NOKPROBE_SYMBOL(arch_prepare_kprobe);
 
-int arch_check_ftrace_location(struct kprobe *p)
-{
-	return 0;
-}
-
 struct swap_insn_args {
 	struct kprobe *p;
 	unsigned int arm_kprobe : 1;
@@ -149,28 +152,11 @@
 static int swap_instruction(void *data)
 {
 	struct swap_insn_args *args = data;
-	struct ftrace_insn new_insn, *insn;
 	struct kprobe *p = args->p;
-	size_t len;
+	u16 opc;
 
-	new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
-	len = sizeof(new_insn.opc);
-	if (!p->ainsn.is_ftrace_insn)
-		goto skip_ftrace;
-	len = sizeof(new_insn);
-	insn = (struct ftrace_insn *) p->addr;
-	if (args->arm_kprobe) {
-		if (is_ftrace_nop(insn))
-			new_insn.disp = KPROBE_ON_FTRACE_NOP;
-		else
-			new_insn.disp = KPROBE_ON_FTRACE_CALL;
-	} else {
-		ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
-		if (insn->disp == KPROBE_ON_FTRACE_NOP)
-			ftrace_generate_nop_insn(&new_insn);
-	}
-skip_ftrace:
-	s390_kernel_write(p->addr, &new_insn, len);
+	opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+	s390_kernel_write(p->addr, &opc, sizeof(opc));
 	return 0;
 }
 NOKPROBE_SYMBOL(swap_instruction);
@@ -261,6 +247,7 @@
 void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
 	ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
+	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
 	regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
@@ -364,83 +351,7 @@
  */
 static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	struct kretprobe_instance *ri;
-	struct hlist_head *head, empty_rp;
-	struct hlist_node *tmp;
-	unsigned long flags, orig_ret_address;
-	unsigned long trampoline_address;
-	kprobe_opcode_t *correct_ret_addr;
-
-	INIT_HLIST_HEAD(&empty_rp);
-	kretprobe_hash_lock(current, &head, &flags);
-
-	/*
-	 * It is possible to have multiple instances associated with a given
-	 * task either because an multiple functions in the call path
-	 * have a return probe installed on them, and/or more than one return
-	 * return probe was registered for a target function.
-	 *
-	 * We can handle this because:
-	 *     - instances are always inserted at the head of the list
-	 *     - when multiple return probes are registered for the same
-	 *	 function, the first instance's ret_addr will point to the
-	 *	 real return address, and all the rest will point to
-	 *	 kretprobe_trampoline
-	 */
-	ri = NULL;
-	orig_ret_address = 0;
-	correct_ret_addr = NULL;
-	trampoline_address = (unsigned long) &kretprobe_trampoline;
-	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
-		if (ri->task != current)
-			/* another task is sharing our hash bucket */
-			continue;
-
-		orig_ret_address = (unsigned long) ri->ret_addr;
-
-		if (orig_ret_address != trampoline_address)
-			/*
-			 * This is the real return address. Any other
-			 * instances associated with this task are for
-			 * other calls deeper on the call stack
-			 */
-			break;
-	}
-
-	kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
-	correct_ret_addr = ri->ret_addr;
-	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
-		if (ri->task != current)
-			/* another task is sharing our hash bucket */
-			continue;
-
-		orig_ret_address = (unsigned long) ri->ret_addr;
-
-		if (ri->rp && ri->rp->handler) {
-			ri->ret_addr = correct_ret_addr;
-			ri->rp->handler(ri, regs);
-		}
-
-		recycle_rp_inst(ri, &empty_rp);
-
-		if (orig_ret_address != trampoline_address)
-			/*
-			 * This is the real return address. Any other
-			 * instances associated with this task are for
-			 * other calls deeper on the call stack
-			 */
-			break;
-	}
-
-	regs->psw.addr = orig_ret_address;
-
-	kretprobe_hash_unlock(current, &flags);
-
-	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
-		hlist_del(&ri->hlist);
-		kfree(ri);
-	}
+	regs->psw.addr = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
 	/*
 	 * By returning a non-zero value, we are telling
 	 * kprobe_handler() that we don't want the post_handler
@@ -464,24 +375,6 @@
 	unsigned long ip = regs->psw.addr;
 	int fixup = probe_get_fixup_type(p->ainsn.insn);
 
-	/* Check if the kprobes location is an enabled ftrace caller */
-	if (p->ainsn.is_ftrace_insn) {
-		struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
-		struct ftrace_insn call_insn;
-
-		ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
-		/*
-		 * A kprobe on an enabled ftrace call site actually single
-		 * stepped an unconditional branch (ftrace nop equivalent).
-		 * Now we need to fixup things and pretend that a brasl r0,...
-		 * was executed instead.
-		 */
-		if (insn->disp == KPROBE_ON_FTRACE_CALL) {
-			ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
-			regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
-		}
-	}
-
 	if (fixup & FIXUP_PSW_NORMAL)
 		ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
 
@@ -574,10 +467,8 @@
 		 * zero, try to fix up.
 		 */
 		entry = s390_search_extables(regs->psw.addr);
-		if (entry) {
-			regs->psw.addr = extable_fixup(entry);
+		if (entry && ex_handle(entry, regs))
 			return 1;
-		}
 
 		/*
 		 * fixup_exception() could not handle it,
diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S
new file mode 100644
index 0000000..f6cb022
--- /dev/null
+++ b/arch/s390/kernel/kprobes_insn_page.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+
+/*
+ * insn_page is a special 4k aligned dummy function for kprobes.
+ * It will contain all kprobed instructions that are out-of-line executed.
+ * The page must be within the kernel image to guarantee that the
+ * out-of-line instructions are within 2GB distance of their original
+ * location. Using a dummy function ensures that the insn_page is within
+ * the text section of the kernel and mapped read-only/executable from
+ * the beginning on, thus avoiding having to split large mappings if
+ * the page were in the data section instead.
+ */
+	.section .kprobes.text, "ax"
+	.align 4096
+ENTRY(kprobes_insn_page)
+	.rept 2048
+	.word 0x07fe
+	.endr
+ENDPROC(kprobes_insn_page)
+	.previous
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 452502f..3b89597 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -167,7 +167,7 @@
  */
 static void lgr_timer_set(void)
 {
-	mod_timer(&lgr_timer, jiffies + LGR_TIMER_INTERVAL_SECS * HZ);
+	mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC));
 }
 
 /*
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index cb8b1cc..d91989c 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -14,11 +14,8 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/debug_locks.h>
-#include <linux/suspend.h>
 #include <asm/cio.h>
 #include <asm/setup.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
 #include <asm/smp.h>
 #include <asm/ipl.h>
 #include <asm/diag.h>
@@ -39,36 +36,6 @@
 #ifdef CONFIG_CRASH_DUMP
 
 /*
- * PM notifier callback for kdump
- */
-static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action,
-			       void *ptr)
-{
-	switch (action) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-		if (kexec_crash_image)
-			arch_kexec_unprotect_crashkres();
-		break;
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-		if (kexec_crash_image)
-			arch_kexec_protect_crashkres();
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-	return NOTIFY_OK;
-}
-
-static int __init machine_kdump_pm_init(void)
-{
-	pm_notifier(machine_kdump_pm_cb, 0);
-	return 0;
-}
-arch_initcall(machine_kdump_pm_init);
-
-/*
  * Reset the system, copy boot CPU registers to absolute zero,
  * and jump to the kdump image
  */
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index f9e4baa..76cd098 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -12,6 +12,7 @@
 #include <linux/kexec.h>
 #include <linux/module_signature.h>
 #include <linux/verification.h>
+#include <linux/vmalloc.h>
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
 #include <asm/setup.h>
@@ -170,6 +171,7 @@
 	struct kexec_buf buf;
 	unsigned long addr;
 	void *ptr, *end;
+	int ret;
 
 	buf.image = image;
 
@@ -199,9 +201,13 @@
 		ptr += len;
 	}
 
+	ret = -ENOMEM;
 	buf.buffer = ipl_report_finish(data->report);
+	if (!buf.buffer)
+		goto out;
 	buf.bufsz = data->report->size;
 	buf.memsz = buf.bufsz;
+	image->arch.ipl_buf = buf.buffer;
 
 	data->memsz += buf.memsz;
 
@@ -209,7 +215,9 @@
 		data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
 	*lc_ipl_parmblock_ptr = (__u32)buf.mem;
 
-	return kexec_add_buffer(&buf);
+	ret = kexec_add_buffer(&buf);
+out:
+	return ret;
 }
 
 void *kexec_file_add_components(struct kimage *image,
@@ -269,6 +277,7 @@
 {
 	Elf_Rela *relas;
 	int i, r_type;
+	int ret;
 
 	relas = (void *)pi->ehdr + relsec->sh_offset;
 
@@ -303,7 +312,11 @@
 		addr = section->sh_addr + relas[i].r_offset;
 
 		r_type = ELF64_R_TYPE(relas[i].r_info);
-		arch_kexec_do_relocs(r_type, loc, val, addr);
+		ret = arch_kexec_do_relocs(r_type, loc, val, addr);
+		if (ret) {
+			pr_err("Unknown rela relocation: %d\n", r_type);
+			return -ENOEXEC;
+		}
 	}
 	return 0;
 }
@@ -321,3 +334,11 @@
 
 	return kexec_image_probe_default(image, buf, buf_len);
 }
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->arch.ipl_buf);
+	image->arch.ipl_buf = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 14bad57..be9e85e 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -42,6 +42,9 @@
 	.globl	ftrace_regs_caller
 	.set	ftrace_regs_caller,ftrace_caller
 	stg	%r14,(__SF_GPRS+8*8)(%r15)	# save traced function caller
+	lghi	%r14,0				# save condition code
+	ipm	%r14				# don't put any instructions
+	sllg	%r14,%r14,16			# clobbering CC before this point
 	lgr	%r1,%r15
 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
 	aghi	%r0,MCOUNT_RETURN_FIXUP
@@ -54,6 +57,9 @@
 	# allocate pt_regs and stack frame for ftrace_trace_function
 	aghi	%r15,-STACK_FRAME_SIZE
 	stg	%r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+	stg	%r14,(STACK_PTREGS_PSW)(%r15)
+	lg	%r14,(__SF_GPRS+8*8)(%r1)	# restore original return address
+	stosm	(STACK_PTREGS_PSW)(%r15),0
 	aghi	%r1,-TRACED_FUNC_FRAME_SIZE
 	stg	%r1,__SF_BACKCHAIN(%r15)
 	stg	%r0,(STACK_PTREGS_PSW+8)(%r15)
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index ba8f19b..b81bc96 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -19,6 +19,7 @@
 #include <linux/kasan.h>
 #include <linux/moduleloader.h>
 #include <linux/bug.h>
+#include <linux/memory.h>
 #include <asm/alternative.h>
 #include <asm/nospec-branch.h>
 #include <asm/facility.h>
@@ -29,7 +30,7 @@
 #define DEBUGP(fmt , ...)
 #endif
 
-#define PLT_ENTRY_SIZE 20
+#define PLT_ENTRY_SIZE 22
 
 void *module_alloc(unsigned long size)
 {
@@ -174,10 +175,12 @@
 }
 
 static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
-			   int sign, int bits, int shift)
+			   int sign, int bits, int shift,
+			   void *(*write)(void *dest, const void *src, size_t len))
 {
 	unsigned long umax;
 	long min, max;
+	void *dest = (void *)loc;
 
 	if (val & ((1UL << shift) - 1))
 		return -ENOEXEC;
@@ -194,26 +197,33 @@
 			return -ENOEXEC;
 	}
 
-	if (bits == 8)
-		*(unsigned char *) loc = val;
-	else if (bits == 12)
-		*(unsigned short *) loc = (val & 0xfff) |
+	if (bits == 8) {
+		unsigned char tmp = val;
+		write(dest, &tmp, 1);
+	} else if (bits == 12) {
+		unsigned short tmp = (val & 0xfff) |
 			(*(unsigned short *) loc & 0xf000);
-	else if (bits == 16)
-		*(unsigned short *) loc = val;
-	else if (bits == 20)
-		*(unsigned int *) loc = (val & 0xfff) << 16 |
-			(val & 0xff000) >> 4 |
-			(*(unsigned int *) loc & 0xf00000ff);
-	else if (bits == 32)
-		*(unsigned int *) loc = val;
-	else if (bits == 64)
-		*(unsigned long *) loc = val;
+		write(dest, &tmp, 2);
+	} else if (bits == 16) {
+		unsigned short tmp = val;
+		write(dest, &tmp, 2);
+	} else if (bits == 20) {
+		unsigned int tmp = (val & 0xfff) << 16 |
+			(val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff);
+		write(dest, &tmp, 4);
+	} else if (bits == 32) {
+		unsigned int tmp = val;
+		write(dest, &tmp, 4);
+	} else if (bits == 64) {
+		unsigned long tmp = val;
+		write(dest, &tmp, 8);
+	}
 	return 0;
 }
 
 static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
-		      const char *strtab, struct module *me)
+		      const char *strtab, struct module *me,
+		      void *(*write)(void *dest, const void *src, size_t len))
 {
 	struct mod_arch_syminfo *info;
 	Elf_Addr loc, val;
@@ -241,17 +251,17 @@
 	case R_390_64:		/* Direct 64 bit.  */
 		val += rela->r_addend;
 		if (r_type == R_390_8)
-			rc = apply_rela_bits(loc, val, 0, 8, 0);
+			rc = apply_rela_bits(loc, val, 0, 8, 0, write);
 		else if (r_type == R_390_12)
-			rc = apply_rela_bits(loc, val, 0, 12, 0);
+			rc = apply_rela_bits(loc, val, 0, 12, 0, write);
 		else if (r_type == R_390_16)
-			rc = apply_rela_bits(loc, val, 0, 16, 0);
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
 		else if (r_type == R_390_20)
-			rc = apply_rela_bits(loc, val, 1, 20, 0);
+			rc = apply_rela_bits(loc, val, 1, 20, 0, write);
 		else if (r_type == R_390_32)
-			rc = apply_rela_bits(loc, val, 0, 32, 0);
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
 		else if (r_type == R_390_64)
-			rc = apply_rela_bits(loc, val, 0, 64, 0);
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
 		break;
 	case R_390_PC16:	/* PC relative 16 bit.  */
 	case R_390_PC16DBL:	/* PC relative 16 bit shifted by 1.  */
@@ -260,15 +270,15 @@
 	case R_390_PC64:	/* PC relative 64 bit.	*/
 		val += rela->r_addend - loc;
 		if (r_type == R_390_PC16)
-			rc = apply_rela_bits(loc, val, 1, 16, 0);
+			rc = apply_rela_bits(loc, val, 1, 16, 0, write);
 		else if (r_type == R_390_PC16DBL)
-			rc = apply_rela_bits(loc, val, 1, 16, 1);
+			rc = apply_rela_bits(loc, val, 1, 16, 1, write);
 		else if (r_type == R_390_PC32DBL)
-			rc = apply_rela_bits(loc, val, 1, 32, 1);
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
 		else if (r_type == R_390_PC32)
-			rc = apply_rela_bits(loc, val, 1, 32, 0);
+			rc = apply_rela_bits(loc, val, 1, 32, 0, write);
 		else if (r_type == R_390_PC64)
-			rc = apply_rela_bits(loc, val, 1, 64, 0);
+			rc = apply_rela_bits(loc, val, 1, 64, 0, write);
 		break;
 	case R_390_GOT12:	/* 12 bit GOT offset.  */
 	case R_390_GOT16:	/* 16 bit GOT offset.  */
@@ -283,33 +293,33 @@
 	case R_390_GOTPLT64:	/* 64 bit offset to jump slot.	*/
 	case R_390_GOTPLTENT:	/* 32 bit rel. offset to jump slot >> 1. */
 		if (info->got_initialized == 0) {
-			Elf_Addr *gotent;
+			Elf_Addr *gotent = me->core_layout.base +
+					   me->arch.got_offset +
+					   info->got_offset;
 
-			gotent = me->core_layout.base + me->arch.got_offset +
-				info->got_offset;
-			*gotent = val;
+			write(gotent, &val, sizeof(*gotent));
 			info->got_initialized = 1;
 		}
 		val = info->got_offset + rela->r_addend;
 		if (r_type == R_390_GOT12 ||
 		    r_type == R_390_GOTPLT12)
-			rc = apply_rela_bits(loc, val, 0, 12, 0);
+			rc = apply_rela_bits(loc, val, 0, 12, 0, write);
 		else if (r_type == R_390_GOT16 ||
 			 r_type == R_390_GOTPLT16)
-			rc = apply_rela_bits(loc, val, 0, 16, 0);
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
 		else if (r_type == R_390_GOT20 ||
 			 r_type == R_390_GOTPLT20)
-			rc = apply_rela_bits(loc, val, 1, 20, 0);
+			rc = apply_rela_bits(loc, val, 1, 20, 0, write);
 		else if (r_type == R_390_GOT32 ||
 			 r_type == R_390_GOTPLT32)
-			rc = apply_rela_bits(loc, val, 0, 32, 0);
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
 		else if (r_type == R_390_GOT64 ||
 			 r_type == R_390_GOTPLT64)
-			rc = apply_rela_bits(loc, val, 0, 64, 0);
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
 		else if (r_type == R_390_GOTENT ||
 			 r_type == R_390_GOTPLTENT) {
 			val += (Elf_Addr) me->core_layout.base - loc;
-			rc = apply_rela_bits(loc, val, 1, 32, 1);
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
 		}
 		break;
 	case R_390_PLT16DBL:	/* 16 bit PC rel. PLT shifted by 1.  */
@@ -320,25 +330,28 @@
 	case R_390_PLTOFF32:	/* 32 bit offset from GOT to PLT. */
 	case R_390_PLTOFF64:	/* 16 bit offset from GOT to PLT. */
 		if (info->plt_initialized == 0) {
-			unsigned int *ip;
-			ip = me->core_layout.base + me->arch.plt_offset +
-				info->plt_offset;
-			ip[0] = 0x0d10e310;	/* basr 1,0  */
-			ip[1] = 0x100a0004;	/* lg	1,10(1) */
+			unsigned char insn[PLT_ENTRY_SIZE];
+			char *plt_base;
+			char *ip;
+
+			plt_base = me->core_layout.base + me->arch.plt_offset;
+			ip = plt_base + info->plt_offset;
+			*(int *)insn = 0x0d10e310;	/* basr 1,0  */
+			*(int *)&insn[4] = 0x100c0004;	/* lg	1,12(1) */
 			if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
-				unsigned int *ij;
-				ij = me->core_layout.base +
-					me->arch.plt_offset +
-					me->arch.plt_size - PLT_ENTRY_SIZE;
-				ip[2] = 0xa7f40000 +	/* j __jump_r1 */
-					(unsigned int)(u16)
-					(((unsigned long) ij - 8 -
-					  (unsigned long) ip) / 2);
+				char *jump_r1;
+
+				jump_r1 = plt_base + me->arch.plt_size -
+					PLT_ENTRY_SIZE;
+				/* brcl	0xf,__jump_r1 */
+				*(short *)&insn[8] = 0xc0f4;
+				*(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2;
 			} else {
-				ip[2] = 0x07f10000;	/* br %r1 */
+				*(int *)&insn[8] = 0x07f10000;	/* br %r1 */
 			}
-			ip[3] = (unsigned int) (val >> 32);
-			ip[4] = (unsigned int) val;
+			*(long *)&insn[14] = val;
+
+			write(ip, insn, sizeof(insn));
 			info->plt_initialized = 1;
 		}
 		if (r_type == R_390_PLTOFF16 ||
@@ -357,17 +370,17 @@
 			val += rela->r_addend - loc;
 		}
 		if (r_type == R_390_PLT16DBL)
-			rc = apply_rela_bits(loc, val, 1, 16, 1);
+			rc = apply_rela_bits(loc, val, 1, 16, 1, write);
 		else if (r_type == R_390_PLTOFF16)
-			rc = apply_rela_bits(loc, val, 0, 16, 0);
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
 		else if (r_type == R_390_PLT32DBL)
-			rc = apply_rela_bits(loc, val, 1, 32, 1);
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
 		else if (r_type == R_390_PLT32 ||
 			 r_type == R_390_PLTOFF32)
-			rc = apply_rela_bits(loc, val, 0, 32, 0);
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
 		else if (r_type == R_390_PLT64 ||
 			 r_type == R_390_PLTOFF64)
-			rc = apply_rela_bits(loc, val, 0, 64, 0);
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
 		break;
 	case R_390_GOTOFF16:	/* 16 bit offset to GOT.  */
 	case R_390_GOTOFF32:	/* 32 bit offset to GOT.  */
@@ -375,20 +388,20 @@
 		val = val + rela->r_addend -
 			((Elf_Addr) me->core_layout.base + me->arch.got_offset);
 		if (r_type == R_390_GOTOFF16)
-			rc = apply_rela_bits(loc, val, 0, 16, 0);
+			rc = apply_rela_bits(loc, val, 0, 16, 0, write);
 		else if (r_type == R_390_GOTOFF32)
-			rc = apply_rela_bits(loc, val, 0, 32, 0);
+			rc = apply_rela_bits(loc, val, 0, 32, 0, write);
 		else if (r_type == R_390_GOTOFF64)
-			rc = apply_rela_bits(loc, val, 0, 64, 0);
+			rc = apply_rela_bits(loc, val, 0, 64, 0, write);
 		break;
 	case R_390_GOTPC:	/* 32 bit PC relative offset to GOT. */
 	case R_390_GOTPCDBL:	/* 32 bit PC rel. off. to GOT shifted by 1. */
 		val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
 			rela->r_addend - loc;
 		if (r_type == R_390_GOTPC)
-			rc = apply_rela_bits(loc, val, 1, 32, 0);
+			rc = apply_rela_bits(loc, val, 1, 32, 0, write);
 		else if (r_type == R_390_GOTPCDBL)
-			rc = apply_rela_bits(loc, val, 1, 32, 1);
+			rc = apply_rela_bits(loc, val, 1, 32, 1, write);
 		break;
 	case R_390_COPY:
 	case R_390_GLOB_DAT:	/* Create GOT entry.  */
@@ -412,9 +425,10 @@
 	return 0;
 }
 
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 		       unsigned int symindex, unsigned int relsec,
-		       struct module *me)
+		       struct module *me,
+		       void *(*write)(void *dest, const void *src, size_t len))
 {
 	Elf_Addr base;
 	Elf_Sym *symtab;
@@ -430,13 +444,27 @@
 	n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
 
 	for (i = 0; i < n; i++, rela++) {
-		rc = apply_rela(rela, base, symtab, strtab, me);
+		rc = apply_rela(rela, base, symtab, strtab, me, write);
 		if (rc)
 			return rc;
 	}
 	return 0;
 }
 
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+		       unsigned int symindex, unsigned int relsec,
+		       struct module *me)
+{
+	bool early = me->state == MODULE_STATE_UNFORMED;
+	void *(*write)(void *, const void *, size_t) = memcpy;
+
+	if (!early)
+		write = s390_kernel_write;
+
+	return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+				    write);
+}
+
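The most involved field handled by apply_rela_bits() is the 20-bit displacement, which is split into a low and a high part inside the instruction word. A standalone illustration of that encoding, using an arbitrary example word and value:

/* Mirrors the bits == 20 case of apply_rela_bits(): DL (low 12 bits)
 * and DH (high 8 bits) are merged into the word while the remaining
 * opcode bits are preserved. */
#include <stdio.h>
#include <stdint.h>

static uint32_t encode_disp20(uint32_t word, uint32_t val)
{
	return ((val & 0xfff) << 16) | ((val & 0xff000) >> 4) |
	       (word & 0xf00000ff);
}

int main(void)
{
	printf("%08x\n", encode_disp20(0xe300f004, 0x12345));
	return 0;
}
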
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs,
 		    struct module *me)
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0a487fa..86c8d53 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -148,7 +148,6 @@
 	local_mcck_disable();
 	mcck = *this_cpu_ptr(&cpu_mcck);
 	memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck));
-	clear_cpu_flag(CIF_MCCK_PENDING);
 	local_mcck_enable();
 	local_irq_restore(flags);
 
@@ -333,7 +332,7 @@
 /*
  * machine check handler.
  */
-void notrace s390_do_machine_check(struct pt_regs *regs)
+int notrace s390_do_machine_check(struct pt_regs *regs)
 {
 	static int ipd_count;
 	static DEFINE_SPINLOCK(ipd_lock);
@@ -342,6 +341,7 @@
 	unsigned long long tmp;
 	union mci mci;
 	unsigned long mcck_dam_code;
+	int mcck_pending = 0;
 
 	nmi_enter();
 	inc_irq_stat(NMI_NMI);
@@ -400,7 +400,7 @@
 		 */
 		mcck->kill_task = 1;
 		mcck->mcck_code = mci.val;
-		set_cpu_flag(CIF_MCCK_PENDING);
+		mcck_pending = 1;
 	}
 
 	/*
@@ -420,8 +420,7 @@
 			mcck->stp_queue |= stp_sync_check();
 		if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND))
 			mcck->stp_queue |= stp_island_check();
-		if (mcck->stp_queue)
-			set_cpu_flag(CIF_MCCK_PENDING);
+		mcck_pending = 1;
 	}
 
 	/*
@@ -442,12 +441,12 @@
 	if (mci.cp) {
 		/* Channel report word pending */
 		mcck->channel_report = 1;
-		set_cpu_flag(CIF_MCCK_PENDING);
+		mcck_pending = 1;
 	}
 	if (mci.w) {
 		/* Warning pending */
 		mcck->warning = 1;
-		set_cpu_flag(CIF_MCCK_PENDING);
+		mcck_pending = 1;
 	}
 
 	/*
@@ -462,7 +461,17 @@
 		*((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR;
 	}
 	clear_cpu_flag(CIF_MCCK_GUEST);
+
+	if (user_mode(regs) && mcck_pending) {
+		nmi_exit();
+		return 1;
+	}
+
+	if (mcck_pending)
+		schedule_mcck_handler();
+
 	nmi_exit();
+	return 0;
 }
 NOKPROBE_SYMBOL(s390_do_machine_check);
 
diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c
new file mode 100644
index 0000000..51c5a9f
--- /dev/null
+++ b/arch/s390/kernel/numa.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NUMA support for s390
+ *
+ * Implement NUMA core code.
+ *
+ * Copyright IBM Corp. 2015
+ */
+
+#include <linux/kernel.h>
+#include <linux/mmzone.h>
+#include <linux/cpumask.h>
+#include <linux/memblock.h>
+#include <linux/node.h>
+#include <asm/numa.h>
+
+struct pglist_data *node_data[MAX_NUMNODES];
+EXPORT_SYMBOL(node_data);
+
+void __init numa_setup(void)
+{
+	int nid;
+
+	nodes_clear(node_possible_map);
+	node_set(0, node_possible_map);
+	node_set_online(0);
+	for (nid = 0; nid < MAX_NUMNODES; nid++) {
+		NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
+		if (!NODE_DATA(nid))
+			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
+			      __func__, sizeof(pg_data_t), 8);
+	}
+	NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	NODE_DATA(0)->node_id = 0;
+}
+
+static int __init numa_init_late(void)
+{
+	register_one_node(0);
+	return 0;
+}
+arch_initcall(numa_init_late);
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index 8b33e03..37265f5 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -238,6 +238,64 @@
 CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
 CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
 
+CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083);
+CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084);
+CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086);
+CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087);
+CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a);
+CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d);
+CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c);
+CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d);
+CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad);
+CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae);
+CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af);
+CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0);
+CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1);
+CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2);
+CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8);
+CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4);
+CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108);
+CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0);
+CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1);
+
 static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = {
 	CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES),
 	CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS),
@@ -516,6 +574,67 @@
 	NULL,
 };
 
+static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = {
+	CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TEND),
+	CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND),
+	CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV),
+	CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES),
+	CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS),
+	CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS),
+	CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS),
+	CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CC),
+	CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH),
+	CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE),
+	CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE),
+	NULL,
+};
+
 /* END: CPUM_CF COUNTER DEFINITIONS ===================================== */
 
 static struct attribute_group cpumcf_pmu_events_group = {
@@ -624,9 +743,11 @@
 		break;
 	case 0x3906:
 	case 0x3907:
+		model = cpumcf_z14_pmu_event_attr;
+		break;
 	case 0x8561:
 	case 0x8562:
-		model = cpumcf_z14_pmu_event_attr;
+		model = cpumcf_z15_pmu_event_attr;
 		break;
 	default:
 		model = none;
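
The new z15 counters are ordinary raw event codes on the cpum_cf PMU. A userspace sketch for reading one of them with perf_event_open(); the PMU type is taken from sysfs and the event code from the table above (usually needs CAP_PERFMON or root):

/* Sketch: count L1D_RO_EXCL_WRITES (0x0080) for one second. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int type, fd;
	FILE *f;

	f = fopen("/sys/bus/event_source/devices/cpum_cf/type", "r");
	if (!f || fscanf(f, "%d", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	attr.config = 0x0080;	/* L1D_RO_EXCL_WRITES */

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;
	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("L1D_RO_EXCL_WRITES: %lld\n", count);
	return 0;
}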
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index b83bddf..19cd7b9 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -156,8 +156,8 @@
 		}
 	}
 
-	debug_sprintf_event(sfdbg, 5,
-			    "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+	debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
+			    (unsigned long)sfb->sdbt);
 	memset(sfb, 0, sizeof(*sfb));
 }
 
@@ -212,10 +212,11 @@
 	 * the sampling buffer origin.
 	 */
 	if (sfb->sdbt != get_next_sdbt(tail)) {
-		debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
-				    "sampling buffer is not linked: origin=%p"
-				    "tail=%p\n",
-				    (void *) sfb->sdbt, (void *) tail);
+		debug_sprintf_event(sfdbg, 3, "%s: "
+				    "sampling buffer is not linked: origin %#lx"
+				    " tail %#lx\n", __func__,
+				    (unsigned long)sfb->sdbt,
+				    (unsigned long)tail);
 		return -EINVAL;
 	}
 
@@ -264,8 +265,8 @@
 	*tail = (unsigned long) sfb->sdbt + 1;
 	sfb->tail = tail;
 
-	debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
-			    " settings: sdbt=%lu sdb=%lu\n",
+	debug_sprintf_event(sfdbg, 4, "%s: new buffer"
+			    " settings: sdbt %lu sdb %lu\n", __func__,
 			    sfb->num_sdbt, sfb->num_sdb);
 	return rc;
 }
@@ -305,12 +306,13 @@
 	rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
 	if (rc) {
 		free_sampling_buffer(sfb);
-		debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
-			"realloc_sampling_buffer failed with rc=%i\n", rc);
+		debug_sprintf_event(sfdbg, 4, "%s: "
+			"realloc_sampling_buffer failed with rc %i\n",
+			__func__, rc);
 	} else
 		debug_sprintf_event(sfdbg, 4,
-			"alloc_sampling_buffer: tear=%p dear=%p\n",
-			sfb->sdbt, (void *) *sfb->sdbt);
+			"%s: tear %#lx dear %#lx\n", __func__,
+			(unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
 	return rc;
 }
 
@@ -370,28 +372,33 @@
 
 static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
 {
-	unsigned long n_sdb, freq, factor;
+	unsigned long n_sdb, freq;
 	size_t sample_size;
 
 	/* Calculate sampling buffers using 4K pages
 	 *
-	 *    1. Determine the sample data size which depends on the used
-	 *	 sampling functions, for example, basic-sampling or
-	 *	 basic-sampling with diagnostic-sampling.
+	 *    1. The sampling size is 32 bytes for basic sampling. This size
+	 *	 is the same for all machine types. Diagnostic
+	 *	 sampling uses an auxiliary data buffer setup which provides
+	 *	 the memory for SDBs using the Linux common code auxiliary
+	 *	 trace setup.
 	 *
-	 *    2. Use the sampling frequency as input.  The sampling buffer is
-	 *	 designed for almost one second.  This can be adjusted through
-	 *	 the "factor" variable.
-	 *	 In any case, alloc_sampling_buffer() sets the Alert Request
+	 *    2. Function alloc_sampling_buffer() sets the Alert Request
 	 *	 Control indicator to trigger a measurement-alert to harvest
-	 *	 sample-data-blocks (sdb).
+	 *	 sample-data-blocks (SDB). This is done per SDB. This
+	 *	 measurement alert interrupt fires quickly enough to handle
+	 *	 one SDB; at very high frequencies and workloads there might
+	 *	 be 2 to 3 SDBs available for sample processing.
+	 *	 Currently there is no need to set up an alert request on
+	 *	 every n-th page. This is counterproductive as one IRQ would
+	 *	 then trigger a very high number of samples to be processed.
 	 *
-	 *    3. Compute the number of sample-data-blocks and ensure a minimum
-	 *	 of CPUM_SF_MIN_SDB.  Also ensure the upper limit does not
-	 *	 exceed a "calculated" maximum.  The symbolic maximum is
-	 *	 designed for basic-sampling only and needs to be increased if
-	 *	 diagnostic-sampling is active.
-	 *	 See also the remarks for these symbolic constants.
+	 *    3. Use the sampling frequency as input.
+	 *	 Compute the number of SDBs and ensure a minimum
+	 *	 of CPUM_SF_MIN_SDB.  Depending on frequency add some more
+	 *	 SDBs to handle a higher sampling rate.
+	 *	 Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples
+	 *	 (one SDB) for every 10000 Hz frequency increment.
 	 *
 	 *    4. Compute the number of sample-data-block-tables (SDBT) and
 	 *	 ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up
@@ -399,10 +406,7 @@
 	 */
 	sample_size = sizeof(struct hws_basic_entry);
 	freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc));
-	factor = 1;
-	n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size));
-	if (n_sdb < CPUM_SF_MIN_SDB)
-		n_sdb = CPUM_SF_MIN_SDB;
+	n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
 
 	/* If there is already a sampling buffer allocated, it is very likely
 	 * that the sampling facility is enabled too.  If the event to be
@@ -417,8 +421,8 @@
 		return 0;
 
 	debug_sprintf_event(sfdbg, 3,
-			    "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
-			    " sample_size=%lu cpuhw=%p\n",
+			    "%s: rate %lu f %lu sdb %lu/%lu"
+			    " sample_size %lu cpuhw %p\n", __func__,
 			    SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
 			    sample_size, cpuhw);
 
@@ -478,8 +482,8 @@
 	if (num)
 		sfb_account_allocs(num, hwc);
 
-	debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
-			    " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+	debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
+			    __func__, OVERFLOW_REG(hwc), ratio, num);
 	OVERFLOW_REG(hwc) = 0;
 }
 
@@ -517,13 +521,13 @@
 	 */
 	rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
 	if (rc)
-		debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
-				    "failed with rc=%i\n", rc);
+		debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
+				    __func__, rc);
 
 	if (sfb_has_pending_allocs(sfb, hwc))
-		debug_sprintf_event(sfdbg, 5, "sfb: extend: "
-				    "req=%lu alloc=%lu remaining=%lu\n",
-				    num, sfb->num_sdb - num_old,
+		debug_sprintf_event(sfdbg, 5, "%s: "
+				    "req %lu alloc %lu remaining %lu\n",
+				    __func__, num, sfb->num_sdb - num_old,
 				    sfb_pending_allocs(sfb, hwc));
 }
 
@@ -551,20 +555,22 @@
 		err = sf_disable();
 		if (err)
 			pr_err("Switching off the sampling facility failed "
-			       "with rc=%i\n", err);
+			       "with rc %i\n", err);
 		debug_sprintf_event(sfdbg, 5,
-				    "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+				    "%s: initialized: cpuhw %p\n", __func__,
+				    cpusf);
 		break;
 	case PMC_RELEASE:
 		cpusf->flags &= ~PMU_F_RESERVED;
 		err = sf_disable();
 		if (err) {
 			pr_err("Switching off the sampling facility failed "
-			       "with rc=%i\n", err);
+			       "with rc %i\n", err);
 		} else
 			deallocate_buffers(cpusf);
 		debug_sprintf_event(sfdbg, 5,
-				    "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+				    "%s: released: cpuhw %p\n", __func__,
+				    cpusf);
 		break;
 	}
 	if (err)
@@ -611,13 +617,6 @@
 	local64_set(&hwc->period_left, hwc->sample_period);
 }
 
-static void hw_reset_registers(struct hw_perf_event *hwc,
-			       unsigned long *sdbt_origin)
-{
-	/* (Re)set to first sample-data-block-table */
-	TEAR_REG(hwc) = (unsigned long) sdbt_origin;
-}
-
 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
 				   unsigned long rate)
 {
@@ -673,7 +672,7 @@
 	rcu_read_lock();
 
 	perf_prepare_sample(&header, data, event, regs);
-	if (perf_output_begin(&handle, event, header.size))
+	if (perf_output_begin(&handle, data, event, header.size))
 		goto out;
 
 	/* Update the process ID (see also kernel/events/core.c) */
@@ -709,9 +708,9 @@
 		 */
 		if (sample_rate_to_freq(si, rate) >
 		    sysctl_perf_event_sample_rate) {
-			debug_sprintf_event(sfdbg, 1,
+			debug_sprintf_event(sfdbg, 1, "%s: "
 					    "Sampling rate exceeds maximum "
-					    "perf sample rate\n");
+					    "perf sample rate\n", __func__);
 			rate = 0;
 		}
 	}
@@ -756,10 +755,9 @@
 	attr->sample_period = rate;
 	SAMPL_RATE(hwc) = rate;
 	hw_init_period(hwc, SAMPL_RATE(hwc));
-	debug_sprintf_event(sfdbg, 4, "__hw_perf_event_init_rate:"
-			    "cpu:%d period:%llx freq:%d,%#lx\n", event->cpu,
-			    event->attr.sample_period, event->attr.freq,
-			    SAMPLE_FREQ_MODE(hwc));
+	debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
+			    __func__, event->cpu, event->attr.sample_period,
+			    event->attr.freq, SAMPLE_FREQ_MODE(hwc));
 	return 0;
 }
 
@@ -883,12 +881,21 @@
 	return err;
 }
 
+static bool is_callchain_event(struct perf_event *event)
+{
+	u64 sample_type = event->attr.sample_type;
+
+	return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER |
+			      PERF_SAMPLE_STACK_USER);
+}
+
 static int cpumsf_pmu_event_init(struct perf_event *event)
 {
 	int err;
 
 	/* No support for taken branch sampling */
-	if (has_branch_stack(event))
+	/* No support for callchain, stacks and registers */
+	if (has_branch_stack(event) || is_callchain_event(event))
 		return -EOPNOTSUPP;
 
 	switch (event->attr.type) {
@@ -962,8 +969,7 @@
 			 * buffer extents
 			 */
 			sfb_account_overflows(cpuhw, hwc);
-			if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
-				extend_sampling_buffer(&cpuhw->sfb, hwc);
+			extend_sampling_buffer(&cpuhw->sfb, hwc);
 		}
 		/* Rate may be adjusted with ioctl() */
 		cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
@@ -976,7 +982,7 @@
 	err = lsctl(&cpuhw->lsctl);
 	if (err) {
 		cpuhw->flags &= ~PMU_F_ENABLED;
-		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+		pr_err("Loading sampling controls failed: op %i err %i\n",
 			1, err);
 		return;
 	}
@@ -984,12 +990,11 @@
 	/* Load current program parameter */
 	lpp(&S390_lowcore.lpp);
 
-	debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
-			    "interval:%lx tear=%p dear=%p\n",
+	debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
+			    "interval %#lx tear %#lx dear %#lx\n", __func__,
 			    cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
 			    cpuhw->lsctl.cd, cpuhw->lsctl.interval,
-			    (void *) cpuhw->lsctl.tear,
-			    (void *) cpuhw->lsctl.dear);
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
 }
 
 static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -1012,13 +1017,14 @@
 
 	err = lsctl(&inactive);
 	if (err) {
-		pr_err("Loading sampling controls failed: op=%i err=%i\n",
+		pr_err("Loading sampling controls failed: op %i err %i\n",
 			2, err);
 		return;
 	}
 
 	/* Save state of TEAR and DEAR register contents */
-	if (!qsi(&si)) {
+	err = qsi(&si);
+	if (!err) {
 		/* TEAR/DEAR values are valid only if the sampling facility is
 		 * enabled.  Note that cpumsf_pmu_disable() might be called even
 		 * for a disabled sampling facility because cpumsf_pmu_enable()
@@ -1029,8 +1035,8 @@
 			cpuhw->lsctl.dear = si.dear;
 		}
 	} else
-		debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
-				    "qsi() failed with err=%i\n", err);
+		debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
+				    __func__, err);
 
 	cpuhw->flags &= ~PMU_F_ENABLED;
 }
@@ -1143,15 +1149,6 @@
 	local64_add(count, &event->count);
 }
 
-static void debug_sample_entry(struct hws_basic_entry *sample,
-			       struct hws_trailer_entry *te)
-{
-	debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
-			    "sampling data entry: te->f=%i basic.def=%04x "
-			    "(%p)\n",
-			    te->f, sample->def, sample);
-}
-
 /* hw_collect_samples() - Walk through a sample-data-block and collect samples
  * @event:	The perf event
  * @sdbt:	Sample-data-block table
@@ -1205,7 +1202,11 @@
 				/* Count discarded samples */
 				*overflow += 1;
 		} else {
-			debug_sample_entry(sample, te);
+			debug_sprintf_event(sfdbg, 4,
+					    "%s: Found unknown"
+					    " sampling data entry: te->f %i"
+					    " basic.def %#4x (%p)\n", __func__,
+					    te->f, sample->def, sample);
 			/* Sample slot is not yet written or other record.
 			 *
 			 * This condition can occur if the buffer was reused
@@ -1280,9 +1281,9 @@
 			sampl_overflow += te->overflow;
 
 		/* Timestamps are valid for full sample-data-blocks only */
-		debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
-				    "overflow=%llu timestamp=%#llx\n",
-				    sdbt, te->overflow,
+		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+				    "overflow %llu timestamp %#llx\n",
+				    __func__, (unsigned long)sdbt, te->overflow,
 				    (te->f) ? trailer_timestamp(te) : 0ULL);
 
 		/* Collect all samples from a single sample-data-block and
@@ -1336,9 +1337,11 @@
 	}
 
 	if (sampl_overflow || event_overflow)
-		debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
-				    "overflow stats: sample=%llu event=%llu\n",
-				    sampl_overflow, event_overflow);
+		debug_sprintf_event(sfdbg, 4, "%s: "
+				    "overflows: sample %llu event %llu"
+				    " total %llu num_sdb %llu\n",
+				    __func__, sampl_overflow, event_overflow,
+				    OVERFLOW_REG(hwc), num_sdb);
 }
 
 #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
@@ -1391,7 +1394,8 @@
 	te = aux_sdb_trailer(aux, aux->alert_mark);
 	te->flags &= ~SDB_TE_ALERT_REQ_MASK;
 
-	debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+	debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
+			    __func__, i, range_scan, aux->head);
 }
 
 /*
@@ -1424,6 +1428,10 @@
 	 * SDBs between aux->head and aux->empty_mark are already ready
 	 * for new data. range_scan is num of SDBs not within them.
 	 */
+	debug_sprintf_event(sfdbg, 6,
+			    "%s: range %ld head %ld alert %ld empty %ld\n",
+			    __func__, range, aux->head, aux->alert_mark,
+			    aux->empty_mark);
 	if (range > AUX_SDB_NUM_EMPTY(aux)) {
 		range_scan = range - AUX_SDB_NUM_EMPTY(aux);
 		idx = aux->empty_mark + 1;
@@ -1449,15 +1457,11 @@
 	cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
 	cpuhw->lsctl.dear = aux->sdb_index[head];
 
-	debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
-			    "head->alert_mark->empty_mark (num_alert, range)"
-			    "[%lx -> %lx -> %lx] (%lx, %lx) "
-			    "tear index %lx, tear %lx dear %lx\n",
+	debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
+			    "index %ld tear %#lx dear %#lx\n", __func__,
 			    aux->head, aux->alert_mark, aux->empty_mark,
-			    AUX_SDB_NUM_ALERT(aux), range,
 			    head / CPUM_SF_SDB_PER_TABLE,
-			    cpuhw->lsctl.tear,
-			    cpuhw->lsctl.dear);
+			    cpuhw->lsctl.tear, cpuhw->lsctl.dear);
 
 	return 0;
 }
@@ -1519,9 +1523,12 @@
 			     unsigned long long *overflow)
 {
 	unsigned long long orig_overflow, orig_flags, new_flags;
-	unsigned long i, range_scan, idx;
+	unsigned long i, range_scan, idx, idx_old;
 	struct hws_trailer_entry *te;
 
+	debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
+			    "empty %ld\n", __func__, range, aux->head,
+			    aux->alert_mark, aux->empty_mark);
 	if (range <= AUX_SDB_NUM_EMPTY(aux))
 		/*
 		 * No need to scan. All SDBs in range are marked as empty.
@@ -1544,7 +1551,7 @@
 	 * indicator fall into this range, set it.
 	 */
 	range_scan = range - AUX_SDB_NUM_EMPTY(aux);
-	idx = aux->empty_mark + 1;
+	idx_old = idx = aux->empty_mark + 1;
 	for (i = 0; i < range_scan; i++, idx++) {
 		te = aux_sdb_trailer(aux, idx);
 		do {
@@ -1564,6 +1571,9 @@
 	/* Update empty_mark to new position */
 	aux->empty_mark = aux->head + range - 1;
 
+	debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
+			    "empty %ld\n", __func__, range_scan, idx_old,
+			    idx - 1, aux->empty_mark);
 	return true;
 }
 
@@ -1585,8 +1595,9 @@
 
 	/* Inform user space new data arrived */
 	size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+	debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+			    size >> PAGE_SHIFT);
 	perf_aux_output_end(handle, size);
-	num_sdb = aux->sfb.num_sdb;
 
 	num_sdb = aux->sfb.num_sdb;
 	while (!done) {
@@ -1596,7 +1607,9 @@
 			pr_err("The AUX buffer with %lu pages for the "
 			       "diagnostic-sampling mode is full\n",
 				num_sdb);
-			debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+			debug_sprintf_event(sfdbg, 1,
+					    "%s: AUX buffer used up\n",
+					    __func__);
 			break;
 		}
 		if (WARN_ON_ONCE(!aux))
@@ -1618,24 +1631,24 @@
 			size = range << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
 			pr_err("Sample data caused the AUX buffer with %lu "
-			       "pages to overflow\n", num_sdb);
-			debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
-					    "overflow %llx\n",
+			       "pages to overflow\n", aux->sfb.num_sdb);
+			debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
+					    "overflow %lld\n", __func__,
 					    aux->head, range, overflow);
 		} else {
 			size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
 			perf_aux_output_end(&cpuhw->handle, size);
-			debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+			debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
 					    "already full, try another\n",
+					    __func__,
 					    aux->head, aux->alert_mark);
 		}
 	}
 
 	if (done)
-		debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
-				    "[%lx -> %lx -> %lx] (%lx, %lx)\n",
-				    aux->head, aux->alert_mark, aux->empty_mark,
-				    AUX_SDB_NUM_ALERT(aux), range);
+		debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
+				    "empty %ld\n", __func__, aux->head,
+				    aux->alert_mark, aux->empty_mark);
 }
 
 /*
@@ -1658,8 +1671,7 @@
 	kfree(aux->sdb_index);
 	kfree(aux);
 
-	debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
-			    "%lu SDBTs\n", num_sdbt);
+	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
 }
 
 static void aux_sdb_init(unsigned long sdb)
@@ -1717,7 +1729,7 @@
 	sfb = &aux->sfb;
 
 	/* Allocate sdbt_index for fast reference */
-	n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
+	n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
 	aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
 	if (!aux->sdbt_index)
 		goto no_sdbt_index;
@@ -1767,8 +1779,7 @@
 	 */
 	aux->empty_mark = sfb->num_sdb - 1;
 
-	debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
-			    " and %lu SDBs\n",
+	debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
 			    sfb->num_sdbt, sfb->num_sdb);
 
 	return aux;
@@ -1822,9 +1833,9 @@
 	event->attr.sample_period = rate;
 	SAMPL_RATE(&event->hw) = rate;
 	hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
-	debug_sprintf_event(sfdbg, 4, "cpumsf_pmu_check_period:"
-			    "cpu:%d value:%llx period:%llx freq:%d\n",
-			    event->cpu, value,
+	debug_sprintf_event(sfdbg, 4, "%s:"
+			    " cpu %d value %#llx period %#llx freq %d\n",
+			    __func__, event->cpu, value,
 			    event->attr.sample_period, do_freq);
 	return 0;
 }
@@ -1900,7 +1911,7 @@
 	if (!SAMPL_DIAG_MODE(&event->hw)) {
 		cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
 		cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
-		hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+		TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
 	}
 
 	/* Ensure sampling functions are in the disabled state.  If disabled,
@@ -2055,7 +2066,7 @@
 
 	/* Report measurement alerts only for non-PRA codes */
 	if (alert != CPU_MF_INT_SF_PRA)
-		debug_sprintf_event(sfdbg, 6, "measurement alert: %#x\n",
+		debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
 				    alert);
 
 	/* Sampling authorization change request */
@@ -2134,7 +2145,7 @@
 
 	sfb_set_limits(min, max);
 	pr_info("The sampling buffer limits have changed to: "
-		"min=%lu max=%lu (diag=x%lu)\n",
+		"min %lu max %lu (diag %lu)\n",
 		CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
 	return 0;
 }
@@ -2152,7 +2163,7 @@
 static void __init pr_cpumsf_err(unsigned int reason)
 {
 	pr_err("Sampling facility support for perf is not available: "
-	       "reason=%04x\n", reason);
+	       "reason %#x\n", reason);
 }
 
 static int __init init_cpum_sampling_pmu(void)
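
Note on the allocate_buffers() hunk above: the old code derived the number of
SDBs from the page payload divided by the sample size, while the new rule
simply starts at CPUM_SF_MIN_SDB and adds one SDB (roughly 100 basic samples)
per 10000 Hz of requested sampling frequency. The user-space sketch below only
illustrates that sizing rule; the CPUM_SF_MIN_SDB value of 15 used here is a
placeholder and is not taken from the kernel headers.

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define CPUM_SF_MIN_SDB		15	/* placeholder value, for illustration only */

/* mirrors: n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000) */
static unsigned long n_sdb_for_freq(unsigned long freq)
{
	return CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000);
}

int main(void)
{
	unsigned long freq;

	for (freq = 1000; freq <= 50000; freq += 7000)
		printf("freq %6lu Hz -> %lu SDBs\n", freq, n_sdb_for_freq(freq));
	return 0;
}
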
diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c
index 4352a50..6e9e5d5 100644
--- a/arch/s390/kernel/perf_regs.c
+++ b/arch/s390/kernel/perf_regs.c
@@ -53,8 +53,7 @@
 }
 
 void perf_get_regs_user(struct perf_regs *regs_user,
-			struct pt_regs *regs,
-			struct pt_regs *regs_user_copy)
+			struct pt_regs *regs)
 {
 	/*
 	 * Use the regs from the first interruption and let
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 59dee9d..9a92638 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -78,10 +78,10 @@
 PGM_CHECK(do_dat_exception)		/* 3a */
 PGM_CHECK(do_dat_exception)		/* 3b */
 PGM_CHECK_DEFAULT			/* 3c */
-PGM_CHECK_DEFAULT			/* 3d */
-PGM_CHECK_DEFAULT			/* 3e */
-PGM_CHECK_DEFAULT			/* 3f */
-PGM_CHECK_DEFAULT			/* 40 */
+PGM_CHECK(do_secure_storage_access)	/* 3d */
+PGM_CHECK(do_non_secure_storage_access)	/* 3e */
+PGM_CHECK(do_secure_storage_violation)	/* 3f */
+PGM_CHECK(monitor_event_exception)	/* 40 */
 PGM_CHECK_DEFAULT			/* 41 */
 PGM_CHECK_DEFAULT			/* 42 */
 PGM_CHECK_DEFAULT			/* 43 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 4e6299e..ec801d3 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -40,6 +40,7 @@
 #include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 #include <asm/runtime_instr.h>
+#include <asm/unwind.h>
 #include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -79,8 +80,8 @@
 	return 0;
 }
 
-int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
-		    unsigned long arg, struct task_struct *p, unsigned long tls)
+int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
+		unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 	struct fake_frame
 	{
@@ -159,29 +160,10 @@
 	asm volatile("sfpc %0" : : "d" (0));
 }
 
-/*
- * fill in the FPU structure for a core dump.
- */
-int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
-{
-	save_fpu_regs();
-	fpregs->fpc = current->thread.fpu.fpc;
-	fpregs->pad = 0;
-	if (MACHINE_HAS_VX)
-		convert_vx_to_fp((freg_t *)&fpregs->fprs,
-				 current->thread.fpu.vxrs);
-	else
-		memcpy(&fpregs->fprs, current->thread.fpu.fprs,
-		       sizeof(fpregs->fprs));
-	return 1;
-}
-EXPORT_SYMBOL(dump_fpu);
-
 unsigned long get_wchan(struct task_struct *p)
 {
-	struct stack_frame *sf, *low, *high;
-	unsigned long return_address;
-	int count;
+	struct unwind_state state;
+	unsigned long ip = 0;
 
 	if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
 		return 0;
@@ -189,26 +171,22 @@
 	if (!try_get_task_stack(p))
 		return 0;
 
-	low = task_stack_page(p);
-	high = (struct stack_frame *) task_pt_regs(p);
-	sf = (struct stack_frame *) p->thread.ksp;
-	if (sf <= low || sf > high) {
-		return_address = 0;
-		goto out;
-	}
-	for (count = 0; count < 16; count++) {
-		sf = (struct stack_frame *)READ_ONCE_NOCHECK(sf->back_chain);
-		if (sf <= low || sf > high) {
-			return_address = 0;
-			goto out;
+	unwind_for_each_frame(&state, p, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK) {
+			ip = 0;
+			break;
 		}
-		return_address = READ_ONCE_NOCHECK(sf->gprs[8]);
-		if (!in_sched_functions(return_address))
-			goto out;
+
+		ip = unwind_get_return_address(&state);
+		if (!ip)
+			break;
+
+		if (!in_sched_functions(ip))
+			break;
 	}
-out:
+
 	put_task_stack(p);
-	return return_address;
+	return ip;
 }
 
 unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 91b9b3f..c92d04f 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -151,10 +151,35 @@
 	}
 }
 
+static void show_cpu_topology(struct seq_file *m, unsigned long n)
+{
+#ifdef CONFIG_SCHED_TOPOLOGY
+	seq_printf(m, "physical id     : %d\n", topology_physical_package_id(n));
+	seq_printf(m, "core id         : %d\n", topology_core_id(n));
+	seq_printf(m, "book id         : %d\n", topology_book_id(n));
+	seq_printf(m, "drawer id       : %d\n", topology_drawer_id(n));
+	seq_printf(m, "dedicated       : %d\n", topology_cpu_dedicated(n));
+	seq_printf(m, "address         : %d\n", smp_cpu_get_cpu_address(n));
+	seq_printf(m, "siblings        : %d\n", cpumask_weight(topology_core_cpumask(n)));
+	seq_printf(m, "cpu cores       : %d\n", topology_booted_cores(n));
+#endif /* CONFIG_SCHED_TOPOLOGY */
+}
+
+static void show_cpu_ids(struct seq_file *m, unsigned long n)
+{
+	struct cpuid *id = &per_cpu(cpu_info.cpu_id, n);
+
+	seq_printf(m, "version         : %02X\n", id->version);
+	seq_printf(m, "identification  : %06X\n", id->ident);
+	seq_printf(m, "machine         : %04X\n", id->machine);
+}
+
 static void show_cpu_mhz(struct seq_file *m, unsigned long n)
 {
 	struct cpu_info *c = per_cpu_ptr(&cpu_info, n);
 
+	if (!machine_has_cpu_mhz)
+		return;
 	seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic);
 	seq_printf(m, "cpu MHz static  : %d\n", c->cpu_mhz_static);
 }
@@ -169,9 +194,9 @@
 
 	if (n == first)
 		show_cpu_summary(m, v);
-	if (!machine_has_cpu_mhz)
-		return 0;
 	seq_printf(m, "\ncpu number      : %ld\n", n);
+	show_cpu_topology(m, n);
+	show_cpu_ids(m, n);
 	show_cpu_mhz(m, n);
 	return 0;
 }
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ad74472..a76dd27 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -25,8 +25,6 @@
 #include <linux/compat.h>
 #include <trace/syscall.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/switch_to.h>
@@ -869,6 +867,9 @@
 	unsigned long mask = -1UL;
 	long ret = -1;
 
+	if (is_compat_task())
+		mask = 0xffffffff;
+
 	/*
 	 * The sysc_tracesys code in entry.S stored the system
 	 * call number to gprs[2].
@@ -882,17 +883,35 @@
 		goto skip;
 	}
 
+#ifdef CONFIG_SECCOMP
 	/* Do the secure computing check after ptrace. */
-	if (secure_computing(NULL)) {
-		/* seccomp failures shouldn't expose any additional code. */
-		goto skip;
+	if (unlikely(test_thread_flag(TIF_SECCOMP))) {
+		struct seccomp_data sd;
+
+		if (is_compat_task()) {
+			sd.instruction_pointer = regs->psw.addr & 0x7fffffff;
+			sd.arch = AUDIT_ARCH_S390;
+		} else {
+			sd.instruction_pointer = regs->psw.addr;
+			sd.arch = AUDIT_ARCH_S390X;
+		}
+
+		sd.nr = regs->int_code & 0xffff;
+		sd.args[0] = regs->orig_gpr2 & mask;
+		sd.args[1] = regs->gprs[3] & mask;
+		sd.args[2] = regs->gprs[4] & mask;
+		sd.args[3] = regs->gprs[5] & mask;
+		sd.args[4] = regs->gprs[6] & mask;
+		sd.args[5] = regs->gprs[7] & mask;
+
+		if (__secure_computing(&sd) == -1)
+			goto skip;
 	}
+#endif /* CONFIG_SECCOMP */
 
 	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
 		trace_sys_enter(regs, regs->int_code & 0xffff);
 
-	if (is_compat_task())
-		mask = 0xffffffff;
 
 	audit_syscall_entry(regs->int_code & 0xffff, regs->orig_gpr2 & mask,
 			    regs->gprs[3] &mask, regs->gprs[4] &mask,
@@ -925,28 +944,14 @@
 
 static int s390_regs_get(struct task_struct *target,
 			 const struct user_regset *regset,
-			 unsigned int pos, unsigned int count,
-			 void *kbuf, void __user *ubuf)
+			 struct membuf to)
 {
+	unsigned pos;
 	if (target == current)
 		save_access_regs(target->thread.acrs);
 
-	if (kbuf) {
-		unsigned long *k = kbuf;
-		while (count > 0) {
-			*k++ = __peek_user(target, pos);
-			count -= sizeof(*k);
-			pos += sizeof(*k);
-		}
-	} else {
-		unsigned long __user *u = ubuf;
-		while (count > 0) {
-			if (__put_user(__peek_user(target, pos), u++))
-				return -EFAULT;
-			count -= sizeof(*u);
-			pos += sizeof(*u);
-		}
-	}
+	for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long))
+		membuf_store(&to, __peek_user(target, pos));
 	return 0;
 }
 
@@ -987,8 +992,8 @@
 }
 
 static int s390_fpregs_get(struct task_struct *target,
-			   const struct user_regset *regset, unsigned int pos,
-			   unsigned int count, void *kbuf, void __user *ubuf)
+			   const struct user_regset *regset,
+			   struct membuf to)
 {
 	_s390_fp_regs fp_regs;
 
@@ -998,8 +1003,7 @@
 	fp_regs.fpc = target->thread.fpu.fpc;
 	fpregs_store(&fp_regs, &target->thread.fpu);
 
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &fp_regs, 0, -1);
+	return membuf_write(&to, &fp_regs, sizeof(fp_regs));
 }
 
 static int s390_fpregs_set(struct task_struct *target,
@@ -1046,20 +1050,9 @@
 
 static int s390_last_break_get(struct task_struct *target,
 			       const struct user_regset *regset,
-			       unsigned int pos, unsigned int count,
-			       void *kbuf, void __user *ubuf)
+			       struct membuf to)
 {
-	if (count > 0) {
-		if (kbuf) {
-			unsigned long *k = kbuf;
-			*k = target->thread.last_break;
-		} else {
-			unsigned long  __user *u = ubuf;
-			if (__put_user(target->thread.last_break, u))
-				return -EFAULT;
-		}
-	}
-	return 0;
+	return membuf_store(&to, target->thread.last_break);
 }
 
 static int s390_last_break_set(struct task_struct *target,
@@ -1072,16 +1065,13 @@
 
 static int s390_tdb_get(struct task_struct *target,
 			const struct user_regset *regset,
-			unsigned int pos, unsigned int count,
-			void *kbuf, void __user *ubuf)
+			struct membuf to)
 {
 	struct pt_regs *regs = task_pt_regs(target);
-	unsigned char *data;
 
 	if (!(regs->int_code & 0x200))
 		return -ENODATA;
-	data = target->thread.trap_tdb;
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, data, 0, 256);
+	return membuf_write(&to, target->thread.trap_tdb, 256);
 }
 
 static int s390_tdb_set(struct task_struct *target,
@@ -1094,8 +1084,7 @@
 
 static int s390_vxrs_low_get(struct task_struct *target,
 			     const struct user_regset *regset,
-			     unsigned int pos, unsigned int count,
-			     void *kbuf, void __user *ubuf)
+			     struct membuf to)
 {
 	__u64 vxrs[__NUM_VXRS_LOW];
 	int i;
@@ -1106,7 +1095,7 @@
 		save_fpu_regs();
 	for (i = 0; i < __NUM_VXRS_LOW; i++)
 		vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+	return membuf_write(&to, vxrs, sizeof(vxrs));
 }
 
 static int s390_vxrs_low_set(struct task_struct *target,
@@ -1135,18 +1124,14 @@
 
 static int s390_vxrs_high_get(struct task_struct *target,
 			      const struct user_regset *regset,
-			      unsigned int pos, unsigned int count,
-			      void *kbuf, void __user *ubuf)
+			      struct membuf to)
 {
-	__vector128 vxrs[__NUM_VXRS_HIGH];
-
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
 	if (target == current)
 		save_fpu_regs();
-	memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
-
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+	return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
+			    __NUM_VXRS_HIGH * sizeof(__vector128));
 }
 
 static int s390_vxrs_high_set(struct task_struct *target,
@@ -1168,12 +1153,9 @@
 
 static int s390_system_call_get(struct task_struct *target,
 				const struct user_regset *regset,
-				unsigned int pos, unsigned int count,
-				void *kbuf, void __user *ubuf)
+				struct membuf to)
 {
-	unsigned int *data = &target->thread.system_call;
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   data, 0, sizeof(unsigned int));
+	return membuf_store(&to, target->thread.system_call);
 }
 
 static int s390_system_call_set(struct task_struct *target,
@@ -1188,8 +1170,7 @@
 
 static int s390_gs_cb_get(struct task_struct *target,
 			  const struct user_regset *regset,
-			  unsigned int pos, unsigned int count,
-			  void *kbuf, void __user *ubuf)
+			  struct membuf to)
 {
 	struct gs_cb *data = target->thread.gs_cb;
 
@@ -1199,8 +1180,7 @@
 		return -ENODATA;
 	if (target == current)
 		save_gs_cb(data);
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   data, 0, sizeof(struct gs_cb));
+	return membuf_write(&to, data, sizeof(struct gs_cb));
 }
 
 static int s390_gs_cb_set(struct task_struct *target,
@@ -1244,8 +1224,7 @@
 
 static int s390_gs_bc_get(struct task_struct *target,
 			  const struct user_regset *regset,
-			  unsigned int pos, unsigned int count,
-			  void *kbuf, void __user *ubuf)
+			  struct membuf to)
 {
 	struct gs_cb *data = target->thread.gs_bc_cb;
 
@@ -1253,8 +1232,7 @@
 		return -ENODEV;
 	if (!data)
 		return -ENODATA;
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   data, 0, sizeof(struct gs_cb));
+	return membuf_write(&to, data, sizeof(struct gs_cb));
 }
 
 static int s390_gs_bc_set(struct task_struct *target,
@@ -1304,8 +1282,7 @@
 
 static int s390_runtime_instr_get(struct task_struct *target,
 				const struct user_regset *regset,
-				unsigned int pos, unsigned int count,
-				void *kbuf, void __user *ubuf)
+				struct membuf to)
 {
 	struct runtime_instr_cb *data = target->thread.ri_cb;
 
@@ -1314,8 +1291,7 @@
 	if (!data)
 		return -ENODATA;
 
-	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   data, 0, sizeof(struct runtime_instr_cb));
+	return membuf_write(&to, data, sizeof(struct runtime_instr_cb));
 }
 
 static int s390_runtime_instr_set(struct task_struct *target,
@@ -1375,7 +1351,7 @@
 		.n = sizeof(s390_regs) / sizeof(long),
 		.size = sizeof(long),
 		.align = sizeof(long),
-		.get = s390_regs_get,
+		.regset_get = s390_regs_get,
 		.set = s390_regs_set,
 	},
 	{
@@ -1383,7 +1359,7 @@
 		.n = sizeof(s390_fp_regs) / sizeof(long),
 		.size = sizeof(long),
 		.align = sizeof(long),
-		.get = s390_fpregs_get,
+		.regset_get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
 	{
@@ -1391,7 +1367,7 @@
 		.n = 1,
 		.size = sizeof(unsigned int),
 		.align = sizeof(unsigned int),
-		.get = s390_system_call_get,
+		.regset_get = s390_system_call_get,
 		.set = s390_system_call_set,
 	},
 	{
@@ -1399,7 +1375,7 @@
 		.n = 1,
 		.size = sizeof(long),
 		.align = sizeof(long),
-		.get = s390_last_break_get,
+		.regset_get = s390_last_break_get,
 		.set = s390_last_break_set,
 	},
 	{
@@ -1407,7 +1383,7 @@
 		.n = 1,
 		.size = 256,
 		.align = 1,
-		.get = s390_tdb_get,
+		.regset_get = s390_tdb_get,
 		.set = s390_tdb_set,
 	},
 	{
@@ -1415,7 +1391,7 @@
 		.n = __NUM_VXRS_LOW,
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_vxrs_low_get,
+		.regset_get = s390_vxrs_low_get,
 		.set = s390_vxrs_low_set,
 	},
 	{
@@ -1423,7 +1399,7 @@
 		.n = __NUM_VXRS_HIGH,
 		.size = sizeof(__vector128),
 		.align = sizeof(__vector128),
-		.get = s390_vxrs_high_get,
+		.regset_get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
 	{
@@ -1431,7 +1407,7 @@
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_gs_cb_get,
+		.regset_get = s390_gs_cb_get,
 		.set = s390_gs_cb_set,
 	},
 	{
@@ -1439,7 +1415,7 @@
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_gs_bc_get,
+		.regset_get = s390_gs_bc_get,
 		.set = s390_gs_bc_set,
 	},
 	{
@@ -1447,13 +1423,13 @@
 		.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_runtime_instr_get,
+		.regset_get = s390_runtime_instr_get,
 		.set = s390_runtime_instr_set,
 	},
 };
 
 static const struct user_regset_view user_s390_view = {
-	.name = UTS_MACHINE,
+	.name = "s390x",
 	.e_machine = EM_S390,
 	.regsets = s390_regsets,
 	.n = ARRAY_SIZE(s390_regsets)
@@ -1462,28 +1438,15 @@
 #ifdef CONFIG_COMPAT
 static int s390_compat_regs_get(struct task_struct *target,
 				const struct user_regset *regset,
-				unsigned int pos, unsigned int count,
-				void *kbuf, void __user *ubuf)
+				struct membuf to)
 {
+	unsigned n;
+
 	if (target == current)
 		save_access_regs(target->thread.acrs);
 
-	if (kbuf) {
-		compat_ulong_t *k = kbuf;
-		while (count > 0) {
-			*k++ = __peek_user_compat(target, pos);
-			count -= sizeof(*k);
-			pos += sizeof(*k);
-		}
-	} else {
-		compat_ulong_t __user *u = ubuf;
-		while (count > 0) {
-			if (__put_user(__peek_user_compat(target, pos), u++))
-				return -EFAULT;
-			count -= sizeof(*u);
-			pos += sizeof(*u);
-		}
-	}
+	for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t))
+		membuf_store(&to, __peek_user_compat(target, n));
 	return 0;
 }
 
@@ -1525,29 +1488,14 @@
 
 static int s390_compat_regs_high_get(struct task_struct *target,
 				     const struct user_regset *regset,
-				     unsigned int pos, unsigned int count,
-				     void *kbuf, void __user *ubuf)
+				     struct membuf to)
 {
 	compat_ulong_t *gprs_high;
+	int i;
 
-	gprs_high = (compat_ulong_t *)
-		&task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)];
-	if (kbuf) {
-		compat_ulong_t *k = kbuf;
-		while (count > 0) {
-			*k++ = *gprs_high;
-			gprs_high += 2;
-			count -= sizeof(*k);
-		}
-	} else {
-		compat_ulong_t __user *u = ubuf;
-		while (count > 0) {
-			if (__put_user(*gprs_high, u++))
-				return -EFAULT;
-			gprs_high += 2;
-			count -= sizeof(*u);
-		}
-	}
+	gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs;
+	for (i = 0; i < NUM_GPRS; i++, gprs_high += 2)
+		membuf_store(&to, *gprs_high);
 	return 0;
 }
 
@@ -1586,23 +1534,11 @@
 
 static int s390_compat_last_break_get(struct task_struct *target,
 				      const struct user_regset *regset,
-				      unsigned int pos, unsigned int count,
-				      void *kbuf, void __user *ubuf)
+				      struct membuf to)
 {
-	compat_ulong_t last_break;
+	compat_ulong_t last_break = target->thread.last_break;
 
-	if (count > 0) {
-		last_break = target->thread.last_break;
-		if (kbuf) {
-			unsigned long *k = kbuf;
-			*k = last_break;
-		} else {
-			unsigned long  __user *u = ubuf;
-			if (__put_user(last_break, u))
-				return -EFAULT;
-		}
-	}
-	return 0;
+	return membuf_store(&to, (unsigned long)last_break);
 }
 
 static int s390_compat_last_break_set(struct task_struct *target,
@@ -1619,7 +1555,7 @@
 		.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
-		.get = s390_compat_regs_get,
+		.regset_get = s390_compat_regs_get,
 		.set = s390_compat_regs_set,
 	},
 	{
@@ -1627,7 +1563,7 @@
 		.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
-		.get = s390_fpregs_get,
+		.regset_get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
 	{
@@ -1635,7 +1571,7 @@
 		.n = 1,
 		.size = sizeof(compat_uint_t),
 		.align = sizeof(compat_uint_t),
-		.get = s390_system_call_get,
+		.regset_get = s390_system_call_get,
 		.set = s390_system_call_set,
 	},
 	{
@@ -1643,7 +1579,7 @@
 		.n = 1,
 		.size = sizeof(long),
 		.align = sizeof(long),
-		.get = s390_compat_last_break_get,
+		.regset_get = s390_compat_last_break_get,
 		.set = s390_compat_last_break_set,
 	},
 	{
@@ -1651,7 +1587,7 @@
 		.n = 1,
 		.size = 256,
 		.align = 1,
-		.get = s390_tdb_get,
+		.regset_get = s390_tdb_get,
 		.set = s390_tdb_set,
 	},
 	{
@@ -1659,7 +1595,7 @@
 		.n = __NUM_VXRS_LOW,
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_vxrs_low_get,
+		.regset_get = s390_vxrs_low_get,
 		.set = s390_vxrs_low_set,
 	},
 	{
@@ -1667,7 +1603,7 @@
 		.n = __NUM_VXRS_HIGH,
 		.size = sizeof(__vector128),
 		.align = sizeof(__vector128),
-		.get = s390_vxrs_high_get,
+		.regset_get = s390_vxrs_high_get,
 		.set = s390_vxrs_high_set,
 	},
 	{
@@ -1675,7 +1611,7 @@
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
 		.size = sizeof(compat_long_t),
 		.align = sizeof(compat_long_t),
-		.get = s390_compat_regs_high_get,
+		.regset_get = s390_compat_regs_high_get,
 		.set = s390_compat_regs_high_set,
 	},
 	{
@@ -1683,7 +1619,7 @@
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_gs_cb_get,
+		.regset_get = s390_gs_cb_get,
 		.set = s390_gs_cb_set,
 	},
 	{
@@ -1691,7 +1627,7 @@
 		.n = sizeof(struct gs_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_gs_bc_get,
+		.regset_get = s390_gs_bc_get,
 		.set = s390_gs_bc_set,
 	},
 	{
@@ -1699,7 +1635,7 @@
 		.n = sizeof(struct runtime_instr_cb) / sizeof(__u64),
 		.size = sizeof(__u64),
 		.align = sizeof(__u64),
-		.get = s390_runtime_instr_get,
+		.regset_get = s390_runtime_instr_get,
 		.set = s390_runtime_instr_set,
 	},
 };
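
Note on the ptrace.c regset conversion above: every ->get() handler that used
to walk (pos, count, kbuf, ubuf) by hand now implements ->regset_get() and
simply streams its data into a struct membuf via membuf_write()/membuf_store().
The snippet below is a simplified user-space model of that pattern, written
only to show how the buffer cursor and remaining size advance; the real struct
membuf and its helpers live in include/linux/regset.h and differ in detail.

#include <stdio.h>
#include <string.h>

struct membuf {
	void *p;	/* next byte to fill */
	size_t left;	/* bytes remaining in the destination */
};

static int membuf_write(struct membuf *s, const void *v, size_t size)
{
	if (size > s->left)
		size = s->left;
	memcpy(s->p, v, size);
	s->p = (char *)s->p + size;
	s->left -= size;
	return s->left;
}

/* store one word, the way s390_regs_get() emits __peek_user() values */
static int membuf_store_long(struct membuf *s, unsigned long v)
{
	return membuf_write(s, &v, sizeof(v));
}

int main(void)
{
	unsigned long out[4];
	struct membuf to = { .p = out, .left = sizeof(out) };
	unsigned long pos;

	for (pos = 0; pos < 4; pos++)
		membuf_store_long(&to, pos * 8);	/* fake register contents */

	for (pos = 0; pos < 4; pos++)
		printf("out[%lu] = %lu\n", pos, out[pos]);
	return 0;
}
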
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f661f17..f9f8721 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -37,7 +37,7 @@
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/kernel_stat.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/pfn.h>
@@ -93,10 +93,6 @@
 
 unsigned long int_hwcap = 0;
 
-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
-int __bootdata_preserved(prot_virt_guest);
-#endif
-
 int __bootdata(noexec_disabled);
 int __bootdata(memory_end_set);
 unsigned long __bootdata(memory_end);
@@ -106,12 +102,13 @@
 
 struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
 struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
-unsigned long __bootdata_preserved(__swsusp_reset_dma);
 unsigned long __bootdata_preserved(__stext_dma);
 unsigned long __bootdata_preserved(__etext_dma);
 unsigned long __bootdata_preserved(__sdma);
 unsigned long __bootdata_preserved(__edma);
 unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned int __bootdata_preserved(zlib_dfltcc_support);
+EXPORT_SYMBOL(zlib_dfltcc_support);
 
 unsigned long VMALLOC_START;
 EXPORT_SYMBOL(VMALLOC_START);
@@ -121,6 +118,7 @@
 
 struct page *vmemmap;
 EXPORT_SYMBOL(vmemmap);
+unsigned long vmemmap_size;
 
 unsigned long MODULES_VADDR;
 unsigned long MODULES_END;
@@ -130,6 +128,12 @@
 EXPORT_SYMBOL(lowcore_ptr);
 
 /*
+ * The Write Back bit position in the physaddr is given by the SLPC PCI.
+ * Leaving the mask zero always uses write through, which is safe.
+ */
+unsigned long mio_wb_bit_mask __ro_after_init;
+
+/*
  * This is set up by the setup-routine at boot-time
  * for S390 need to find out, what we have to setup
  * using address 0x10400 ...
@@ -242,14 +246,12 @@
 		SET_CONSOLE_SCLP;
 #endif
 	}
-	if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
-		conswitchp = &dummy_con;
 }
 
 #ifdef CONFIG_CRASH_DUMP
 static void __init setup_zfcpdump(void)
 {
-	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
+	if (!is_ipl_type_dump())
 		return;
 	if (OLDMEM_BASE)
 		return;
@@ -304,17 +306,14 @@
 void (*pm_power_off)(void) = machine_power_off;
 EXPORT_SYMBOL_GPL(pm_power_off);
 
-void *restart_stack __section(.data);
+void *restart_stack;
 
 unsigned long stack_alloc(void)
 {
 #ifdef CONFIG_VMAP_STACK
-	return (unsigned long)
-		__vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
-				     VMALLOC_START, VMALLOC_END,
-				     THREADINFO_GFP,
-				     PAGE_KERNEL, 0, NUMA_NO_NODE,
-				     __builtin_return_address(0));
+	return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
+			THREADINFO_GFP, NUMA_NO_NODE,
+			__builtin_return_address(0));
 #else
 	return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 #endif
@@ -373,8 +372,12 @@
 
 static void __init setup_lowcore_dat_off(void)
 {
+	unsigned long int_psw_mask = PSW_KERNEL_BITS;
 	struct lowcore *lc;
 
+	if (IS_ENABLED(CONFIG_KASAN))
+		int_psw_mask |= PSW_MASK_DAT;
+
 	/*
 	 * Setup lowcore for boot cpu
 	 */
@@ -386,16 +389,15 @@
 
 	lc->restart_psw.mask = PSW_KERNEL_BITS;
 	lc->restart_psw.addr = (unsigned long) restart_int_handler;
-	lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
 	lc->external_new_psw.addr = (unsigned long) ext_int_handler;
-	lc->svc_new_psw.mask = PSW_KERNEL_BITS |
-		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+	lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
 	lc->svc_new_psw.addr = (unsigned long) system_call;
-	lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
 	lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
 	lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
 	lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
-	lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
+	lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
 	lc->io_new_psw.addr = (unsigned long) io_int_handler;
 	lc->clock_comparator = clock_comparator_max;
 	lc->nodat_stack = ((unsigned long) &init_thread_union)
@@ -410,7 +412,6 @@
 	memcpy(lc->alt_stfle_fac_list, S390_lowcore.alt_stfle_fac_list,
 	       sizeof(lc->alt_stfle_fac_list));
 	nmi_alloc_boot_cpu(lc);
-	vdso_alloc_boot_cpu(lc);
 	lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
 	lc->async_enter_timer = S390_lowcore.async_enter_timer;
 	lc->exit_timer = S390_lowcore.exit_timer;
@@ -453,6 +454,7 @@
 	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
 	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
 	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+	lc->preempt_count = PREEMPT_DISABLED;
 
 	set_prefix((u32)(unsigned long) lc);
 	lowcore_ptr[0] = lc;
@@ -492,8 +494,9 @@
 static void __init setup_resources(void)
 {
 	struct resource *res, *std_res, *sub_res;
-	struct memblock_region *reg;
+	phys_addr_t start, end;
 	int j;
+	u64 i;
 
 	code_resource.start = (unsigned long) _text;
 	code_resource.end = (unsigned long) _etext - 1;
@@ -502,7 +505,7 @@
 	bss_resource.start = (unsigned long) __bss_start;
 	bss_resource.end = (unsigned long) __bss_stop - 1;
 
-	for_each_memblock(memory, reg) {
+	for_each_mem_range(i, &start, &end) {
 		res = memblock_alloc(sizeof(*res), 8);
 		if (!res)
 			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
@@ -510,8 +513,13 @@
 		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		res->name = "System RAM";
-		res->start = reg->base;
-		res->end = reg->base + reg->size - 1;
+		res->start = start;
+		/*
+		 * In memblock, end points to the first byte after the
+		 * range while in resources, end points to the last byte in
+		 * the range.
+		 */
+		res->end = end - 1;
 		request_resource(&iomem_resource, res);
 
 		for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@@ -554,19 +562,17 @@
 	unsigned long vmax, tmp;
 
 	/* Choose kernel address space layout: 3 or 4 levels. */
-	if (IS_ENABLED(CONFIG_KASAN)) {
-		vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
-			   ? _REGION1_SIZE
-			   : _REGION2_SIZE;
-	} else {
-		tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
-		tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
-		if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
-			vmax = _REGION2_SIZE; /* 3-level kernel page table */
-		else
-			vmax = _REGION1_SIZE; /* 4-level kernel page table */
-	}
-
+	tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
+	tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+	if (tmp + vmalloc_size + MODULES_LEN <= _REGION2_SIZE)
+		vmax = _REGION2_SIZE; /* 3-level kernel page table */
+	else
+		vmax = _REGION1_SIZE; /* 4-level kernel page table */
+	if (is_prot_virt_host())
+		adjust_to_uv_max(&vmax);
+#ifdef CONFIG_KASAN
+	vmax = kasan_vmax;
+#endif
 	/* module area is at the end of the kernel address space. */
 	MODULES_END = vmax;
 	MODULES_VADDR = MODULES_END - MODULES_LEN;
@@ -585,9 +591,14 @@
 	/* Take care that memory_end is set and <= vmemmap */
 	memory_end = min(memory_end ?: max_physmem_end, (unsigned long)vmemmap);
 #ifdef CONFIG_KASAN
-	/* fit in kasan shadow memory region between 1:1 and vmemmap */
 	memory_end = min(memory_end, KASAN_SHADOW_START);
-	vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
+#endif
+	vmemmap_size = SECTION_ALIGN_UP(memory_end / PAGE_SIZE) * sizeof(struct page);
+#ifdef CONFIG_KASAN
+	/* move vmemmap above kasan shadow only if stands in a way */
+	if (KASAN_SHADOW_END > (unsigned long)vmemmap &&
+	    (unsigned long)vmemmap + vmemmap_size > KASAN_SHADOW_START)
+		vmemmap = max(vmemmap, (struct page *)KASAN_SHADOW_END);
 #endif
 	max_pfn = max_low_pfn = PFN_DOWN(memory_end);
 	memblock_remove(memory_end, ULONG_MAX);
@@ -598,9 +609,10 @@
 #ifdef CONFIG_CRASH_DUMP
 
 /*
- * When kdump is enabled, we have to ensure that no memory from
- * the area [0 - crashkernel memory size] and
- * [crashk_res.start - crashk_res.end] is set offline.
+ * When kdump is enabled, we have to ensure that no memory from the area
+ * [0 - crashkernel memory size] is set offline - it will be exchanged with
+ * the crashkernel memory region when kdump is triggered. The crashkernel
+ * memory region can never get offlined (pages are unmovable).
  */
 static int kdump_mem_notifier(struct notifier_block *nb,
 			      unsigned long action, void *data)
@@ -611,11 +623,7 @@
 		return NOTIFY_OK;
 	if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
 		return NOTIFY_BAD;
-	if (arg->start_pfn > PFN_DOWN(crashk_res.end))
-		return NOTIFY_OK;
-	if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
-		return NOTIFY_OK;
-	return NOTIFY_BAD;
+	return NOTIFY_OK;
 }
 
 static struct notifier_block kdump_mem_nb = {
@@ -764,14 +772,6 @@
 		memblock_free(start, size);
 }
 
-static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
-{
-	memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
-		     start, start + size - 1);
-	memblock_add_range(&memblock.memory, start, size, 0, 0);
-	memblock_add_range(&memblock.physmem, start, size, 0, 0);
-}
-
 static const char * __init get_mem_info_source(void)
 {
 	switch (mem_detect.info_source) {
@@ -792,13 +792,16 @@
 	unsigned long start, end;
 	int i;
 
-	memblock_dbg("physmem info source: %s (%hhd)\n",
-		     get_mem_info_source(), mem_detect.info_source);
+	pr_debug("physmem info source: %s (%hhd)\n",
+		 get_mem_info_source(), mem_detect.info_source);
 	/* keep memblock lists close to the kernel */
 	memblock_set_bottom_up(true);
-	for_each_mem_detect_block(i, &start, &end)
+	for_each_mem_detect_block(i, &start, &end) {
+		memblock_add(start, end - start);
 		memblock_physmem_add(start, end - start);
+	}
 	memblock_set_bottom_up(false);
+	memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
 	memblock_dump_all();
 }
 
@@ -832,18 +835,16 @@
 
 static void __init setup_memory(void)
 {
-	struct memblock_region *reg;
+	phys_addr_t start, end;
+	u64 i;
 
 	/*
 	 * Init storage key for present memory
 	 */
-	for_each_memblock(memory, reg) {
-		storage_key_init_range(reg->base, reg->base + reg->size);
-	}
-	psw_set_key(PAGE_DEFAULT_KEY);
+	for_each_mem_range(i, &start, &end)
+		storage_key_init_range(start, end);
 
-	/* Only cosmetics */
-	memblock_enforce_memory_limit(memblock_end_of_DRAM());
+	psw_set_key(PAGE_DEFAULT_KEY);
 }
 
 /*
@@ -1034,8 +1035,7 @@
 {
 	union diag318_info diag318_info = {
 		.cpnc = CPNC_LINUX,
-		.cpvc_linux = 0,
-		.cpvc_distro = {0},
+		.cpvc = 0,
 	};
 
 	if (!sclp.has_diag318)
@@ -1140,14 +1140,7 @@
 	free_mem_detect_info();
 	remove_oldmem();
 
-	/*
-	 * Make sure all chunks are MAX_ORDER aligned so we don't need the
-	 * extra checks that HOLES_IN_ZONE would require.
-	 *
-	 * Is this still required?
-	 */
-	memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));
-
+	setup_uv();
 	setup_memory_end();
 	setup_memory();
 	dma_contiguous_reserve(memory_end);
@@ -1191,7 +1184,7 @@
 	if (IS_ENABLED(CONFIG_EXPOLINE))
 		nospec_init_branches();
 
-	/* Setup zfcpdump support */
+	/* Setup zfcp/nvme dump support */
 	setup_zfcpdump();
 
 	/* Add system specific data to the random pool */
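
Note on the setup.c memblock changes above: for_each_memblock() is replaced by
for_each_mem_range(), which hands back half-open [start, end) ranges, whereas
struct resource stores the last byte inclusively; that is why the hunk sets
res->end = end - 1. The small stand-alone program below only demonstrates that
range convention, using made-up block addresses.

#include <stdio.h>

struct range {
	unsigned long long start;	/* first byte of the block */
	unsigned long long end;		/* first byte after the block (exclusive) */
};

int main(void)
{
	struct range mem[] = {		/* hypothetical memory blocks */
		{ 0x0000000000ULL, 0x0080000000ULL },
		{ 0x0100000000ULL, 0x0180000000ULL },
	};
	unsigned int i;

	for (i = 0; i < sizeof(mem) / sizeof(mem[0]); i++)
		printf("resource: start %#llx end %#llx (size %#llx)\n",
		       mem[i].start,
		       mem[i].end - 1,	/* inclusive end, as a resource expects */
		       mem[i].end - mem[i].start);
	return 0;
}
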
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index e6fca54..9e900a8 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -487,7 +487,7 @@
 					regs->gprs[2] = -EINTR;
 					break;
 				}
-			/* fallthrough */
+				fallthrough;
 			case -ERESTARTNOINTR:
 				regs->gprs[2] = regs->orig_gpr2;
 				regs->psw.addr =
@@ -514,7 +514,7 @@
 		case -ERESTART_RESTARTBLOCK:
 			/* Restart with sys_restart_syscall */
 			regs->int_code = __NR_restart_syscall;
-		/* fallthrough */
+			fallthrough;
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
@@ -535,7 +535,6 @@
 
 void do_notify_resume(struct pt_regs *regs)
 {
-	clear_thread_flag(TIF_NOTIFY_RESUME);
 	tracehook_notify_resume(regs);
 	rseq_handle_notify_resume(NULL, regs);
 }
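
Note on the signal.c change above: the /* fallthrough */ comments are replaced
by the fallthrough pseudo-keyword, which the kernel maps to the compiler's
fallthrough attribute so that intentional case fall-through is machine-checked.
The stand-alone example below mimics that idea with a local macro; the error
codes are stand-ins, not the kernel's -ERESTART* values.

#include <stdio.h>

#if defined(__GNUC__) && __GNUC__ >= 7
#define fallthrough	__attribute__((__fallthrough__))
#else
#define fallthrough	do {} while (0)
#endif

static const char *restart_action(int err)
{
	switch (err) {
	case -1:	/* stand-in for -ERESTART_RESTARTBLOCK */
		/* pretend the syscall number was rewritten here */
		fallthrough;
	case -2:	/* stand-in for -ERESTARTNOINTR */
		return "restart the system call";
	default:
		return "deliver the error";
	}
}

int main(void)
{
	printf("%s\n", restart_action(-1));
	printf("%s\n", restart_action(-5));
	return 0;
}
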
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8c51462..5674792 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -61,6 +61,7 @@
 	ec_schedule = 0,
 	ec_call_function_single,
 	ec_stop_cpu,
+	ec_mcck_pending,
 };
 
 enum {
@@ -145,7 +146,7 @@
 
 static inline int pcpu_stopped(struct pcpu *pcpu)
 {
-	u32 uninitialized_var(status);
+	u32 status;
 
 	if (__pcpu_sigp(pcpu->address, SIGP_SENSE,
 			0, &status) != SIGP_CC_STATUS_STORED)
@@ -214,6 +215,7 @@
 	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
 	lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
 	lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
+	lc->preempt_count = PREEMPT_DISABLED;
 	if (nmi_alloc_per_cpu(lc))
 		goto out_async;
 	if (vdso_alloc_per_cpu(lc))
@@ -403,6 +405,11 @@
 	return -1;
 }
 
+void schedule_mcck_handler(void)
+{
+	pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
+}
+
 bool notrace arch_vcpu_is_preempted(int cpu)
 {
 	if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
@@ -415,14 +422,11 @@
 
 void notrace smp_yield_cpu(int cpu)
 {
-	if (MACHINE_HAS_DIAG9C) {
-		diag_stat_inc_norecursion(DIAG_STAT_X09C);
-		asm volatile("diag %0,0,0x9c"
-			     : : "d" (pcpu_devices[cpu].address));
-	} else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
-		diag_stat_inc_norecursion(DIAG_STAT_X044);
-		asm volatile("diag 0,0,0x44");
-	}
+	if (!MACHINE_HAS_DIAG9C)
+		return;
+	diag_stat_inc_norecursion(DIAG_STAT_X09C);
+	asm volatile("diag %0,0,0x9c"
+		     : : "d" (pcpu_devices[cpu].address));
 }
 
 /*
@@ -500,6 +504,8 @@
 		scheduler_ipi();
 	if (test_bit(ec_call_function_single, &bits))
 		generic_smp_call_function_single_interrupt();
+	if (test_bit(ec_mcck_pending, &bits))
+		s390_handle_mcck();
 }
 
 static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -601,14 +607,14 @@
 /*
  * Collect CPU state of the previous, crashed system.
  * There are four cases:
- * 1) standard zfcp dump
- *    condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * 1) standard zfcp/nvme dump
+ *    condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true
  *    The state for all CPUs except the boot CPU needs to be collected
  *    with sigp stop-and-store-status. The boot CPU state is located in
  *    the absolute lowcore of the memory stored in the HSA. The zcore code
  *    will copy the boot CPU state from the HSA.
- * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory)
- *    condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP
+ * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory)
+ *    condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true
  *    The state for all CPUs except the boot CPU needs to be collected
  *    with sigp stop-and-store-status. The firmware or the boot-loader
  *    stored the registers of the boot CPU in the absolute lowcore in the
@@ -655,7 +661,7 @@
 	unsigned long page;
 	bool is_boot_cpu;
 
-	if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP))
+	if (!(OLDMEM_BASE || is_ipl_type_dump()))
 		/* No previous system present, normal boot. */
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
@@ -681,7 +687,7 @@
 			/* Get the vector registers */
 			smp_save_cpu_vxrs(sa, addr, is_boot_cpu, page);
 		/*
-		 * For a zfcp dump OLDMEM_BASE == NULL and the registers
+		 * For a zfcp/nvme dump OLDMEM_BASE == NULL and the registers
 		 * of the boot CPU are stored in the HSA. To retrieve
 		 * these registers an SCLP request is required which is
 		 * done by drivers/s390/char/zcore.c:init_cpu_info()
@@ -706,6 +712,11 @@
 	return pcpu_devices[cpu].polarization;
 }
 
+int smp_cpu_get_cpu_address(int cpu)
+{
+	return pcpu_devices[cpu].address;
+}
+
 static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
 {
 	static int use_sigp_detection;
@@ -853,16 +864,16 @@
 	set_cpu_flag(CIF_ASCE_SECONDARY);
 	cpu_init();
 	rcu_cpu_starting(cpu);
-	preempt_disable();
 	init_cpu_timer();
 	vtime_init();
 	pfault_init();
-	notify_cpu_starting(smp_processor_id());
+	notify_cpu_starting(cpu);
 	if (topology_cpu_dedicated(cpu))
 		set_cpu_flag(CIF_DEDICATED_CPU);
 	else
 		clear_cpu_flag(CIF_DEDICATED_CPU);
-	set_cpu_online(smp_processor_id(), true);
+	set_cpu_online(cpu, true);
+	update_cpu_masks();
 	inc_irq_stat(CPU_RST);
 	local_irq_enable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -922,6 +933,7 @@
 	/* Handle possible pending IPIs */
 	smp_handle_ext_call();
 	set_cpu_online(smp_processor_id(), false);
+	update_cpu_masks();
 	/* Disable pseudo page faults on this cpu. */
 	pfault_fini();
 	/* Disable interrupt sources via control register. */
@@ -989,10 +1001,6 @@
 	smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
 }
 
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
 void __init smp_setup_processor_id(void)
 {
 	pcpu_devices[0].address = stap();
@@ -1122,6 +1130,7 @@
 
 	return sysfs_create_group(&s->kobj, &cpu_online_attr_group);
 }
+
 static int smp_cpu_pre_down(unsigned int cpu)
 {
 	struct device *s = &per_cpu(cpu_device, cpu)->dev;
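The smp_yield_cpu() hunk at the top of this file swaps the undirected diag 0x44 time-slice yield for diag 0x9c, a directed yield that tells the hypervisor which CPU to dispatch instead (hence the pcpu_devices[cpu].address operand). A minimal sketch of the kind of caller that benefits; only smp_yield_cpu() itself comes from this file, the loop and predicate are illustrative:

/* Illustrative sketch: hand the time slice to the presumed owner of a
 * contended resource instead of spinning (resource_is_free() is hypothetical).
 */
static void wait_for_owner(int owner_cpu)
{
	while (!resource_is_free()) {
		smp_yield_cpu(owner_cpu);	/* directed yield via diag 0x9c */
		cpu_relax();
	}
}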
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index f8fc4f8..7f1266c 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -9,6 +9,7 @@
 #include <linux/stacktrace.h>
 #include <asm/stacktrace.h>
 #include <asm/unwind.h>
+#include <asm/kprobes.h>
 
 void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 		     struct task_struct *task, struct pt_regs *regs)
@@ -18,7 +19,49 @@
 
 	unwind_for_each_frame(&state, task, regs, 0) {
 		addr = unwind_get_return_address(&state);
-		if (!addr || !consume_entry(cookie, addr, false))
+		if (!addr || !consume_entry(cookie, addr))
 			break;
 	}
 }
+
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack.  Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+			     void *cookie, struct task_struct *task)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	unwind_for_each_frame(&state, task, NULL, 0) {
+		if (state.stack_info.type != STACK_TYPE_TASK)
+			return -EINVAL;
+
+		if (state.regs)
+			return -EINVAL;
+
+		addr = unwind_get_return_address(&state);
+		if (!addr)
+			return -EINVAL;
+
+#ifdef CONFIG_KPROBES
+		/*
+		 * Mark stacktraces with kretprobed functions on them
+		 * as unreliable.
+		 */
+		if (state.ip == (unsigned long)kretprobe_trampoline)
+			return -EINVAL;
+#endif
+
+		if (!consume_entry(cookie, addr))
+			return -EINVAL;
+	}
+
+	/* Check for stack corruption */
+	if (unwind_error(&state))
+		return -EINVAL;
+	return 0;
+}
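The new arch_stack_walk_reliable() hooks s390 into the generic reliable stacktrace machinery (the consumer of note is livepatch, which refuses to transition tasks whose stacks cannot be walked reliably). A hedged usage sketch through the common wrapper; the buffer size and helper name are illustrative, and CONFIG_HAVE_RELIABLE_STACKTRACE is assumed to be selected:

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/stacktrace.h>

/* Sketch: report whether a task's stack can be captured reliably. */
static bool task_stack_is_reliable(struct task_struct *tsk)
{
	unsigned long entries[64];
	int nr;

	nr = stack_trace_save_tsk_reliable(tsk, entries, ARRAY_SIZE(entries));
	return nr >= 0;		/* a negative return means "not reliable" */
}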
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
deleted file mode 100644
index 75b7b30..0000000
--- a/arch/s390/kernel/suspend.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Suspend support specific for s390.
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- */
-
-#include <linux/pfn.h>
-#include <linux/suspend.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <asm/ctl_reg.h>
-#include <asm/ipl.h>
-#include <asm/cio.h>
-#include <asm/sections.h>
-#include "entry.h"
-
-/*
- * The restore of the saved pages in an hibernation image will set
- * the change and referenced bits in the storage key for each page.
- * Overindication of the referenced bits after an hibernation cycle
- * does not cause any harm but the overindication of the change bits
- * would cause trouble.
- * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
- * page to the most significant byte of the associated page frame
- * number in the hibernation image.
- */
-
-/*
- * Key storage is allocated as a linked list of pages.
- * The size of the keys array is (PAGE_SIZE - sizeof(long))
- */
-struct page_key_data {
-	struct page_key_data *next;
-	unsigned char data[];
-};
-
-#define PAGE_KEY_DATA_SIZE	(PAGE_SIZE - sizeof(struct page_key_data *))
-
-static struct page_key_data *page_key_data;
-static struct page_key_data *page_key_rp, *page_key_wp;
-static unsigned long page_key_rx, page_key_wx;
-unsigned long suspend_zero_pages;
-
-/*
- * For each page in the hibernation image one additional byte is
- * stored in the most significant byte of the page frame number.
- * On suspend no additional memory is required but on resume the
- * keys need to be memorized until the page data has been restored.
- * Only then can the storage keys be set to their old state.
- */
-unsigned long page_key_additional_pages(unsigned long pages)
-{
-	return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-}
-
-/*
- * Free page_key_data list of arrays.
- */
-void page_key_free(void)
-{
-	struct page_key_data *pkd;
-
-	while (page_key_data) {
-		pkd = page_key_data;
-		page_key_data = pkd->next;
-		free_page((unsigned long) pkd);
-	}
-}
-
-/*
- * Allocate page_key_data list of arrays with enough room to store
- * one byte for each page in the hibernation image.
- */
-int page_key_alloc(unsigned long pages)
-{
-	struct page_key_data *pk;
-	unsigned long size;
-
-	size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
-	while (size--) {
-		pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
-		if (!pk) {
-			page_key_free();
-			return -ENOMEM;
-		}
-		pk->next = page_key_data;
-		page_key_data = pk;
-	}
-	page_key_rp = page_key_wp = page_key_data;
-	page_key_rx = page_key_wx = 0;
-	return 0;
-}
-
-/*
- * Save the storage key into the upper 8 bits of the page frame number.
- */
-void page_key_read(unsigned long *pfn)
-{
-	struct page *page;
-	unsigned long addr;
-	unsigned char key;
-
-	page = pfn_to_page(*pfn);
-	addr = (unsigned long) page_address(page);
-	key = (unsigned char) page_get_storage_key(addr) & 0x7f;
-	if (arch_test_page_nodat(page))
-		key |= 0x80;
-	*(unsigned char *) pfn = key;
-}
-
-/*
- * Extract the storage key from the upper 8 bits of the page frame number
- * and store it in the page_key_data list of arrays.
- */
-void page_key_memorize(unsigned long *pfn)
-{
-	page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
-	*(unsigned char *) pfn = 0;
-	if (++page_key_wx < PAGE_KEY_DATA_SIZE)
-		return;
-	page_key_wp = page_key_wp->next;
-	page_key_wx = 0;
-}
-
-/*
- * Get the next key from the page_key_data list of arrays and set the
- * storage key of the page referred by @address. If @address refers to
- * a "safe" page the swsusp_arch_resume code will transfer the storage
- * key from the buffer page to the original page.
- */
-void page_key_write(void *address)
-{
-	struct page *page;
-	unsigned char key;
-
-	key = page_key_rp->data[page_key_rx];
-	page_set_storage_key((unsigned long) address, key & 0x7f, 0);
-	page = virt_to_page(address);
-	if (key & 0x80)
-		arch_set_page_nodat(page, 0);
-	else
-		arch_set_page_dat(page, 0);
-	if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
-		return;
-	page_key_rp = page_key_rp->next;
-	page_key_rx = 0;
-}
-
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
-	unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
-	unsigned long end_rodata_pfn = PFN_DOWN(__pa(__end_rodata)) - 1;
-	unsigned long stext_pfn = PFN_DOWN(__pa(_stext));
-
-	/* Always save lowcore pages (LC protection might be enabled). */
-	if (pfn <= LC_PAGES)
-		return 0;
-	if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
-		return 1;
-	/* Skip memory holes and read-only pages (DCSS, ...). */
-	if (pfn >= stext_pfn && pfn <= end_rodata_pfn)
-		return 0;
-	if (tprot(PFN_PHYS(pfn)))
-		return 1;
-	return 0;
-}
-
-/*
- * PM notifier callback for suspend
- */
-static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
-			 void *ptr)
-{
-	switch (action) {
-	case PM_SUSPEND_PREPARE:
-	case PM_HIBERNATION_PREPARE:
-		suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
-		if (!suspend_zero_pages)
-			return NOTIFY_BAD;
-		break;
-	case PM_POST_SUSPEND:
-	case PM_POST_HIBERNATION:
-		free_pages(suspend_zero_pages, LC_ORDER);
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-	return NOTIFY_OK;
-}
-
-static int __init suspend_pm_init(void)
-{
-	pm_notifier(suspend_pm_cb, 0);
-	return 0;
-}
-arch_initcall(suspend_pm_init);
-
-void save_processor_state(void)
-{
-	/* swsusp_arch_suspend() actually saves all cpu register contents.
-	 * Machine checks must be disabled since swsusp_arch_suspend() stores
-	 * register contents to their lowcore save areas. That's the same
-	 * place where register contents on machine checks would be saved.
-	 * To avoid register corruption disable machine checks.
-	 * We must also disable machine checks in the new psw mask for
-	 * program checks, since swsusp_arch_suspend() may generate program
-	 * checks. Disabling machine checks for all other new psw masks is
-	 * just paranoia.
-	 */
-	local_mcck_disable();
-	/* Disable lowcore protection */
-	__ctl_clear_bit(0,28);
-	S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
-	S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
-}
-
-void restore_processor_state(void)
-{
-	S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
-	S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
-	/* Enable lowcore protection */
-	__ctl_set_bit(0,28);
-	local_mcck_enable();
-}
-
-/* Called at the end of swsusp_arch_resume */
-void s390_early_resume(void)
-{
-	lgr_info_log();
-	channel_subsystem_reinit();
-	zpci_rescan();
-}
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
deleted file mode 100644
index a7baf0b..0000000
--- a/arch/s390/kernel/swsusp.S
+++ /dev/null
@@ -1,276 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * S390 64-bit swsusp implementation
- *
- * Copyright IBM Corp. 2009
- *
- * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
- *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
- */
-
-#include <linux/linkage.h>
-#include <asm/page.h>
-#include <asm/ptrace.h>
-#include <asm/thread_info.h>
-#include <asm/asm-offsets.h>
-#include <asm/nospec-insn.h>
-#include <asm/sigp.h>
-
-/*
- * Save register context in absolute 0 lowcore and call swsusp_save() to
- * create in-memory kernel image. The context is saved in the designated
- * "store status" memory locations (see POP).
- * We return from this function twice. The first time during the suspend to
- * disk process. The second time via the swsusp_arch_resume() function
- * (see below) in the resume process.
- * This function runs with disabled interrupts.
- */
-	GEN_BR_THUNK %r14
-
-	.section .text
-ENTRY(swsusp_arch_suspend)
-	lg	%r1,__LC_NODAT_STACK
-	stmg	%r6,%r15,__SF_GPRS(%r1)
-	aghi	%r1,-STACK_FRAME_OVERHEAD
-	stg	%r15,__SF_BACKCHAIN(%r1)
-	lgr	%r15,%r1
-
-	/* Store FPU registers */
-	brasl	%r14,save_fpu_regs
-
-	/* Deactivate DAT */
-	stnsm	__SF_EMPTY(%r15),0xfb
-
-	/* Store prefix register on stack */
-	stpx	__SF_EMPTY(%r15)
-
-	/* Save prefix register contents for lowcore copy */
-	llgf	%r10,__SF_EMPTY(%r15)
-
-	/* Get pointer to save area */
-	lghi	%r1,0x1000
-
-	/* Save CPU address */
-	stap	__LC_EXT_CPU_ADDR(%r0)
-
-	/* Store registers */
-	mvc	0x318(4,%r1),__SF_EMPTY(%r15)	/* move prefix to lowcore */
-	stam	%a0,%a15,0x340(%r1)		/* store access registers */
-	stctg	%c0,%c15,0x380(%r1)		/* store control registers */
-	stmg	%r0,%r15,0x280(%r1)		/* store general registers */
-
-	stpt	0x328(%r1)			/* store timer */
-	stck	__SF_EMPTY(%r15)		/* store clock */
-	stckc	0x330(%r1)			/* store clock comparator */
-
-	/* Update cputime accounting before going to sleep */
-	lg	%r0,__LC_LAST_UPDATE_TIMER
-	slg	%r0,0x328(%r1)
-	alg	%r0,__LC_SYSTEM_TIMER
-	stg	%r0,__LC_SYSTEM_TIMER
-	mvc	__LC_LAST_UPDATE_TIMER(8),0x328(%r1)
-	lg	%r0,__LC_LAST_UPDATE_CLOCK
-	slg	%r0,__SF_EMPTY(%r15)
-	alg	%r0,__LC_STEAL_TIMER
-	stg	%r0,__LC_STEAL_TIMER
-	mvc	__LC_LAST_UPDATE_CLOCK(8),__SF_EMPTY(%r15)
-
-	/* Activate DAT */
-	stosm	__SF_EMPTY(%r15),0x04
-
-	/* Set prefix page to zero */
-	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
-	spx	__SF_EMPTY(%r15)
-
-	/* Save absolute zero pages */
-	larl	%r2,suspend_zero_pages
-	lg	%r2,0(%r2)
-	lghi	%r4,0
-	lghi	%r3,2*PAGE_SIZE
-	lghi	%r5,2*PAGE_SIZE
-1:	mvcle	%r2,%r4,0
-	jo	1b
-
-	/* Copy lowcore to absolute zero lowcore */
-	lghi	%r2,0
-	lgr	%r4,%r10
-	lghi	%r3,2*PAGE_SIZE
-	lghi	%r5,2*PAGE_SIZE
-1:	mvcle	%r2,%r4,0
-	jo	1b
-
-	/* Save image */
-	brasl	%r14,swsusp_save
-
-	/* Restore prefix register and return */
-	lghi	%r1,0x1000
-	spx	0x318(%r1)
-	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
-	lghi	%r2,0
-	BR_EX	%r14
-ENDPROC(swsusp_arch_suspend)
-
-/*
- * Restore saved memory image to correct place and restore register context.
- * Then we return to the function that called swsusp_arch_suspend().
- * swsusp_arch_resume() runs with disabled interrupts.
- */
-ENTRY(swsusp_arch_resume)
-	stmg	%r6,%r15,__SF_GPRS(%r15)
-	lgr	%r1,%r15
-	aghi	%r15,-STACK_FRAME_OVERHEAD
-	stg	%r1,__SF_BACKCHAIN(%r15)
-
-	/* Make all free pages stable */
-	lghi	%r2,1
-	brasl	%r14,arch_set_page_states
-
-	/* Set prefix page to zero */
-	xc	__SF_EMPTY(4,%r15),__SF_EMPTY(%r15)
-	spx	__SF_EMPTY(%r15)
-
-	/* Deactivate DAT */
-	stnsm	__SF_EMPTY(%r15),0xfb
-
-	/* Restore saved image */
-	larl	%r1,restore_pblist
-	lg	%r1,0(%r1)
-	ltgr	%r1,%r1
-	jz	2f
-0:
-	lg	%r2,8(%r1)
-	lg	%r4,0(%r1)
-	iske	%r0,%r4
-	lghi	%r3,PAGE_SIZE
-	lghi	%r5,PAGE_SIZE
-1:
-	mvcle	%r2,%r4,0
-	jo	1b
-	lg	%r2,8(%r1)
-	sske	%r0,%r2
-	lg	%r1,16(%r1)
-	ltgr	%r1,%r1
-	jnz	0b
-2:
-	ptlb				/* flush tlb */
-
-	/* Reset System */
-	larl	%r1,.Lnew_pgm_check_psw
-	epsw	%r2,%r3
-	stm	%r2,%r3,0(%r1)
-	mvc	__LC_PGM_NEW_PSW(16,%r0),0(%r1)
-	larl	%r1,__swsusp_reset_dma
-	lg	%r1,0(%r1)
-	BASR_EX	%r14,%r1
-	larl	%r1,smp_cpu_mt_shift
-	icm	%r1,15,0(%r1)
-	jz	smt_done
-	llgfr	%r1,%r1
-smt_loop:
-	sigp	%r1,%r0,SIGP_SET_MULTI_THREADING
-	brc	8,smt_done			/* accepted */
-	brc	2,smt_loop			/* busy, try again */
-smt_done:
-	larl	%r1,.Lnew_pgm_check_psw
-	lpswe	0(%r1)
-pgm_check_entry:
-
-	/* Switch to original suspend CPU */
-	larl	%r1,.Lresume_cpu		/* Resume CPU address: r2 */
-	stap	0(%r1)
-	llgh	%r2,0(%r1)
-	llgh	%r1,__LC_EXT_CPU_ADDR(%r0)	/* Suspend CPU address: r1 */
-	cgr	%r1,%r2
-	je	restore_registers		/* r1 = r2 -> nothing to do */
-	larl	%r4,.Lrestart_suspend_psw	/* Set new restart PSW */
-	mvc	__LC_RST_NEW_PSW(16,%r0),0(%r4)
-3:
-	sigp	%r9,%r1,SIGP_INITIAL_CPU_RESET	/* sigp initial cpu reset */
-	brc	8,4f				/* accepted */
-	brc	2,3b				/* busy, try again */
-
-	/* Suspend CPU not available -> panic */
-	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
-	larl	%r2,.Lpanic_string
-	brasl	%r14,sclp_early_printk_force
-	larl	%r3,.Ldisabled_wait_31
-	lpsw	0(%r3)
-4:
-	/* Switch to suspend CPU */
-	sigp	%r9,%r1,SIGP_RESTART	/* sigp restart to suspend CPU */
-	brc	2,4b			/* busy, try again */
-5:
-	sigp	%r9,%r2,SIGP_STOP	/* sigp stop to current resume CPU */
-	brc	2,5b			/* busy, try again */
-6:	j	6b
-
-restart_suspend:
-	larl	%r1,.Lresume_cpu
-	llgh	%r2,0(%r1)
-7:
-	sigp	%r9,%r2,SIGP_SENSE	/* sigp sense, wait for resume CPU */
-	brc	8,7b			/* accepted, status 0, still running */
-	brc	2,7b			/* busy, try again */
-	tmll	%r9,0x40		/* Test if resume CPU is stopped */
-	jz	7b
-
-restore_registers:
-	/* Restore registers */
-	lghi	%r13,0x1000		/* %r1 = pointer to save area */
-
-	/* Ignore time spent in suspended state. */
-	llgf	%r1,0x318(%r13)
-	stck	__LC_LAST_UPDATE_CLOCK(%r1)
-	spt	0x328(%r13)		/* reprogram timer */
-	//sckc	0x330(%r13)		/* set clock comparator */
-
-	lctlg	%c0,%c15,0x380(%r13)	/* load control registers */
-	lam	%a0,%a15,0x340(%r13)	/* load access registers */
-
-	/* Load old stack */
-	lg	%r15,0x2f8(%r13)
-
-	/* Save prefix register */
-	mvc __SF_EMPTY(4,%r15),0x318(%r13)
-
-	/* Restore absolute zero pages */
-	lghi	%r2,0
-	larl	%r4,suspend_zero_pages
-	lg	%r4,0(%r4)
-	lghi	%r3,2*PAGE_SIZE
-	lghi	%r5,2*PAGE_SIZE
-1:	mvcle	%r2,%r4,0
-	jo	1b
-
-	/* Restore prefix register */
-	spx	__SF_EMPTY(%r15)
-
-	/* Activate DAT */
-	stosm	__SF_EMPTY(%r15),0x04
-
-	/* Make all free pages unstable */
-	lghi	%r2,0
-	brasl	%r14,arch_set_page_states
-
-	/* Call arch specific early resume code */
-	brasl	%r14,s390_early_resume
-
-	/* Return 0 */
-	lmg	%r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
-	lghi	%r2,0
-	BR_EX	%r14
-ENDPROC(swsusp_arch_resume)
-
-	.section .data..nosave,"aw",@progbits
-	.align	8
-.Ldisabled_wait_31:
-	.long  0x000a0000,0x00000000
-.Lpanic_string:
-	.asciz	"Resume not possible because suspend CPU is no longer available\n"
-	.align	8
-.Lrestart_suspend_psw:
-	.quad	0x0000000180000000,restart_suspend
-.Lnew_pgm_check_psw:
-	.quad	0,pgm_check_entry
-.Lresume_cpu:
-	.byte	0,0
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c..28c1680 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
 16   32		lchown			-				sys_lchown16
 19   common	lseek			sys_lseek			compat_sys_lseek
 20   common	getpid			sys_getpid			sys_getpid
-21   common	mount			sys_mount			compat_sys_mount
+21   common	mount			sys_mount			sys_mount
 22   common	umount			sys_oldumount			sys_oldumount
 23   32		setuid			-				sys_setuid16
 24   32		getuid			-				sys_getuid16
@@ -134,11 +134,11 @@
 142  64		select			sys_select			-
 143  common	flock			sys_flock			sys_flock
 144  common	msync			sys_msync			sys_msync
-145  common	readv			sys_readv			compat_sys_readv
-146  common	writev			sys_writev			compat_sys_writev
+145  common	readv			sys_readv			sys_readv
+146  common	writev			sys_writev			sys_writev
 147  common	getsid			sys_getsid			sys_getsid
 148  common	fdatasync		sys_fdatasync			sys_fdatasync
-149  common	_sysctl			sys_sysctl			compat_sys_sysctl
+149  common	_sysctl			-				-
 150  common	mlock			sys_mlock			sys_mlock
 151  common	munlock			sys_munlock			sys_munlock
 152  common	mlockall		sys_mlockall			sys_mlockall
@@ -316,7 +316,7 @@
 306  common	splice			sys_splice			sys_splice
 307  common	sync_file_range		sys_sync_file_range		compat_sys_s390_sync_file_range
 308  common	tee			sys_tee				sys_tee
-309  common	vmsplice		sys_vmsplice			compat_sys_vmsplice
+309  common	vmsplice		sys_vmsplice			sys_vmsplice
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
 337  common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
-340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
-341  common	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+340  common	process_vm_readv	sys_process_vm_readv		sys_process_vm_readv
+341  common	process_vm_writev	sys_process_vm_writev		sys_process_vm_writev
 342  common	s390_runtime_instr	sys_s390_runtime_instr		sys_s390_runtime_instr
 343  common	kcmp			sys_kcmp			sys_kcmp
 344  common	finit_module		sys_finit_module		sys_finit_module
@@ -372,8 +372,8 @@
 362  common	connect			sys_connect			sys_connect
 363  common	listen			sys_listen			sys_listen
 364  common	accept4			sys_accept4			sys_accept4
-365  common	getsockopt		sys_getsockopt			compat_sys_getsockopt
-366  common	setsockopt		sys_setsockopt			compat_sys_setsockopt
+365  common	getsockopt		sys_getsockopt			sys_getsockopt
+366  common	setsockopt		sys_setsockopt			sys_setsockopt
 367  common	getsockname		sys_getsockname			sys_getsockname
 368  common	getpeername		sys_getpeername			sys_getpeername
 369  common	sendto			sys_sendto			sys_sendto
@@ -438,3 +438,8 @@
 433  common	fspick			sys_fspick			sys_fspick
 434  common	pidfd_open		sys_pidfd_open			sys_pidfd_open
 435  common	clone3			sys_clone3			sys_clone3
+436  common	close_range		sys_close_range			sys_close_range
+437  common	openat2			sys_openat2			sys_openat2
+438  common	pidfd_getfd		sys_pidfd_getfd			sys_pidfd_getfd
+439  common	faccessat2		sys_faccessat2			sys_faccessat2
+440  common	process_madvise		sys_process_madvise		sys_process_madvise
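The compat-column changes above (mount, readv/writev, vmsplice, process_vm_readv/writev, get/setsockopt) follow the upstream move that made the native handlers compat-aware, so the dedicated compat_sys_* wrappers could be retired; _sysctl is removed entirely, and entries 436-440 add the new syscalls up to process_madvise. Schematically the compat handling now lives inside the native entry point, as in this sketch (the syscall name and helpers are illustrative, not taken from any entry above):

#include <linux/syscalls.h>
#include <linux/compat.h>

/* Illustration: one native entry serving both ABIs by checking the caller. */
SYSCALL_DEFINE2(example, void __user *, buf, size_t, len)
{
	if (in_compat_syscall())
		return do_example_compat(buf, len);	/* hypothetical */
	return do_example(buf, len);			/* hypothetical */
}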
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 11c32b2..b651745 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -41,6 +41,9 @@
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <vdso/vsyscall.h>
+#include <vdso/clocksource.h>
+#include <vdso/helpers.h>
 #include <asm/facility.h>
 #include <asm/delay.h>
 #include <asm/div64.h>
@@ -84,7 +87,7 @@
 
 	/* Initialize TOD steering parameters */
 	tod_steering_end = *(unsigned long long *) &tod_clock_base[1];
-	vdso_data->ts_end = tod_steering_end;
+	vdso_data->arch_data.tod_steering_end = tod_steering_end;
 
 	if (!test_facility(28))
 		return;
@@ -110,15 +113,6 @@
 }
 NOKPROBE_SYMBOL(sched_clock);
 
-/*
- * Monotonic_clock - returns # of nanoseconds passed since time_init()
- */
-unsigned long long monotonic_clock(void)
-{
-	return sched_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
 static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
 {
 	unsigned long long high, low, rem, sec, nsec;
@@ -246,7 +240,7 @@
 	preempt_disable(); /* protect from changes to steering parameters */
 	now = get_tod_clock();
 	adj = tod_steering_end - now;
-	if (unlikely((s64) adj >= 0))
+	if (unlikely((s64) adj > 0))
 		/*
 		 * manually steer by 1 cycle every 2^16 cycles. This
 		 * corresponds to shifting the tod delta by 15. 1s is
@@ -262,10 +256,11 @@
 	.name		= "tod",
 	.rating		= 400,
 	.read		= read_tod_clock,
-	.mask		= -1ULL,
+	.mask		= CLOCKSOURCE_MASK(64),
 	.mult		= 1000,
 	.shift		= 12,
 	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+	.vdso_clock_mode = VDSO_CLOCKMODE_TOD,
 };
 
 struct clocksource * __init clocksource_default_clock(void)
@@ -273,56 +268,6 @@
 	return &clocksource_tod;
 }
 
-void update_vsyscall(struct timekeeper *tk)
-{
-	u64 nsecps;
-
-	if (tk->tkr_mono.clock != &clocksource_tod)
-		return;
-
-	/* Make userspace gettimeofday spin until we're done. */
-	++vdso_data->tb_update_count;
-	smp_wmb();
-	vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
-	vdso_data->xtime_clock_sec = tk->xtime_sec;
-	vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
-	vdso_data->wtom_clock_sec =
-		tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
-		+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
-	nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
-	while (vdso_data->wtom_clock_nsec >= nsecps) {
-		vdso_data->wtom_clock_nsec -= nsecps;
-		vdso_data->wtom_clock_sec++;
-	}
-
-	vdso_data->xtime_coarse_sec = tk->xtime_sec;
-	vdso_data->xtime_coarse_nsec =
-		(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
-	vdso_data->wtom_coarse_sec =
-		vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
-	vdso_data->wtom_coarse_nsec =
-		vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
-	while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
-		vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
-		vdso_data->wtom_coarse_sec++;
-	}
-
-	vdso_data->tk_mult = tk->tkr_mono.mult;
-	vdso_data->tk_shift = tk->tkr_mono.shift;
-	vdso_data->hrtimer_res = hrtimer_resolution;
-	smp_wmb();
-	++vdso_data->tb_update_count;
-}
-
-extern struct timezone sys_tz;
-
-void update_vsyscall_tz(void)
-{
-	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
-	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-}
-
 /*
  * Initialize the TOD clock and the CPU timer of
  * the boot cpu.
@@ -351,7 +296,7 @@
 }
 
 static DEFINE_PER_CPU(atomic_t, clock_sync_word);
-static DEFINE_MUTEX(clock_sync_mutex);
+static DEFINE_MUTEX(stp_mutex);
 static unsigned long clock_sync_flags;
 
 #define CLOCK_SYNC_HAS_STP		0
@@ -441,7 +386,6 @@
 		/* Epoch overflow */
 		tod_clock_base[0]++;
 	/* Adjust TOD steering parameters. */
-	vdso_data->tb_update_count++;
 	now = get_tod_clock();
 	adj = tod_steering_end - now;
 	if (unlikely((s64) adj >= 0))
@@ -453,9 +397,9 @@
 		panic("TOD clock sync offset %lli is too large to drift\n",
 		      tod_steering_delta);
 	tod_steering_end = now + (abs(tod_steering_delta) << 15);
-	vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1;
-	vdso_data->ts_end = tod_steering_end;
-	vdso_data->tb_update_count++;
+	vdso_data->arch_data.tod_steering_end = tod_steering_end;
+	vdso_data->arch_data.tod_steering_delta = tod_steering_delta;
+
 	/* Update LPAR offset. */
 	if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
 		lpar_offset = qto.tod_epoch_difference;
@@ -502,7 +446,6 @@
 static void *stp_page;
 
 static void stp_work_fn(struct work_struct *work);
-static DEFINE_MUTEX(stp_work_mutex);
 static DECLARE_WORK(stp_work, stp_work_fn);
 static struct timer_list stp_timer;
 
@@ -612,7 +555,7 @@
 static int stp_sync_clock(void *data)
 {
 	struct clock_sync_data *sync = data;
-	unsigned long long clock_delta;
+	unsigned long long clock_delta, flags;
 	static int first;
 	int rc;
 
@@ -625,6 +568,7 @@
 		if (stp_info.todoff[0] || stp_info.todoff[1] ||
 		    stp_info.todoff[2] || stp_info.todoff[3] ||
 		    stp_info.tmd != 2) {
+			flags = vdso_update_begin();
 			rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
 					&clock_delta);
 			if (rc == 0) {
@@ -634,6 +578,7 @@
 				if (rc == 0 && stp_info.tmd != 2)
 					rc = -EAGAIN;
 			}
+			vdso_update_end(flags);
 		}
 		sync->in_sync = rc ? -EAGAIN : 1;
 		xchg(&first, 0);
@@ -653,6 +598,81 @@
 	return 0;
 }
 
+static int stp_clear_leap(void)
+{
+	struct __kernel_timex txc;
+	int ret;
+
+	memset(&txc, 0, sizeof(txc));
+
+	ret = do_adjtimex(&txc);
+	if (ret < 0)
+		return ret;
+
+	txc.modes = ADJ_STATUS;
+	txc.status &= ~(STA_INS|STA_DEL);
+	return do_adjtimex(&txc);
+}
+
+static void stp_check_leap(void)
+{
+	struct stp_stzi stzi;
+	struct stp_lsoib *lsoib = &stzi.lsoib;
+	struct __kernel_timex txc;
+	int64_t timediff;
+	int leapdiff, ret;
+
+	if (!stp_info.lu || !check_sync_clock()) {
+		/*
+		 * Either a scheduled leap second was removed by the operator,
+		 * or STP is out of sync. In both cases, clear the leap second
+		 * kernel flags.
+		 */
+		if (stp_clear_leap() < 0)
+			pr_err("failed to clear leap second flags\n");
+		return;
+	}
+
+	if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) {
+		pr_err("stzi failed\n");
+		return;
+	}
+
+	timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC;
+	leapdiff = lsoib->nlso - lsoib->also;
+
+	if (leapdiff != 1 && leapdiff != -1) {
+		pr_err("Cannot schedule %d leap seconds\n", leapdiff);
+		return;
+	}
+
+	if (timediff < 0) {
+		if (stp_clear_leap() < 0)
+			pr_err("failed to clear leap second flags\n");
+	} else if (timediff < 7200) {
+		memset(&txc, 0, sizeof(txc));
+		ret = do_adjtimex(&txc);
+		if (ret < 0)
+			return;
+
+		txc.modes = ADJ_STATUS;
+		if (leapdiff > 0)
+			txc.status |= STA_INS;
+		else
+			txc.status |= STA_DEL;
+		ret = do_adjtimex(&txc);
+		if (ret < 0)
+			pr_err("failed to set leap second flags\n");
+		/* arm Timer to clear leap second flags */
+		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC));
+	} else {
+		/* The day the leap second is scheduled for hasn't been reached. Retry
+		 * in one hour.
+		 */
+		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC));
+	}
+}
+
 /*
  * STP work. Check for the STP state and take over the clock
  * synchronization if the STP clock source is usable.
@@ -663,7 +683,7 @@
 	int rc;
 
 	/* prevent multiple execution. */
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 
 	if (!stp_online) {
 		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL);
@@ -671,7 +691,7 @@
 		goto out_unlock;
 	}
 
-	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0, NULL);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL);
 	if (rc)
 		goto out_unlock;
 
@@ -680,24 +700,25 @@
 		goto out_unlock;
 
 	/* Skip synchronization if the clock is already in sync. */
-	if (check_sync_clock())
-		goto out_unlock;
-
-	memset(&stp_sync, 0, sizeof(stp_sync));
-	cpus_read_lock();
-	atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
-	stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
-	cpus_read_unlock();
+	if (!check_sync_clock()) {
+		memset(&stp_sync, 0, sizeof(stp_sync));
+		cpus_read_lock();
+		atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
+		stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
+		cpus_read_unlock();
+	}
 
 	if (!check_sync_clock())
 		/*
 		 * There is a usable clock but the synchronization failed.
 		 * Retry after a second.
 		 */
-		mod_timer(&stp_timer, jiffies + HZ);
+		mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC));
+	else if (stp_info.lu)
+		stp_check_leap();
 
 out_unlock:
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 }
 
 /*
@@ -708,151 +729,178 @@
 	.dev_name	= "stp",
 };
 
-static ssize_t stp_ctn_id_show(struct device *dev,
+static ssize_t ctn_id_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
 		ret = sprintf(buf, "%016llx\n",
 			      *(unsigned long long *) stp_info.ctnid);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+static DEVICE_ATTR_RO(ctn_id);
 
-static ssize_t stp_ctn_type_show(struct device *dev,
+static ssize_t ctn_type_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
 		ret = sprintf(buf, "%i\n", stp_info.ctn);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+static DEVICE_ATTR_RO(ctn_type);
 
-static ssize_t stp_dst_offset_show(struct device *dev,
+static ssize_t dst_offset_show(struct device *dev,
 				   struct device_attribute *attr,
 				   char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x2000))
 		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+static DEVICE_ATTR_RO(dst_offset);
 
-static ssize_t stp_leap_seconds_show(struct device *dev,
+static ssize_t leap_seconds_show(struct device *dev,
 					struct device_attribute *attr,
 					char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x8000))
 		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+static DEVICE_ATTR_RO(leap_seconds);
 
-static ssize_t stp_stratum_show(struct device *dev,
+static ssize_t leap_seconds_scheduled_show(struct device *dev,
+						struct device_attribute *attr,
+						char *buf)
+{
+	struct stp_stzi stzi;
+	ssize_t ret;
+
+	mutex_lock(&stp_mutex);
+	if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) {
+		mutex_unlock(&stp_mutex);
+		return -ENODATA;
+	}
+
+	ret = chsc_stzi(stp_page, &stzi, sizeof(stzi));
+	mutex_unlock(&stp_mutex);
+	if (ret < 0)
+		return ret;
+
+	if (!stzi.lsoib.p)
+		return sprintf(buf, "0,0\n");
+
+	return sprintf(buf, "%llu,%d\n",
+		       tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
+		       stzi.lsoib.nlso - stzi.lsoib.also);
+}
+
+static DEVICE_ATTR_RO(leap_seconds_scheduled);
+
+static ssize_t stratum_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
 		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(stratum, 0400, stp_stratum_show, NULL);
+static DEVICE_ATTR_RO(stratum);
 
-static ssize_t stp_time_offset_show(struct device *dev,
+static ssize_t time_offset_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x0800))
 		ret = sprintf(buf, "%i\n", (int) stp_info.tto);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+static DEVICE_ATTR_RO(time_offset);
 
-static ssize_t stp_time_zone_offset_show(struct device *dev,
+static ssize_t time_zone_offset_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid() && (stp_info.vbits & 0x4000))
 		ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(time_zone_offset, 0400,
-			 stp_time_zone_offset_show, NULL);
+static DEVICE_ATTR_RO(time_zone_offset);
 
-static ssize_t stp_timing_mode_show(struct device *dev,
+static ssize_t timing_mode_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
 		ret = sprintf(buf, "%i\n", stp_info.tmd);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+static DEVICE_ATTR_RO(timing_mode);
 
-static ssize_t stp_timing_state_show(struct device *dev,
+static ssize_t timing_state_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	ssize_t ret = -ENODATA;
 
-	mutex_lock(&stp_work_mutex);
+	mutex_lock(&stp_mutex);
 	if (stpinfo_valid())
 		ret = sprintf(buf, "%i\n", stp_info.tst);
-	mutex_unlock(&stp_work_mutex);
+	mutex_unlock(&stp_mutex);
 	return ret;
 }
 
-static DEVICE_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+static DEVICE_ATTR_RO(timing_state);
 
-static ssize_t stp_online_show(struct device *dev,
+static ssize_t online_show(struct device *dev,
 				struct device_attribute *attr,
 				char *buf)
 {
 	return sprintf(buf, "%i\n", stp_online);
 }
 
-static ssize_t stp_online_store(struct device *dev,
+static ssize_t online_store(struct device *dev,
 				struct device_attribute *attr,
 				const char *buf, size_t count)
 {
@@ -863,14 +911,14 @@
 		return -EINVAL;
 	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
 		return -EOPNOTSUPP;
-	mutex_lock(&clock_sync_mutex);
+	mutex_lock(&stp_mutex);
 	stp_online = value;
 	if (stp_online)
 		set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
 	else
 		clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
 	queue_work(time_sync_wq, &stp_work);
-	mutex_unlock(&clock_sync_mutex);
+	mutex_unlock(&stp_mutex);
 	return count;
 }
 
@@ -878,18 +926,15 @@
  * Can't use DEVICE_ATTR because the attribute should be named
  * stp/online but dev_attr_online already exists in this file ..
  */
-static struct device_attribute dev_attr_stp_online = {
-	.attr = { .name = "online", .mode = 0600 },
-	.show	= stp_online_show,
-	.store	= stp_online_store,
-};
+static DEVICE_ATTR_RW(online);
 
 static struct device_attribute *stp_attributes[] = {
 	&dev_attr_ctn_id,
 	&dev_attr_ctn_type,
 	&dev_attr_dst_offset,
 	&dev_attr_leap_seconds,
-	&dev_attr_stp_online,
+	&dev_attr_online,
+	&dev_attr_leap_seconds_scheduled,
 	&dev_attr_stratum,
 	&dev_attr_time_offset,
 	&dev_attr_time_zone_offset,
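The rename of every stp_*_show() helper above is forced by the DEVICE_ATTR_RO()/DEVICE_ATTR_RW() conversion: those macros derive the sysfs file name and the callback names from the same token. Roughly, paraphrasing include/linux/device.h and include/linux/sysfs.h:

/* Rough expansion, for orientation only:
 *
 *	DEVICE_ATTR_RO(ctn_id)
 *
 * becomes
 *
 *	struct device_attribute dev_attr_ctn_id = {
 *		.attr = { .name = "ctn_id", .mode = 0444 },
 *		.show = ctn_id_show,
 *	};
 *
 * DEVICE_ATTR_RW(online) additionally wires .store = online_store and uses
 * mode 0644, which is why the show/store helpers had to drop the stp_ prefix.
 */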
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3627953..ca47141 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -26,7 +26,6 @@
 #include <linux/nodemask.h>
 #include <linux/node.h>
 #include <asm/sysinfo.h>
-#include <asm/numa.h>
 
 #define PTF_HORIZONTAL	(0UL)
 #define PTF_VERTICAL	(1UL)
@@ -63,8 +62,6 @@
 struct cpu_topology_s390 cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
-cpumask_t cpus_with_topology;
-
 static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
 {
 	cpumask_t mask;
@@ -86,11 +83,12 @@
 		cpumask_copy(&mask, cpu_present_mask);
 		break;
 	default:
-		/* fallthrough */
+		fallthrough;
 	case TOPOLOGY_MODE_SINGLE:
 		cpumask_copy(&mask, cpumask_of(cpu));
 		break;
 	}
+	cpumask_and(&mask, &mask, cpu_online_mask);
 	return mask;
 }
 
@@ -106,6 +104,7 @@
 	for (i = 0; i <= smp_cpu_mtid; i++)
 		if (cpu_present(cpu + i))
 			cpumask_set_cpu(cpu + i, &mask);
+	cpumask_and(&mask, &mask, cpu_online_mask);
 	return mask;
 }
 
@@ -138,7 +137,6 @@
 			cpumask_set_cpu(lcpu + i, &drawer->mask);
 			cpumask_set_cpu(lcpu + i, &book->mask);
 			cpumask_set_cpu(lcpu + i, &socket->mask);
-			cpumask_set_cpu(lcpu + i, &cpus_with_topology);
 			smp_cpu_set_polarization(lcpu + i, tl_core->pp);
 		}
 	}
@@ -245,10 +243,10 @@
 	return rc;
 }
 
-static void update_cpu_masks(void)
+void update_cpu_masks(void)
 {
-	struct cpu_topology_s390 *topo;
-	int cpu, id;
+	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
+	int cpu, sibling, pkg_first, smt_first, id;
 
 	for_each_possible_cpu(cpu) {
 		topo = &cpu_topology[cpu];
@@ -256,6 +254,7 @@
 		topo->core_mask = cpu_group_map(&socket_info, cpu);
 		topo->book_mask = cpu_group_map(&book_info, cpu);
 		topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+		topo->booted_cores = 0;
 		if (topology_mode != TOPOLOGY_MODE_HW) {
 			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
 			topo->thread_id = cpu;
@@ -263,11 +262,23 @@
 			topo->socket_id = id;
 			topo->book_id = id;
 			topo->drawer_id = id;
-			if (cpu_present(cpu))
-				cpumask_set_cpu(cpu, &cpus_with_topology);
 		}
 	}
-	numa_update_cpu_topology();
+	for_each_online_cpu(cpu) {
+		topo = &cpu_topology[cpu];
+		pkg_first = cpumask_first(&topo->core_mask);
+		topo_package = &cpu_topology[pkg_first];
+		if (cpu == pkg_first) {
+			for_each_cpu(sibling, &topo->core_mask) {
+				topo_sibling = &cpu_topology[sibling];
+				smt_first = cpumask_first(&topo_sibling->thread_mask);
+				if (sibling == smt_first)
+					topo_package->booted_cores++;
+			}
+		} else {
+			topo->booted_cores = topo_package->booted_cores;
+		}
+	}
 }
 
 void store_topology(struct sysinfo_15_1_x *info)
@@ -289,7 +300,6 @@
 	int rc = 0;
 
 	mutex_lock(&smp_cpu_state_mutex);
-	cpumask_clear(&cpus_with_topology);
 	if (MACHINE_HAS_TOPOLOGY) {
 		rc = 1;
 		store_topology(info);
@@ -346,9 +356,9 @@
 static void set_topology_timer(void)
 {
 	if (atomic_add_unless(&topology_poll, -1, 0))
-		mod_timer(&topology_timer, jiffies + HZ / 10);
+		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
 	else
-		mod_timer(&topology_timer, jiffies + HZ * 60);
+		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
 }
 
 void topology_expect_change(void)
@@ -584,7 +594,7 @@
 early_param("topology", topology_setup);
 
 static int topology_ctl_handler(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int enabled = topology_is_enabled();
 	int new_mode;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 164c028..16934fa 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -50,16 +50,8 @@
         } else {
                 const struct exception_table_entry *fixup;
 		fixup = s390_search_extables(regs->psw.addr);
-                if (fixup)
-			regs->psw.addr = extable_fixup(fixup);
-		else {
-			enum bug_trap_type btt;
-
-			btt = report_bug(regs->psw.addr, regs);
-			if (btt == BUG_TRAP_TYPE_WARN)
-				return;
+		if (!fixup || !ex_handle(fixup, regs))
 			die(regs, str);
-		}
         }
 }
 
@@ -245,6 +237,27 @@
 	do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
 }
 
+void monitor_event_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	if (user_mode(regs))
+		return;
+
+	switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
+	case BUG_TRAP_TYPE_NONE:
+		fixup = s390_search_extables(regs->psw.addr);
+		if (fixup)
+			ex_handle(fixup, regs);
+		break;
+	case BUG_TRAP_TYPE_WARN:
+		break;
+	case BUG_TRAP_TYPE_BUG:
+		die(regs, "monitor event");
+		break;
+	}
+}
+
 void kernel_stack_overflow(struct pt_regs *regs)
 {
 	bust_spinlocks(1);
@@ -255,8 +268,25 @@
 }
 NOKPROBE_SYMBOL(kernel_stack_overflow);
 
+static void __init test_monitor_call(void)
+{
+	int val = 1;
+
+	if (!IS_ENABLED(CONFIG_BUG))
+		return;
+	asm volatile(
+		"	mc	0,0\n"
+		"0:	xgr	%0,%0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "+d" (val));
+	if (!val)
+		panic("Monitor call doesn't work!\n");
+}
+
 void __init trap_init(void)
 {
 	sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
 	local_mcck_enable();
+	test_monitor_call();
 }
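monitor_event_exception() above is the handler for the monitor-call instruction that s390's BUG()/WARN() now emit: report_bug() classifies the trap, warnings simply resume, genuine bugs die(), and anything unrecognised falls back to the exception tables. test_monitor_call() merely proves at boot that mc 0,0 traps as expected. Caller-side semantics are unchanged; a WARN keeps its usual report-and-continue behaviour, e.g. (condition names illustrative):

	if (WARN_ON(count > limit))
		return -EINVAL;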
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 6e609b1..707fd99 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -36,6 +36,19 @@
 	return true;
 }
 
+static inline bool is_final_pt_regs(struct unwind_state *state,
+				    struct pt_regs *regs)
+{
+	/* user mode or kernel thread pt_regs at the bottom of task stack */
+	if (task_pt_regs(state->task) == regs)
+		return true;
+
+	/* user mode pt_regs at the bottom of irq stack */
+	return state->stack_info.type == STACK_TYPE_IRQ &&
+	       state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+	       READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
+}
+
 bool unwind_next_frame(struct unwind_state *state)
 {
 	struct stack_info *info = &state->stack_info;
@@ -46,16 +59,7 @@
 
 	regs = state->regs;
 	if (unlikely(regs)) {
-		if (state->reuse_sp) {
-			sp = state->sp;
-			state->reuse_sp = false;
-		} else {
-			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
-			if (unlikely(outside_of_stack(state, sp))) {
-				if (!update_stack_info(state, sp))
-					goto out_err;
-			}
-		}
+		sp = state->sp;
 		sf = (struct stack_frame *) sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
 		reliable = false;
@@ -81,21 +85,25 @@
 			/* No back-chain, look for a pt_regs structure */
 			sp = state->sp + STACK_FRAME_OVERHEAD;
 			if (!on_stack(info, sp, sizeof(struct pt_regs)))
-				goto out_stop;
+				goto out_err;
 			regs = (struct pt_regs *) sp;
-			if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
+			if (is_final_pt_regs(state, regs))
 				goto out_stop;
 			ip = READ_ONCE_NOCHECK(regs->psw.addr);
+			sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+			if (unlikely(outside_of_stack(state, sp))) {
+				if (!update_stack_info(state, sp))
+					goto out_err;
+			}
 			reliable = true;
 		}
 	}
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* Decode any ftrace redirection */
-	if (ip == (unsigned long) return_to_handler)
-		ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-					   ip, (void *) sp);
-#endif
+	/* Sanity check: ABI requires SP to be aligned 8 bytes. */
+	if (sp & 0x7)
+		goto out_err;
+
+	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
 
 	/* Update unwind state */
 	state->sp = sp;
@@ -113,13 +121,11 @@
 EXPORT_SYMBOL_GPL(unwind_next_frame);
 
 void __unwind_start(struct unwind_state *state, struct task_struct *task,
-		    struct pt_regs *regs, unsigned long sp)
+		    struct pt_regs *regs, unsigned long first_frame)
 {
 	struct stack_info *info = &state->stack_info;
-	unsigned long *mask = &state->stack_mask;
-	bool reliable, reuse_sp;
 	struct stack_frame *sf;
-	unsigned long ip;
+	unsigned long ip, sp;
 
 	memset(state, 0, sizeof(*state));
 	state->task = task;
@@ -131,38 +137,46 @@
 		return;
 	}
 
+	/* Get the instruction pointer from pt_regs or the stack frame */
+	if (regs) {
+		ip = regs->psw.addr;
+		sp = regs->gprs[15];
+	} else if (task == current) {
+		sp = current_frame_address();
+	} else {
+		sp = task->thread.ksp;
+	}
+
 	/* Get current stack pointer and initialize stack info */
-	if (get_stack_info(sp, task, info, mask) != 0 ||
-	    !on_stack(info, sp, sizeof(struct stack_frame))) {
+	if (!update_stack_info(state, sp)) {
 		/* Something is wrong with the stack pointer */
 		info->type = STACK_TYPE_UNKNOWN;
 		state->error = true;
 		return;
 	}
 
-	/* Get the instruction pointer from pt_regs or the stack frame */
-	if (regs) {
-		ip = READ_ONCE_NOCHECK(regs->psw.addr);
-		reliable = true;
-		reuse_sp = true;
-	} else {
-		sf = (struct stack_frame *) sp;
+	if (!regs) {
+		/* Stack frame is within valid stack */
+		sf = (struct stack_frame *)sp;
 		ip = READ_ONCE_NOCHECK(sf->gprs[8]);
-		reliable = false;
-		reuse_sp = false;
 	}
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	/* Decode any ftrace redirection */
-	if (ip == (unsigned long) return_to_handler)
-		ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
-					   ip, NULL);
-#endif
+	ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
 
 	/* Update unwind state */
 	state->sp = sp;
 	state->ip = ip;
-	state->reliable = reliable;
-	state->reuse_sp = reuse_sp;
+	state->reliable = true;
+
+	if (!first_frame)
+		return;
+	/* Skip through the call chain to the specified starting frame */
+	while (!unwind_done(state)) {
+		if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+			if (state->sp >= first_frame)
+				break;
+		}
+		unwind_next_frame(state);
+	}
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
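__unwind_start() now takes a first_frame argument and, after seeding sp/ip, walks forward until it reaches that frame, which lets callers trim their innermost frames; the old reuse_sp bookkeeping is gone and ftrace_graph_ret_addr() is called unconditionally (it is an identity function when CONFIG_FUNCTION_GRAPH_TRACER is off). A hedged sketch of a caller using first_frame; apart from the unwind API and current_frame_address() everything is illustrative:

/* Sketch: walk the stack starting no deeper than this function's own frame. */
static void dump_callers(void)
{
	struct unwind_state state;

	unwind_for_each_frame(&state, current, NULL, current_frame_address())
		pr_info("%pS\n", (void *)state.ip);
}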
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
new file mode 100644
index 0000000..c811b23
--- /dev/null
+++ b/arch/s390/kernel/uv.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Ultravisor functions and initialization
+ *
+ * Copyright IBM Corp. 2019, 2020
+ */
+#define KMSG_COMPONENT "prot_virt"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/swap.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/uv.h>
+
+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
+struct uv_info __bootdata_preserved(uv_info);
+
+#if IS_ENABLED(CONFIG_KVM)
+int __bootdata_preserved(prot_virt_host);
+EXPORT_SYMBOL(prot_virt_host);
+EXPORT_SYMBOL(uv_info);
+
+static int __init uv_init(unsigned long stor_base, unsigned long stor_len)
+{
+	struct uv_cb_init uvcb = {
+		.header.cmd = UVC_CMD_INIT_UV,
+		.header.len = sizeof(uvcb),
+		.stor_origin = stor_base,
+		.stor_len = stor_len,
+	};
+
+	if (uv_call(0, (uint64_t)&uvcb)) {
+		pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",
+		       uvcb.header.rc, uvcb.header.rrc);
+		return -1;
+	}
+	return 0;
+}
+
+void __init setup_uv(void)
+{
+	unsigned long uv_stor_base;
+
+	/*
+	 * keep these conditions in line with kasan init code has_uv_sec_stor_limit()
+	 */
+	if (!is_prot_virt_host())
+		return;
+
+	if (is_prot_virt_guest()) {
+		prot_virt_host = 0;
+		pr_warn("Protected virtualization not available in protected guests.");
+		return;
+	}
+
+	if (!test_facility(158)) {
+		prot_virt_host = 0;
+		pr_warn("Protected virtualization not supported by the hardware.");
+		return;
+	}
+
+	uv_stor_base = (unsigned long)memblock_alloc_try_nid(
+		uv_info.uv_base_stor_len, SZ_1M, SZ_2G,
+		MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+	if (!uv_stor_base) {
+		pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",
+			uv_info.uv_base_stor_len);
+		goto fail;
+	}
+
+	if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
+		memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+		goto fail;
+	}
+
+	pr_info("Reserving %luMB as ultravisor base storage\n",
+		uv_info.uv_base_stor_len >> 20);
+	return;
+fail:
+	pr_info("Disabling support for protected virtualization");
+	prot_virt_host = 0;
+}
+
+void adjust_to_uv_max(unsigned long *vmax)
+{
+	if (uv_info.max_sec_stor_addr)
+		*vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);
+}
+
+/*
+ * Requests the Ultravisor to pin the page in the shared state. This will
+ * cause an intercept when the guest attempts to unshare the pinned page.
+ */
+static int uv_pin_shared(unsigned long paddr)
+{
+	struct uv_cb_cfs uvcb = {
+		.header.cmd = UVC_CMD_PIN_PAGE_SHARED,
+		.header.len = sizeof(uvcb),
+		.paddr = paddr,
+	};
+
+	if (uv_call(0, (u64)&uvcb))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Requests the Ultravisor to destroy a guest page and make it
+ * accessible to the host. The destroy clears the page instead of
+ * exporting.
+ *
+ * @paddr: Absolute host address of page to be destroyed
+ */
+int uv_destroy_page(unsigned long paddr)
+{
+	struct uv_cb_cfs uvcb = {
+		.header.cmd = UVC_CMD_DESTR_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.paddr = paddr
+	};
+
+	if (uv_call(0, (u64)&uvcb)) {
+		/*
+		 * Older firmware uses 107/d as an indication of a non secure
+		 * page. Let us emulate the newer variant (no-op).
+		 */
+		if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd)
+			return 0;
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Requests the Ultravisor to encrypt a guest page and make it
+ * accessible to the host for paging (export).
+ *
+ * @paddr: Absolute host address of page to be exported
+ */
+int uv_convert_from_secure(unsigned long paddr)
+{
+	struct uv_cb_cfs uvcb = {
+		.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.paddr = paddr
+	};
+
+	if (uv_call(0, (u64)&uvcb))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Calculate the expected ref_count for a page that would otherwise have no
+ * further pins. This was cribbed from similar functions in other places in
+ * the kernel, but with some slight modifications. We know that a secure
+ * page can not be a huge page for example.
+ */
+static int expected_page_refs(struct page *page)
+{
+	int res;
+
+	res = page_mapcount(page);
+	if (PageSwapCache(page)) {
+		res++;
+	} else if (page_mapping(page)) {
+		res++;
+		if (page_has_private(page))
+			res++;
+	}
+	return res;
+}
+
+static int make_secure_pte(pte_t *ptep, unsigned long addr,
+			   struct page *exp_page, struct uv_cb_header *uvcb)
+{
+	pte_t entry = READ_ONCE(*ptep);
+	struct page *page;
+	int expected, rc = 0;
+
+	if (!pte_present(entry))
+		return -ENXIO;
+	if (pte_val(entry) & _PAGE_INVALID)
+		return -ENXIO;
+
+	page = pte_page(entry);
+	if (page != exp_page)
+		return -ENXIO;
+	if (PageWriteback(page))
+		return -EAGAIN;
+	expected = expected_page_refs(page);
+	if (!page_ref_freeze(page, expected))
+		return -EBUSY;
+	set_bit(PG_arch_1, &page->flags);
+	rc = uv_call(0, (u64)uvcb);
+	page_ref_unfreeze(page, expected);
+	/* Return -ENXIO if the page was not mapped, -EINVAL otherwise */
+	if (rc)
+		rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
+	return rc;
+}
+
+/*
+ * Requests the Ultravisor to make a page accessible to a guest.
+ * If it's brought in the first time, it will be cleared. If
+ * it has been exported before, it will be decrypted and integrity
+ * checked.
+ */
+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+{
+	struct vm_area_struct *vma;
+	bool local_drain = false;
+	spinlock_t *ptelock;
+	unsigned long uaddr;
+	struct page *page;
+	pte_t *ptep;
+	int rc;
+
+again:
+	rc = -EFAULT;
+	mmap_read_lock(gmap->mm);
+
+	uaddr = __gmap_translate(gmap, gaddr);
+	if (IS_ERR_VALUE(uaddr))
+		goto out;
+	vma = find_vma(gmap->mm, uaddr);
+	if (!vma)
+		goto out;
+	/*
+	 * Secure pages cannot be huge and userspace should not combine both.
+	 * In case userspace does it anyway this will result in an -EFAULT for
+	 * the unpack. The guest is thus never reaching secure mode. If
+	 * userspace is playing dirty tricks with mapping huge pages later
+	 * on this will result in a segmentation fault.
+	 */
+	if (is_vm_hugetlb_page(vma))
+		goto out;
+
+	rc = -ENXIO;
+	page = follow_page(vma, uaddr, FOLL_WRITE);
+	if (IS_ERR_OR_NULL(page))
+		goto out;
+
+	lock_page(page);
+	ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
+	rc = make_secure_pte(ptep, uaddr, page, uvcb);
+	pte_unmap_unlock(ptep, ptelock);
+	unlock_page(page);
+out:
+	mmap_read_unlock(gmap->mm);
+
+	if (rc == -EAGAIN) {
+		wait_on_page_writeback(page);
+	} else if (rc == -EBUSY) {
+		/*
+		 * If we have tried a local drain and the page refcount
+		 * still does not match our expected safe value, try with a
+		 * system wide drain. This is needed if the pagevecs holding
+		 * the page are on a different CPU.
+		 */
+		if (local_drain) {
+			lru_add_drain_all();
+			/* We give up here, and let the caller try again */
+			return -EAGAIN;
+		}
+		/*
+		 * We are here if the page refcount does not match the
+		 * expected safe value. The main culprits are usually
+		 * pagevecs. With lru_add_drain() we drain the pagevecs
+		 * on the local CPU so that hopefully the refcount will
+		 * reach the expected safe value.
+		 */
+		lru_add_drain();
+		local_drain = true;
+		/* And now we try again immediately after draining */
+		goto again;
+	} else if (rc == -ENXIO) {
+		if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
+			return -EFAULT;
+		return -EAGAIN;
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_make_secure);
+
+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
+{
+	struct uv_cb_cts uvcb = {
+		.header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+		.header.len = sizeof(uvcb),
+		.guest_handle = gmap->guest_handle,
+		.gaddr = gaddr,
+	};
+
+	return gmap_make_secure(gmap, gaddr, &uvcb);
+}
+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
+
+/*
+ * To be called with the page locked or with an extra reference! This will
+ * prevent gmap_make_secure from touching the page concurrently. Having 2
+ * parallel make_page_accessible is fine, as the UV calls will become a
+ * no-op if the page is already exported.
+ */
+int arch_make_page_accessible(struct page *page)
+{
+	int rc = 0;
+
+	/* Hugepage cannot be protected, so nothing to do */
+	if (PageHuge(page))
+		return 0;
+
+	/*
+	 * PG_arch_1 is used in 3 places:
+	 * 1. for kernel page tables during early boot
+	 * 2. for storage keys of huge pages and KVM
+	 * 3. As an indication that this page might be secure. This can
+	 *    overindicate, e.g. we set the bit before calling
+	 *    convert_to_secure.
+	 * As secure pages are never huge, all 3 variants can co-exist.
+	 */
+	if (!test_bit(PG_arch_1, &page->flags))
+		return 0;
+
+	rc = uv_pin_shared(page_to_phys(page));
+	if (!rc) {
+		clear_bit(PG_arch_1, &page->flags);
+		return 0;
+	}
+
+	rc = uv_convert_from_secure(page_to_phys(page));
+	if (!rc) {
+		clear_bit(PG_arch_1, &page->flags);
+		return 0;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(arch_make_page_accessible);
+
+#endif
+
+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
+static ssize_t uv_query_facilities(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	return scnprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",
+			uv_info.inst_calls_list[0],
+			uv_info.inst_calls_list[1],
+			uv_info.inst_calls_list[2],
+			uv_info.inst_calls_list[3]);
+}
+
+static struct kobj_attribute uv_query_facilities_attr =
+	__ATTR(facilities, 0444, uv_query_facilities, NULL);
+
+static ssize_t uv_query_feature_indications(struct kobject *kobj,
+					    struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
+}
+
+static struct kobj_attribute uv_query_feature_indications_attr =
+	__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
+
+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return scnprintf(page, PAGE_SIZE, "%d\n",
+			uv_info.max_guest_cpu_id + 1);
+}
+
+static struct kobj_attribute uv_query_max_guest_cpus_attr =
+	__ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);
+
+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *page)
+{
+	return scnprintf(page, PAGE_SIZE, "%d\n",
+			uv_info.max_num_sec_conf);
+}
+
+static struct kobj_attribute uv_query_max_guest_vms_attr =
+	__ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);
+
+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return scnprintf(page, PAGE_SIZE, "%lx\n",
+			uv_info.max_sec_stor_addr);
+}
+
+static struct kobj_attribute uv_query_max_guest_addr_attr =
+	__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);
+
+static struct attribute *uv_query_attrs[] = {
+	&uv_query_facilities_attr.attr,
+	&uv_query_feature_indications_attr.attr,
+	&uv_query_max_guest_cpus_attr.attr,
+	&uv_query_max_guest_vms_attr.attr,
+	&uv_query_max_guest_addr_attr.attr,
+	NULL,
+};
+
+static struct attribute_group uv_query_attr_group = {
+	.attrs = uv_query_attrs,
+};
+
+static struct kset *uv_query_kset;
+static struct kobject *uv_kobj;
+
+static int __init uv_info_init(void)
+{
+	int rc = -ENOMEM;
+
+	if (!test_facility(158))
+		return 0;
+
+	uv_kobj = kobject_create_and_add("uv", firmware_kobj);
+	if (!uv_kobj)
+		return -ENOMEM;
+
+	uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);
+	if (!uv_query_kset)
+		goto out_kobj;
+
+	rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);
+	if (!rc)
+		return 0;
+
+	kset_unregister(uv_query_kset);
+out_kobj:
+	kobject_del(uv_kobj);
+	kobject_put(uv_kobj);
+	return rc;
+}
+device_initcall(uv_info_init);
+#endif
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ed1fc08..f9da5b1 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -20,8 +20,9 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 #include <linux/compat.h>
+#include <linux/binfmts.h>
+#include <vdso/datapage.h>
 #include <asm/asm-offsets.h>
-#include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
@@ -29,13 +30,6 @@
 #include <asm/vdso.h>
 #include <asm/facility.h>
 
-#ifdef CONFIG_COMPAT_VDSO
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-#endif
-
 extern char vdso64_start, vdso64_end;
 static void *vdso64_kbase = &vdso64_start;
 static unsigned int vdso64_pages;
@@ -55,12 +49,6 @@
 
 	vdso_pagelist = vdso64_pagelist;
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	if (vma->vm_mm->context.compat_mm) {
-		vdso_pagelist = vdso32_pagelist;
-		vdso_pages = vdso32_pages;
-	}
-#endif
 
 	if (vmf->pgoff >= vdso_pages)
 		return VM_FAULT_SIGBUS;
@@ -76,10 +64,6 @@
 	unsigned long vdso_pages;
 
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	if (vma->vm_mm->context.compat_mm)
-		vdso_pages = vdso32_pages;
-#endif
 
 	if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
 		return -EINVAL;
@@ -114,35 +98,12 @@
 	struct vdso_data	data;
 	u8			page[PAGE_SIZE];
 } vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
-
-/*
- * Setup vdso data page.
- */
-static void __init vdso_init_data(struct vdso_data *vd)
-{
-	vd->ectg_available = test_facility(31);
-}
-
+struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data;
 /*
  * Allocate/free per cpu vdso data.
  */
 #define SEGMENT_ORDER	2
 
-/*
- * The initial vdso_data structure for the boot CPU. Eventually
- * it is replaced with a properly allocated structure in vdso_init.
- * This is necessary because a valid S390_lowcore.vdso_per_cpu_data
- * pointer is required to be able to return from an interrupt or
- * program check. See the exit paths in entry.S.
- */
-struct vdso_data boot_vdso_data __initdata;
-
-void __init vdso_alloc_boot_cpu(struct lowcore *lowcore)
-{
-	lowcore->vdso_per_cpu_data = (unsigned long) &boot_vdso_data;
-}
-
 int vdso_alloc_per_cpu(struct lowcore *lowcore)
 {
 	unsigned long segment_table, page_table, page_frame;
@@ -209,12 +170,10 @@
 	if (!vdso_enabled)
 		return 0;
 
+	if (is_compat_task())
+		return 0;
+
 	vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
-	mm->context.compat_mm = is_compat_task();
-	if (mm->context.compat_mm)
-		vdso_pages = vdso32_pages;
-#endif
 	/*
 	 * vDSO has a problem and was disabled, just don't "enable" it for
 	 * the process
@@ -227,7 +186,7 @@
 	 * it at vdso_base which is the "natural" base for it, but we might
 	 * fail and end up putting it elsewhere.
 	 */
-	if (down_write_killable(&mm->mmap_sem))
+	if (mmap_write_lock_killable(mm))
 		return -EINTR;
 	vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
@@ -258,7 +217,7 @@
 	rc = 0;
 
 out_up:
-	up_write(&mm->mmap_sem);
+	mmap_write_unlock(mm);
 	return rc;
 }
 
@@ -266,25 +225,6 @@
 {
 	int i;
 
-	vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT_VDSO
-	/* Calculate the size of the 32 bit vDSO */
-	vdso32_pages = ((&vdso32_end - &vdso32_start
-			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
-	/* Make sure pages are in the correct state */
-	vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
-				  GFP_KERNEL);
-	BUG_ON(vdso32_pagelist == NULL);
-	for (i = 0; i < vdso32_pages - 1; i++) {
-		struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
-		get_page(pg);
-		vdso32_pagelist[i] = pg;
-	}
-	vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
-	vdso32_pagelist[vdso32_pages] = NULL;
-#endif
-
 	/* Calculate the size of the 64 bit vDSO */
 	vdso64_pages = ((&vdso64_end - &vdso64_start
 			 + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
deleted file mode 100644
index e45fba9..0000000
--- a/arch/s390/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index aee9ffb..0000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
-
-KCOV_INSTRUMENT := n
-
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
-
-KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_31 += -m31 -s
-
-KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
-		    -Wl,--hash-style=both
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
-	$(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
-	$(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S FORCE
-	$(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
-quiet_cmd_vdso32as = VDSO32A $@
-      cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
-      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
-	@mkdir -p $(MODLIB)/vdso
-	$(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
deleted file mode 100644
index eaf9cf1..0000000
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_getres
-	.type  __kernel_clock_getres,@function
-__kernel_clock_getres:
-	CFI_STARTPROC
-	basr	%r1,0
-	la	%r1,4f-.(%r1)
-	chi	%r2,__CLOCK_REALTIME
-	je	0f
-	chi	%r2,__CLOCK_MONOTONIC
-	je	0f
-	la	%r1,5f-4f(%r1)
-	chi	%r2,__CLOCK_REALTIME_COARSE
-	je	0f
-	chi	%r2,__CLOCK_MONOTONIC_COARSE
-	jne	3f
-0:	ltr	%r3,%r3
-	jz	2f				/* res == NULL */
-1:	l	%r0,0(%r1)
-	xc	0(4,%r3),0(%r3)			/* set tp->tv_sec to zero */
-	st	%r0,4(%r3)			/* store tp->tv_usec */
-2:	lhi	%r2,0
-	br	%r14
-3:	lhi	%r1,__NR_clock_getres		/* fallback to svc */
-	svc	0
-	br	%r14
-	CFI_ENDPROC
-4:	.long	__CLOCK_REALTIME_RES
-5:	.long	__CLOCK_COARSE_RES
-	.size	__kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
deleted file mode 100644
index ada5c11..0000000
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_gettime
-	.type  __kernel_clock_gettime,@function
-__kernel_clock_gettime:
-	CFI_STARTPROC
-	ahi	%r15,-16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	basr	%r5,0
-0:	al	%r5,21f-0b(%r5)			/* get &_vdso_data */
-	chi	%r2,__CLOCK_REALTIME_COARSE
-	je	10f
-	chi	%r2,__CLOCK_REALTIME
-	je	11f
-	chi	%r2,__CLOCK_MONOTONIC_COARSE
-	je	9f
-	chi	%r2,__CLOCK_MONOTONIC
-	jne	19f
-
-	/* CLOCK_MONOTONIC */
-1:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	1b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,1(%r15)
-	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,2f
-	ahi	%r0,-1
-2:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	lr	%r2,%r0
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	3f
-	a	%r0,__VDSO_TK_MULT(%r5)
-3:	alr	%r0,%r2
-	al	%r0,__VDSO_WTOM_NSEC(%r5)
-	al	%r1,__VDSO_WTOM_NSEC+4(%r5)
-	brc	12,5f
-	ahi	%r0,1
-5:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r2)			/*  >> tk->shift */
-	l	%r2,__VDSO_WTOM_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	1b
-	basr	%r5,0
-6:	ltr	%r0,%r0
-	jnz	7f
-	cl	%r1,20f-6b(%r5)
-	jl	8f
-7:	ahi	%r2,1
-	sl	%r1,20f-6b(%r5)
-	brc	3,6b
-	ahi	%r0,-1
-	j	6b
-8:	st	%r2,0(%r3)			/* store tp->tv_sec */
-	st	%r1,4(%r3)			/* store tp->tv_nsec */
-	lhi	%r2,0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* CLOCK_MONOTONIC_COARSE */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	9b
-	l	%r2,__VDSO_WTOM_CRS_SEC+4(%r5)
-	l	%r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	9b
-	j	8b
-
-	/* CLOCK_REALTIME_COARSE */
-10:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	10b
-	l	%r2,__VDSO_XTIME_CRS_SEC+4(%r5)
-	l	%r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	10b
-	j	17f
-
-	/* CLOCK_REALTIME */
-11:	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	11b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,__VDSO_TS_END(%r5)	/* TOD steering end time */
-	s	%r0,1(%r15)			/* no - ts_steering_end */
-	sl	%r1,5(%r15)
-	brc	3,22f
-	ahi	%r0,-1
-22:	ltr	%r0,%r0				/* past end of steering? */
-	jm	24f
-	srdl	%r0,15				/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	23f
-	lcr	%r0,%r0				/* negative TOD offset */
-	lcr	%r1,%r1
-	je	23f
-	ahi	%r0,-1
-23:	a	%r0,1(%r15)			/* add TOD timestamp */
-	al	%r1,5(%r15)
-	brc	12,25f
-	ahi	%r0,1
-	j	25f
-24:	lm	%r0,%r1,1(%r15)			/* load TOD timestamp */
-25:	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,12f
-	ahi	%r0,-1
-12:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	lr	%r2,%r0
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	13f
-	a	%r0,__VDSO_TK_MULT(%r5)
-13:	alr	%r0,%r2
-	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
-	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
-	brc	12,14f
-	ahi	%r0,1
-14:	l	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r2)			/*  >> tk->shift */
-	l	%r2,__VDSO_XTIME_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	11b
-	basr	%r5,0
-15:	ltr	%r0,%r0
-	jnz	16f
-	cl	%r1,20f-15b(%r5)
-	jl	17f
-16:	ahi	%r2,1
-	sl	%r1,20f-15b(%r5)
-	brc	3,15b
-	ahi	%r0,-1
-	j	15b
-17:	st	%r2,0(%r3)			/* store tp->tv_sec */
-	st	%r1,4(%r3)			/* store tp->tv_nsec */
-	lhi	%r2,0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* Fallback to system call */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-19:	lhi	%r1,__NR_clock_gettime
-	svc	0
-	ahi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-
-20:	.long	1000000000
-21:	.long	_vdso_data - 0b
-	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
deleted file mode 100644
index 25515f3..0000000
--- a/arch/s390/kernel/vdso32/getcpu.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of getcpu() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2016
- *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_getcpu
-	.type  __kernel_getcpu,@function
-__kernel_getcpu:
-	CFI_STARTPROC
-	la	%r4,0
-	sacf	256
-	l	%r5,__VDSO_CPU_NR(%r4)
-	l	%r4,__VDSO_NODE_ID(%r4)
-	sacf	0
-	ltr	%r2,%r2
-	jz	2f
-	st	%r5,0(%r2)
-2:	ltr	%r3,%r3
-	jz	3f
-	st	%r4,0(%r3)
-3:	lhi	%r2,0
-	br	%r14
-	CFI_ENDPROC
-	.size	__kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index b23063f..0000000
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_gettimeofday
-	.type  __kernel_gettimeofday,@function
-__kernel_gettimeofday:
-	CFI_STARTPROC
-	ahi	%r15,-16
-	CFI_ADJUST_CFA_OFFSET 16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	basr	%r5,0
-0:	al	%r5,13f-0b(%r5)			/* get &_vdso_data */
-1:	ltr	%r3,%r3				/* check if tz is NULL */
-	je	2f
-	mvc	0(8,%r3),__VDSO_TIMEZONE(%r5)
-2:	ltr	%r2,%r2				/* check if tv is NULL */
-	je	10f
-	l	%r4,__VDSO_UPD_COUNT+4(%r5)	/* load update counter */
-	tml	%r4,0x0001			/* pending update ? loop */
-	jnz	1b
-	stcke	0(%r15)				/* Store TOD clock */
-	lm	%r0,%r1,__VDSO_TS_END(%r5)	/* TOD steering end time */
-	s	%r0,1(%r15)
-	sl	%r1,5(%r15)
-	brc	3,14f
-	ahi	%r0,-1
-14:	ltr	%r0,%r0				/* past end of steering? */
-	jm	16f
-	srdl	%r0,15				/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	15f
-	lcr	%r0,%r0				/* negative TOD offset */
-	lcr	%r1,%r1
-	je	15f
-	ahi	%r0,-1
-15:	a	%r0,1(%r15)			/* add TOD timestamp */
-	al	%r1,5(%r15)
-	brc	12,17f
-	ahi	%r0,1
-	j	17f
-16:	lm	%r0,%r1,1(%r15)			/* load TOD timestamp */
-17:	s	%r0,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	sl	%r1,__VDSO_XTIME_STAMP+4(%r5)
-	brc	3,3f
-	ahi	%r0,-1
-3:	ms	%r0,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	st	%r0,0(%r15)
-	l	%r0,__VDSO_TK_MULT(%r5)
-	ltr	%r1,%r1
-	mr	%r0,%r0
-	jnm	4f
-	a	%r0,__VDSO_TK_MULT(%r5)
-4:	al	%r0,0(%r15)
-	al	%r0,__VDSO_XTIME_NSEC(%r5)	/*  + xtime */
-	al	%r1,__VDSO_XTIME_NSEC+4(%r5)
-	brc	12,5f
-	ahi	%r0,1
-5:	mvc	0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
-	cl	%r4,__VDSO_UPD_COUNT+4(%r5)	/* check update counter */
-	jne	1b
-	l	%r4,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srdl	%r0,0(%r4)			/*  >> tk->shift */
-	l	%r4,0(%r15)			/* get tv_sec from stack */
-	basr	%r5,0
-6:	ltr	%r0,%r0
-	jnz	7f
-	cl	%r1,11f-6b(%r5)
-	jl	8f
-7:	ahi	%r4,1
-	sl	%r1,11f-6b(%r5)
-	brc	3,6b
-	ahi	%r0,-1
-	j	6b
-8:	st	%r4,0(%r2)			/* store tv->tv_sec */
-	ltr	%r1,%r1
-	m	%r0,12f-6b(%r5)
-	jnm	9f
-	al	%r0,12f-6b(%r5)
-9:	srl	%r0,6
-	st	%r0,4(%r2)			/* store tv->tv_usec */
-10:	slr	%r2,%r2
-	ahi	%r15,16
-	CFI_ADJUST_CFA_OFFSET -16
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-11:	.long	1000000000
-12:	.long	274877907
-13:	.long	_vdso_data - 0b
-	.size	__kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
deleted file mode 100644
index db19d06..0000000
--- a/arch/s390/kernel/vdso32/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
-	.long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index 721c495..0000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the infamous ld script for the 32 bits vdso
- * library
- */
-
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
-
-SECTIONS
-{
-	. = VDSO32_LBASE + SIZEOF_HEADERS;
-
-	.hash		: { *(.hash) }			:text
-	.gnu.hash	: { *(.gnu.hash) }
-	.dynsym		: { *(.dynsym) }
-	.dynstr		: { *(.dynstr) }
-	.gnu.version	: { *(.gnu.version) }
-	.gnu.version_d	: { *(.gnu.version_d) }
-	.gnu.version_r	: { *(.gnu.version_r) }
-
-	.note		: { *(.note.*) }		:text	:note
-
-	. = ALIGN(16);
-	.text		: {
-		*(.text .stub .text.* .gnu.linkonce.t.*)
-	} :text
-	PROVIDE(__etext = .);
-	PROVIDE(_etext = .);
-	PROVIDE(etext = .);
-
-	/*
-	 * Other stuff is appended to the text segment:
-	 */
-	.rodata		: { *(.rodata .rodata.* .gnu.linkonce.r.*) }
-	.rodata1	: { *(.rodata1) }
-
-	.dynamic	: { *(.dynamic) }		:text	:dynamic
-
-	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
-	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
-	.gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
-	.rela.dyn ALIGN(8) : { *(.rela.dyn) }
-	.got ALIGN(8)	: { *(.got .toc) }
-
-	_end = .;
-	PROVIDE(end = .);
-
-	/*
-	 * Stabs debugging sections are here too.
-	 */
-	.stab	       0 : { *(.stab) }
-	.stabstr       0 : { *(.stabstr) }
-	.stab.excl     0 : { *(.stab.excl) }
-	.stab.exclstr  0 : { *(.stab.exclstr) }
-	.stab.index    0 : { *(.stab.index) }
-	.stab.indexstr 0 : { *(.stab.indexstr) }
-	.comment       0 : { *(.comment) }
-
-	/*
-	 * DWARF debug sections.
-	 * Symbols in the DWARF debugging sections are relative to the
-	 * beginning of the section so we begin them at 0.
-	 */
-	/* DWARF 1 */
-	.debug		0 : { *(.debug) }
-	.line		0 : { *(.line) }
-	/* GNU DWARF 1 extensions */
-	.debug_srcinfo	0 : { *(.debug_srcinfo) }
-	.debug_sfnames	0 : { *(.debug_sfnames) }
-	/* DWARF 1.1 and DWARF 2 */
-	.debug_aranges	0 : { *(.debug_aranges) }
-	.debug_pubnames 0 : { *(.debug_pubnames) }
-	/* DWARF 2 */
-	.debug_info	0 : { *(.debug_info .gnu.linkonce.wi.*) }
-	.debug_abbrev	0 : { *(.debug_abbrev) }
-	.debug_line	0 : { *(.debug_line) }
-	.debug_frame	0 : { *(.debug_frame) }
-	.debug_str	0 : { *(.debug_str) }
-	.debug_loc	0 : { *(.debug_loc) }
-	.debug_macinfo	0 : { *(.debug_macinfo) }
-	/* SGI/MIPS DWARF 2 extensions */
-	.debug_weaknames 0 : { *(.debug_weaknames) }
-	.debug_funcnames 0 : { *(.debug_funcnames) }
-	.debug_typenames 0 : { *(.debug_typenames) }
-	.debug_varnames  0 : { *(.debug_varnames) }
-	/* DWARF 3 */
-	.debug_pubtypes 0 : { *(.debug_pubtypes) }
-	.debug_ranges	0 : { *(.debug_ranges) }
-	.gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
-	. = ALIGN(PAGE_SIZE);
-	PROVIDE(_vdso_data = .);
-
-	/DISCARD/	: {
-		*(.note.GNU-stack)
-		*(.branch_lt)
-		*(.data .data.* .gnu.linkonce.d.* .sdata*)
-		*(.bss .sbss .dynbss .dynsbss)
-	}
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME	0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
-	text		PT_LOAD FILEHDR PHDRS FLAGS(5);	/* PF_R|PF_X */
-	dynamic		PT_DYNAMIC FLAGS(4);		/* PF_R */
-	note		PT_NOTE FLAGS(4);		/* PF_R */
-	eh_frame_hdr	PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
-	VDSO_VERSION_STRING {
-	global:
-		/*
-		 * Has to be there for the kernel to find
-		 */
-		__kernel_gettimeofday;
-		__kernel_clock_gettime;
-		__kernel_clock_getres;
-		__kernel_getcpu;
-
-	local: *;
-	};
-}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
deleted file mode 100644
index de2fb93..0000000
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
-	__PAGE_ALIGNED_DATA
-
-	.globl vdso32_start, vdso32_end
-	.balign PAGE_SIZE
-vdso32_start:
-	.incbin "arch/s390/kernel/vdso32/vdso32.so"
-	.balign PAGE_SIZE
-vdso32_end:
-
-	.previous
diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore
index 3fd18cf..4ec8068 100644
--- a/arch/s390/kernel/vdso64/.gitignore
+++ b/arch/s390/kernel/vdso64/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 vdso64.lds
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 4a66a1c..13cc5a3 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -1,17 +1,23 @@
 # SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
+# List of files in the vdso
 
 KCOV_INSTRUMENT := n
+ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
+ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
 
-obj-vdso64 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
+include $(srctree)/lib/vdso/Makefile
+obj-vdso64 = vdso_user_wrapper.o note.o getcpu.o
+obj-cvdso64 = vdso64_generic.o
+CFLAGS_REMOVE_vdso64_generic.o = -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE)
 
 # Build rules
 
-targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
+targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg
 obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64))
 
 KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
+KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING
 
 KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS))
 KBUILD_AFLAGS_64 += -m64 -s
@@ -19,13 +25,13 @@
 KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
 KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
 ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
-	     --hash-style=both --build-id -T
+	     --hash-style=both --build-id=sha1 -T
 
 $(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
 $(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
 
 obj-y += vdso64_wrapper.o
-extra-y += vdso64.lds
+targets += vdso64.lds
 CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 
 # Disable gcov profiling, ubsan and kasan for VDSO code
@@ -37,7 +43,7 @@
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE
 	$(call if_changed,ld)
 
 # strip rule for the .so file
@@ -49,9 +55,14 @@
 $(obj-vdso64): %.o: %.S FORCE
 	$(call if_changed_dep,vdso64as)
 
+$(obj-cvdso64): %.o: %.c FORCE
+	$(call if_changed_dep,vdso64cc)
+
 # actual build commands
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
+quiet_cmd_vdso64cc = VDSO64C $@
+      cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $<
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
deleted file mode 100644
index 0c79caa..0000000
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_getres
-	.type  __kernel_clock_getres,@function
-__kernel_clock_getres:
-	CFI_STARTPROC
-	larl	%r1,3f
-	lg	%r0,0(%r1)
-	cghi	%r2,__CLOCK_REALTIME_COARSE
-	je	0f
-	cghi	%r2,__CLOCK_MONOTONIC_COARSE
-	je	0f
-	larl	%r1,_vdso_data
-	llgf	%r0,__VDSO_CLOCK_REALTIME_RES(%r1)
-	cghi	%r2,__CLOCK_REALTIME
-	je	0f
-	cghi	%r2,__CLOCK_MONOTONIC
-	je	0f
-	cghi	%r2,__CLOCK_THREAD_CPUTIME_ID
-	je	0f
-	cghi	%r2,-2		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
-	jne	2f
-	larl	%r5,_vdso_data
-	icm	%r0,15,__LC_ECTG_OK(%r5)
-	jz	2f
-0:	ltgr	%r3,%r3
-	jz	1f				/* res == NULL */
-	xc	0(8,%r3),0(%r3)			/* set tp->tv_sec to zero */
-	stg	%r0,8(%r3)			/* store tp->tv_usec */
-1:	lghi	%r2,0
-	br	%r14
-2:	lghi	%r1,__NR_clock_getres		/* fallback to svc */
-	svc	0
-	br	%r14
-	CFI_ENDPROC
-3:	.quad	__CLOCK_COARSE_RES
-	.size	__kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
deleted file mode 100644
index 9d2ee79..0000000
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ /dev/null
@@ -1,163 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_clock_gettime
-	.type  __kernel_clock_gettime,@function
-__kernel_clock_gettime:
-	CFI_STARTPROC
-	aghi	%r15,-16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	larl	%r5,_vdso_data
-	cghi	%r2,__CLOCK_REALTIME_COARSE
-	je	4f
-	cghi	%r2,__CLOCK_REALTIME
-	je	5f
-	cghi	%r2,-3		/* Per-thread CPUCLOCK with PID=0, VIRT=1 */
-	je	9f
-	cghi	%r2,__CLOCK_MONOTONIC_COARSE
-	je	3f
-	cghi	%r2,__CLOCK_MONOTONIC
-	jne	12f
-
-	/* CLOCK_MONOTONIC */
-0:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
-	tmll	%r4,0x0001			/* pending update ? loop */
-	jnz	0b
-	stcke	0(%r15)				/* Store TOD clock */
-	lgf	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	lg	%r0,__VDSO_WTOM_SEC(%r5)
-	lg	%r1,1(%r15)
-	sg	%r1,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	msgf	%r1,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	alg	%r1,__VDSO_WTOM_NSEC(%r5)
-	srlg	%r1,%r1,0(%r2)			/*  >> tk->shift */
-	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
-	jne	0b
-	larl	%r5,13f
-1:	clg	%r1,0(%r5)
-	jl	2f
-	slg	%r1,0(%r5)
-	aghi	%r0,1
-	j	1b
-2:	stg	%r0,0(%r3)			/* store tp->tv_sec */
-	stg	%r1,8(%r3)			/* store tp->tv_nsec */
-	lghi	%r2,0
-	aghi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* CLOCK_MONOTONIC_COARSE */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-3:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
-	tmll	%r4,0x0001			/* pending update ? loop */
-	jnz	3b
-	lg	%r0,__VDSO_WTOM_CRS_SEC(%r5)
-	lg	%r1,__VDSO_WTOM_CRS_NSEC(%r5)
-	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
-	jne	3b
-	j	2b
-
-	/* CLOCK_REALTIME_COARSE */
-4:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
-	tmll	%r4,0x0001			/* pending update ? loop */
-	jnz	4b
-	lg	%r0,__VDSO_XTIME_CRS_SEC(%r5)
-	lg	%r1,__VDSO_XTIME_CRS_NSEC(%r5)
-	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
-	jne	4b
-	j	7f
-
-	/* CLOCK_REALTIME */
-5:	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
-	tmll	%r4,0x0001			/* pending update ? loop */
-	jnz	5b
-	stcke	0(%r15)				/* Store TOD clock */
-	lg	%r1,1(%r15)
-	lg	%r0,__VDSO_TS_END(%r5)		/* TOD steering end time */
-	slgr	%r0,%r1				/* now - ts_steering_end */
-	ltgr	%r0,%r0				/* past end of steering ? */
-	jm	17f
-	srlg	%r0,%r0,15			/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	18f
-	lcgr	%r0,%r0				/* negative TOD offset */
-18:	algr	%r1,%r0				/* add steering offset */
-17:	lgf	%r2,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	sg	%r1,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	msgf	%r1,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	alg	%r1,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
-	srlg	%r1,%r1,0(%r2)			/*  >> tk->shift */
-	lg	%r0,__VDSO_XTIME_SEC(%r5)	/* tk->xtime_sec */
-	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
-	jne	5b
-	larl	%r5,13f
-6:	clg	%r1,0(%r5)
-	jl	7f
-	slg	%r1,0(%r5)
-	aghi	%r0,1
-	j	6b
-7:	stg	%r0,0(%r3)			/* store tp->tv_sec */
-	stg	%r1,8(%r3)			/* store tp->tv_nsec */
-	lghi	%r2,0
-	aghi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* CPUCLOCK_VIRT for this thread */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9:	lghi	%r4,0
-	icm	%r0,15,__VDSO_ECTG_OK(%r5)
-	jz	12f
-	sacf	256				/* Magic ectg instruction */
-	.insn	ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
-	sacf	0
-	algr	%r1,%r0				/* r1 = cputime as TOD value */
-	mghi	%r1,1000			/* convert to nanoseconds */
-	srlg	%r1,%r1,12			/* r1 = cputime in nanosec */
-	lgr	%r4,%r1
-	larl	%r5,13f
-	srlg	%r1,%r1,9			/* divide by 1000000000 */
-	mlg	%r0,8(%r5)
-	srlg	%r0,%r0,11			/* r0 = tv_sec */
-	stg	%r0,0(%r3)
-	msg	%r0,0(%r5)			/* calculate tv_nsec */
-	slgr	%r4,%r0				/* r4 = tv_nsec */
-	stg	%r4,8(%r3)
-	lghi	%r2,0
-	aghi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-
-	/* Fallback to system call */
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-12:	lghi	%r1,__NR_clock_gettime
-	svc	0
-	aghi	%r15,16
-	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-
-13:	.quad	1000000000
-14:	.quad	19342813113834067
-	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
index 2446e9d..3c04f73 100644
--- a/arch/s390/kernel/vdso64/getcpu.S
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -16,10 +16,8 @@
 	.type  __kernel_getcpu,@function
 __kernel_getcpu:
 	CFI_STARTPROC
-	la	%r4,0
 	sacf	256
-	l	%r5,__VDSO_CPU_NR(%r4)
-	l	%r4,__VDSO_NODE_ID(%r4)
+	lm	%r4,%r5,__VDSO_GETCPU_VAL(%r0)
 	sacf	0
 	ltgr	%r2,%r2
 	jz	2f
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
deleted file mode 100644
index aebe10d..0000000
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ /dev/null
@@ -1,71 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 64 bits processes in a
- * s390 kernel for use in the vDSO
- *
- *  Copyright IBM Corp. 2008
- *  Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
-	.text
-	.align 4
-	.globl __kernel_gettimeofday
-	.type  __kernel_gettimeofday,@function
-__kernel_gettimeofday:
-	CFI_STARTPROC
-	aghi	%r15,-16
-	CFI_ADJUST_CFA_OFFSET 16
-	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-	larl	%r5,_vdso_data
-0:	ltgr	%r3,%r3				/* check if tz is NULL */
-	je	1f
-	mvc	0(8,%r3),__VDSO_TIMEZONE(%r5)
-1:	ltgr	%r2,%r2				/* check if tv is NULL */
-	je	4f
-	lg	%r4,__VDSO_UPD_COUNT(%r5)	/* load update counter */
-	tmll	%r4,0x0001			/* pending update ? loop */
-	jnz	0b
-	stcke	0(%r15)				/* Store TOD clock */
-	lg	%r1,1(%r15)
-	lg	%r0,__VDSO_TS_END(%r5)		/* TOD steering end time */
-	slgr	%r0,%r1				/* now - ts_steering_end */
-	ltgr	%r0,%r0				/* past end of steering ? */
-	jm	6f
-	srlg	%r0,%r0,15			/* 1 per 2^16 */
-	tm	__VDSO_TS_DIR+3(%r5),0x01	/* steering direction? */
-	jz	7f
-	lcgr	%r0,%r0				/* negative TOD offset */
-7:	algr	%r1,%r0				/* add steering offset */
-6:	sg	%r1,__VDSO_XTIME_STAMP(%r5)	/* TOD - cycle_last */
-	msgf	%r1,__VDSO_TK_MULT(%r5)		/*  * tk->mult */
-	alg	%r1,__VDSO_XTIME_NSEC(%r5)	/*  + tk->xtime_nsec */
-	lg	%r0,__VDSO_XTIME_SEC(%r5)	/* tk->xtime_sec */
-	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
-	jne	0b
-	lgf	%r5,__VDSO_TK_SHIFT(%r5)	/* Timekeeper shift */
-	srlg	%r1,%r1,0(%r5)			/*  >> tk->shift */
-	larl	%r5,5f
-2:	clg	%r1,0(%r5)
-	jl	3f
-	slg	%r1,0(%r5)
-	aghi	%r0,1
-	j	2b
-3:	stg	%r0,0(%r2)			/* store tv->tv_sec */
-	slgr	%r0,%r0				/* tv_nsec -> tv_usec */
-	ml	%r0,8(%r5)
-	srlg	%r0,%r0,6
-	stg	%r0,8(%r2)			/* store tv->tv_usec */
-4:	lghi	%r2,0
-	aghi	%r15,16
-	CFI_ADJUST_CFA_OFFSET -16
-	CFI_RESTORE 15
-	br	%r14
-	CFI_ENDPROC
-5:	.quad	1000000000
-	.long	274877907
-	.size	__kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c
new file mode 100644
index 0000000..a8cef7e
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso64_generic.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../../../lib/vdso/gettimeofday.c"
+
+int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv,
+			     struct timezone *tz)
+{
+	return __cvdso_gettimeofday(tv, tz);
+}
+
+int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
+{
+	return __cvdso_clock_gettime(clock, ts);
+}
+
+int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts)
+{
+	return __cvdso_clock_getres(clock, ts);
+}
diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
new file mode 100644
index 0000000..a775d7e
--- /dev/null
+++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/vdso.h>
+#include <asm/unistd.h>
+#include <asm/asm-offsets.h>
+#include <asm/dwarf.h>
+#include <asm/ptrace.h>
+
+#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8)
+
+/*
+ * Older glibc versions called the vdso without allocating a stackframe. This wrapper
+ * is just used to allocate a stackframe. See
+ * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e
+ * for details.
+ */
+.macro vdso_func func
+	.globl __kernel_\func
+	.type  __kernel_\func,@function
+	.align 8
+__kernel_\func:
+	CFI_STARTPROC
+	aghi	%r15,-WRAPPER_FRAME_SIZE
+	CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE)
+	CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
+	stg	%r14,STACK_FRAME_OVERHEAD(%r15)
+	brasl	%r14,__s390_vdso_\func
+	lg	%r14,STACK_FRAME_OVERHEAD(%r15)
+	aghi	%r15,WRAPPER_FRAME_SIZE
+	CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
+	CFI_RESTORE 15
+	br	%r14
+	CFI_ENDPROC
+	.size	__kernel_\func,.-__kernel_\func
+.endm
+
+vdso_func gettimeofday
+vdso_func clock_getres
+vdso_func clock_gettime
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 7e0eb40..177ccfb 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -15,6 +15,8 @@
 /* Handle ro_after_init data on our own. */
 #define RO_AFTER_INIT_DATA
 
+#define EMITS_PT_NOTE
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/vmlinux.lds.h>
 
@@ -50,11 +52,7 @@
 		_etext = .;		/* End of text section */
 	} :text = 0x0700
 
-	NOTES :text :note
-
-	.dummy : { *(.dummy) } :data
-
-	RO_DATA_SECTION(PAGE_SIZE)
+	RO_DATA(PAGE_SIZE)
 
 	. = ALIGN(PAGE_SIZE);
 	_sdata = .;		/* Start of data section */
@@ -64,12 +62,12 @@
 	.data..ro_after_init : {
 		 *(.data..ro_after_init)
 		JUMP_TABLE_DATA
-	}
+	} :data
 	EXCEPTION_TABLE(16)
 	. = ALIGN(PAGE_SIZE);
 	__end_ro_after_init = .;
 
-	RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+	RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
 	BOOT_DATA_PRESERVED
 
 	_edata = .;		/* End of data section */
@@ -183,6 +181,7 @@
 	/* Debugging sections.	*/
 	STABS_DEBUG
 	DWARF_DEBUG
+	ELF_DETAILS
 
 	/* Sections to be discarded */
 	DISCARDS
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 6e60cc2..579ec3a 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -248,9 +248,9 @@
 }
 EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
 __attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
 
 /*
  * Sorted add to a list. List is linear searched until first bigger
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index d3db3d7..67a8e77 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -7,7 +7,7 @@
 menuconfig VIRTUALIZATION
 	def_bool y
 	prompt "KVM"
-	---help---
+	help
 	  Say Y here to get to see options for using your Linux host to run other
 	  operating systems inside virtual machines (guests).
 	  This option alone does not add any kernel code.
@@ -33,7 +33,7 @@
 	select HAVE_KVM_NO_POLL
 	select SRCU
 	select KVM_VFIO
-	---help---
+	help
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
 	  on any 64bit machine.
@@ -49,14 +49,10 @@
 config KVM_S390_UCONTROL
 	bool "Userspace controlled virtual machines"
 	depends on KVM
-	---help---
+	help
 	  Allow CAP_SYS_ADMIN users to create KVM virtual machines that are
 	  controlled by userspace.
 
 	  If unsure, say N.
 
-# OK, it's a little counter-intuitive to do this, but it puts it neatly under
-# the virtualization menu.
-source "drivers/vhost/Kconfig"
-
 endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 05ee90a..12decca 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,6 @@
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
 
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 45634b3..5b8ec1c 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -2,7 +2,7 @@
 /*
  * handling diagnose instructions
  *
- * Copyright IBM Corp. 2008, 2011
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -10,7 +10,6 @@
 
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
-#include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
@@ -158,14 +157,28 @@
 
 	tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
 	vcpu->stat.diagnose_9c++;
-	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d", tid);
 
+	/* yield to self */
 	if (tid == vcpu->vcpu_id)
-		return 0;
+		goto no_yield;
 
+	/* yield to invalid */
 	tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid);
-	if (tcpu)
-		kvm_vcpu_yield_to(tcpu);
+	if (!tcpu)
+		goto no_yield;
+
+	/* target already running */
+	if (READ_ONCE(tcpu->cpu) >= 0)
+		goto no_yield;
+
+	if (kvm_vcpu_yield_to(tcpu) <= 0)
+		goto no_yield;
+
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: done", tid);
+	return 0;
+no_yield:
+	VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid);
+	vcpu->stat.diagnose_9c_ignored++;
 	return 0;
 }
 
@@ -187,6 +200,10 @@
 		return -EOPNOTSUPP;
 	}
 
+	/*
+	 * no need to check the return value of vcpu_stop as it can only have
+	 * an error for protvirt, but protvirt means user cpu state
+	 */
 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
 		kvm_s390_vcpu_stop(vcpu);
 	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 07d30ff..b9f85b2 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -9,8 +9,8 @@
 #include <linux/vmalloc.h>
 #include <linux/mm_types.h>
 #include <linux/err.h>
+#include <linux/pgtable.h>
 
-#include <asm/pgtable.h>
 #include <asm/gmap.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -505,7 +505,7 @@
 		switch (prot) {
 		case PROT_TYPE_IEP:
 			tec->b61 = 1;
-			/* FALL THROUGH */
+			fallthrough;
 		case PROT_TYPE_LA:
 			tec->b56 = 1;
 			break;
@@ -514,12 +514,12 @@
 			break;
 		case PROT_TYPE_ALC:
 			tec->b60 = 1;
-			/* FALL THROUGH */
+			fallthrough;
 		case PROT_TYPE_DAT:
 			tec->b61 = 1;
 			break;
 		}
-		/* FALL THROUGH */
+		fallthrough;
 	case PGM_ASCE_TYPE:
 	case PGM_PAGE_TRANSLATION:
 	case PGM_REGION_FIRST_TRANS:
@@ -534,7 +534,7 @@
 		tec->addr = gva >> PAGE_SHIFT;
 		tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
 		tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
-		/* FALL THROUGH */
+		fallthrough;
 	case PGM_ALEN_TRANSLATION:
 	case PGM_ALE_SEQUENCE:
 	case PGM_ASTE_VALIDITY:
@@ -677,7 +677,7 @@
 			dat_protection |= rfte.p;
 		ptr = rfte.rto * PAGE_SIZE + vaddr.rsx * 8;
 	}
-		/* fallthrough */
+		fallthrough;
 	case ASCE_TYPE_REGION2: {
 		union region2_table_entry rste;
 
@@ -695,7 +695,7 @@
 			dat_protection |= rste.p;
 		ptr = rste.rto * PAGE_SIZE + vaddr.rtx * 8;
 	}
-		/* fallthrough */
+		fallthrough;
 	case ASCE_TYPE_REGION3: {
 		union region3_table_entry rtte;
 
@@ -723,7 +723,7 @@
 			dat_protection |= rtte.fc0.p;
 		ptr = rtte.fc0.sto * PAGE_SIZE + vaddr.sx * 8;
 	}
-		/* fallthrough */
+		fallthrough;
 	case ASCE_TYPE_SEGMENT: {
 		union segment_table_entry ste;
 
@@ -976,7 +976,9 @@
  * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
  * @sg: pointer to the shadow guest address space structure
  * @saddr: faulting address in the shadow gmap
- * @pgt: pointer to the page table address result
+ * @pgt: pointer to the beginning of the page table for the given address if
+ *	 successful (return value 0), or to the first invalid DAT entry in
+ *	 case of exceptions (return value > 0)
  * @fake: pgt references contiguous guest memory block, not a pgtable
  */
 static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
@@ -1034,6 +1036,7 @@
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
+		*pgt = ptr + vaddr.rfx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
 		if (rc)
 			return rc;
@@ -1050,7 +1053,8 @@
 		rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
 		if (rc)
 			return rc;
-	} /* fallthrough */
+	}
+		fallthrough;
 	case ASCE_TYPE_REGION2: {
 		union region2_table_entry rste;
 
@@ -1059,6 +1063,7 @@
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
+		*pgt = ptr + vaddr.rsx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
 		if (rc)
 			return rc;
@@ -1076,7 +1081,8 @@
 		rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
 		if (rc)
 			return rc;
-	} /* fallthrough */
+	}
+		fallthrough;
 	case ASCE_TYPE_REGION3: {
 		union region3_table_entry rtte;
 
@@ -1085,6 +1091,7 @@
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
+		*pgt = ptr + vaddr.rtx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
 		if (rc)
 			return rc;
@@ -1111,7 +1118,8 @@
 		rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
 		if (rc)
 			return rc;
-	} /* fallthrough */
+	}
+		fallthrough;
 	case ASCE_TYPE_SEGMENT: {
 		union segment_table_entry ste;
 
@@ -1120,6 +1128,7 @@
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
+		*pgt = ptr + vaddr.sx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
 		if (rc)
 			return rc;
@@ -1154,6 +1163,8 @@
  * @vcpu: virtual cpu
  * @sg: pointer to the shadow guest address space structure
  * @saddr: faulting address in the shadow gmap
+ * @datptr: will contain the address of the faulting DAT table entry, or of
+ *	    the valid leaf, plus some flags
  *
  * Returns: - 0 if the shadow fault was successfully resolved
  *	    - > 0 (pgm exception code) on exceptions while faulting
@@ -1162,15 +1173,15 @@
  *	    - -ENOMEM if out of memory
  */
 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
-			  unsigned long saddr)
+			  unsigned long saddr, unsigned long *datptr)
 {
 	union vaddress vaddr;
 	union page_table_entry pte;
-	unsigned long pgt;
+	unsigned long pgt = 0;
 	int dat_protection, fake;
 	int rc;
 
-	down_read(&sg->mm->mmap_sem);
+	mmap_read_lock(sg->mm);
 	/*
 	 * We don't want any guest-2 tables to change - so the parent
 	 * tables/pointers we read stay valid - unshadowing is however
@@ -1188,8 +1199,20 @@
 		pte.val = pgt + vaddr.px * PAGE_SIZE;
 		goto shadow_page;
 	}
-	if (!rc)
-		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+	switch (rc) {
+	case PGM_SEGMENT_TRANSLATION:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_FIRST_TRANS:
+		pgt |= PEI_NOT_PTE;
+		break;
+	case 0:
+		pgt += vaddr.px * 8;
+		rc = gmap_read_table(sg->parent, pgt, &pte.val);
+	}
+	if (datptr)
+		*datptr = pgt | dat_protection * PEI_DAT_PROT;
 	if (!rc && pte.i)
 		rc = PGM_PAGE_TRANSLATION;
 	if (!rc && pte.z)
@@ -1199,6 +1222,6 @@
 	if (!rc)
 		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
 	ipte_unlock(vcpu);
-	up_read(&sg->mm->mmap_sem);
+	mmap_read_unlock(sg->mm);
 	return rc;
 }
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 4c56de5..7c72a5e 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -387,7 +387,11 @@
 int ipte_lock_held(struct kvm_vcpu *vcpu);
 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
+/* MVPG PEI indication bits */
+#define PEI_DAT_PROT 2
+#define PEI_NOT_PTE 4
+
 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
-			  unsigned long saddr);
+			  unsigned long saddr, unsigned long *datptr);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a389fa8..e7a7c49 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -2,7 +2,7 @@
 /*
  * in-kernel handling for sie intercepts
  *
- * Copyright IBM Corp. 2008, 2014
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -12,10 +12,10 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 
-#include <asm/kvm_host.h>
 #include <asm/asm-offsets.h>
 #include <asm/irq.h>
 #include <asm/sysinfo.h>
+#include <asm/uv.h>
 
 #include "kvm-s390.h"
 #include "gaccess.h"
@@ -79,6 +79,10 @@
 			return rc;
 	}
 
+	/*
+	 * no need to check the return value of vcpu_stop as it can only have
+	 * an error for protvirt, but protvirt means user cpu state
+	 */
 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
 		kvm_s390_vcpu_stop(vcpu);
 	return -EOPNOTSUPP;
@@ -231,6 +235,13 @@
 
 	vcpu->stat.exit_program_interruption++;
 
+	/*
+	 * Intercept 8 indicates a loop of specification exceptions
+	 * for protected guests.
+	 */
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		return -EOPNOTSUPP;
+
 	if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
 		rc = kvm_s390_handle_per_event(vcpu);
 		if (rc)
@@ -384,7 +395,7 @@
 		goto out;
 	}
 
-	if (addr & ~PAGE_MASK)
+	if (!kvm_s390_pv_cpu_is_protected(vcpu) && (addr & ~PAGE_MASK))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	sctns = (void *)get_zeroed_page(GFP_KERNEL);
@@ -395,10 +406,15 @@
 
 out:
 	if (!cc) {
-		r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
-		if (r) {
-			free_page((unsigned long)sctns);
-			return kvm_s390_inject_prog_cond(vcpu, r);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
+			       sctns, PAGE_SIZE);
+		} else {
+			r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+			if (r) {
+				free_page((unsigned long)sctns);
+				return kvm_s390_inject_prog_cond(vcpu, r);
+			}
 		}
 	}
 
@@ -444,6 +460,77 @@
 	return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 }
 
+static int handle_pv_spx(struct kvm_vcpu *vcpu)
+{
+	u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+
+	kvm_s390_set_prefix(vcpu, pref);
+	trace_kvm_s390_handle_prefix(vcpu, 1, pref);
+	return 0;
+}
+
+static int handle_pv_sclp(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+
+	spin_lock(&fi->lock);
+	/*
+	 * 2 cases:
+	 * a: an sccb answering interrupt was already pending or in flight.
+	 *    As the sccb value is not known we can simply set some value to
+	 *    trigger delivery of a saved SCCB. UV will then use its saved
+	 *    copy of the SCCB value.
+	 * b: an error SCCB interrupt needs to be injected so we also inject
+	 *    a fake SCCB address. Firmware will use the proper one.
+	 * This makes sure, that both errors and real sccb returns will only
+	 * be delivered after a notification intercept (instruction has
+	 * finished) but not after others.
+	 */
+	fi->srv_signal.ext_params |= 0x43000;
+	set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
+	spin_unlock(&fi->lock);
+	return 0;
+}
+
+static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+{
+	struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+	struct uv_cb_cts uvcb = {
+		.header.cmd	= UVC_CMD_UNPIN_PAGE_SHARED,
+		.header.len	= sizeof(uvcb),
+		.guest_handle	= kvm_s390_pv_get_handle(vcpu->kvm),
+		.gaddr		= guest_uvcb->paddr,
+	};
+	int rc;
+
+	if (guest_uvcb->header.cmd != UVC_CMD_REMOVE_SHARED_ACCESS) {
+		WARN_ONCE(1, "Unexpected notification intercept for UVC 0x%x\n",
+			  guest_uvcb->header.cmd);
+		return 0;
+	}
+	rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+	/*
+	 * If the unpin did not succeed, the guest will exit again for the UVC
+	 * and we will retry the unpin.
+	 */
+	if (rc == -EINVAL)
+		return 0;
+	return rc;
+}
+
+static int handle_pv_notification(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.sie_block->ipa == 0xb210)
+		return handle_pv_spx(vcpu);
+	if (vcpu->arch.sie_block->ipa == 0xb220)
+		return handle_pv_sclp(vcpu);
+	if (vcpu->arch.sie_block->ipa == 0xb9a4)
+		return handle_pv_uvc(vcpu);
+
+	return handle_instruction(vcpu);
+}
+
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
 	int rc, per_rc = 0;
@@ -480,6 +567,28 @@
 	case ICPT_KSS:
 		rc = kvm_s390_skey_check_enable(vcpu);
 		break;
+	case ICPT_MCHKREQ:
+	case ICPT_INT_ENABLE:
+		/*
+		 * PSW bit 13 or a CR (0, 6, 14) changed and we might
+		 * now be able to deliver interrupts. The pre-run code
+		 * will take care of this.
+		 */
+		rc = 0;
+		break;
+	case ICPT_PV_INSTR:
+		rc = handle_instruction(vcpu);
+		break;
+	case ICPT_PV_NOTIFY:
+		rc = handle_pv_notification(vcpu);
+		break;
+	case ICPT_PV_PREF:
+		rc = 0;
+		gmap_convert_to_secure(vcpu->arch.gmap,
+				       kvm_s390_get_prefix(vcpu));
+		gmap_convert_to_secure(vcpu->arch.gmap,
+				       kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fa9483a..b51ab19 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2,7 +2,7 @@
 /*
  * handling kvm guest interrupts
  *
- * Copyright IBM Corp. 2008, 2015
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  */
@@ -324,8 +324,11 @@
 
 static inline unsigned long pending_irqs_no_gisa(struct kvm_vcpu *vcpu)
 {
-	return vcpu->kvm->arch.float_int.pending_irqs |
-		vcpu->arch.local_int.pending_irqs;
+	unsigned long pending = vcpu->kvm->arch.float_int.pending_irqs |
+				vcpu->arch.local_int.pending_irqs;
+
+	pending &= ~vcpu->kvm->arch.float_int.masked_irqs;
+	return pending;
 }
 
 static inline unsigned long pending_irqs(struct kvm_vcpu *vcpu)
@@ -383,10 +386,18 @@
 		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
 		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
-	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
+	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
 		__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
+		__clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
+	}
 	if (psw_mchk_disabled(vcpu))
 		active_mask &= ~IRQ_PEND_MCHK_MASK;
+	/* PV guest cpus can have a single interruption injected at a time. */
+	if (kvm_s390_pv_cpu_get_handle(vcpu) &&
+	    vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
+		active_mask &= ~(IRQ_PEND_EXT_II_MASK |
+				 IRQ_PEND_IO_MASK |
+				 IRQ_PEND_MCHK_MASK);
 	/*
 	 * Check both floating and local interrupt's cr14 because
 	 * bit IRQ_PEND_MCHK_REP could be set in both cases.
@@ -479,19 +490,23 @@
 static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	int rc;
+	int rc = 0;
 
 	vcpu->stat.deliver_cputm++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
 					 0, 0);
-
-	rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
-			   (u16 *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_CPU_TIMER;
+	} else {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+				   (u16 *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	}
 	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
 	return rc ? -EFAULT : 0;
 }
@@ -499,19 +514,23 @@
 static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	int rc;
+	int rc = 0;
 
 	vcpu->stat.deliver_ckc++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
 					 0, 0);
-
-	rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
-			   (u16 __user *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_CLK_COMP;
+	} else {
+		rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+				   (u16 __user *)__LC_EXT_INT_CODE);
+		rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	}
 	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 	return rc ? -EFAULT : 0;
 }
@@ -553,6 +572,20 @@
 	union mci mci;
 	int rc;
 
+	/*
+	 * All other possible payload for a machine check (e.g. the register
+	 * contents in the save area) will be handled by the ultravisor, as
+	 * the hypervisor does not have the needed information for
+	 * protected guests.
+	 */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
+		vcpu->arch.sie_block->mcic = mchk->mcic;
+		vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
+		vcpu->arch.sie_block->edc = mchk->ext_damage_code;
+		return 0;
+	}
+
 	mci.val = mchk->mcic;
 	/* take care of lazy register loading */
 	save_fpu_regs();
@@ -696,17 +729,21 @@
 static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	int rc;
+	int rc = 0;
 
 	VCPU_EVENT(vcpu, 3, "%s", "deliver: cpu restart");
 	vcpu->stat.deliver_restart_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
 
-	rc  = write_guest_lc(vcpu,
-			     offsetof(struct lowcore, restart_old_psw),
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_RESTART;
+	} else {
+		rc  = write_guest_lc(vcpu,
+				     offsetof(struct lowcore, restart_old_psw),
+				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc |= read_guest_lc(vcpu, offsetof(struct lowcore, restart_psw),
+				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	}
 	clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
 	return rc ? -EFAULT : 0;
 }
@@ -748,6 +785,12 @@
 	vcpu->stat.deliver_emergency_signal++;
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
 					 cpu_addr, 0);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_EMERGENCY_SIG;
+		vcpu->arch.sie_block->extcpuaddr = cpu_addr;
+		return 0;
+	}
 
 	rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
 			   (u16 *)__LC_EXT_INT_CODE);
@@ -776,6 +819,12 @@
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
 					 KVM_S390_INT_EXTERNAL_CALL,
 					 extcall.code, 0);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_EXTERNAL_CALL;
+		vcpu->arch.sie_block->extcpuaddr = extcall.code;
+		return 0;
+	}
 
 	rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
 			   (u16 *)__LC_EXT_INT_CODE);
@@ -787,6 +836,21 @@
 	return rc ? -EFAULT : 0;
 }
 
+static int __deliver_prog_pv(struct kvm_vcpu *vcpu, u16 code)
+{
+	switch (code) {
+	case PGM_SPECIFICATION:
+		vcpu->arch.sie_block->iictl = IICTL_CODE_SPECIFICATION;
+		break;
+	case PGM_OPERAND:
+		vcpu->arch.sie_block->iictl = IICTL_CODE_OPERAND;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -807,6 +871,10 @@
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 					 pgm_info.code, 0);
 
+	/* PER is handled by the ultravisor */
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
+
 	switch (pgm_info.code & ~PGM_PER) {
 	case PGM_AFX_TRANSLATION:
 	case PGM_ASX_TRANSLATION:
@@ -818,7 +886,7 @@
 	case PGM_PRIMARY_AUTHORITY:
 	case PGM_SECONDARY_AUTHORITY:
 		nullifying = true;
-		/* fall through */
+		fallthrough;
 	case PGM_SPACE_SWITCH:
 		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
@@ -902,20 +970,49 @@
 	return rc ? -EFAULT : 0;
 }
 
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int write_sclp(struct kvm_vcpu *vcpu, u32 parm)
+{
+	int rc;
+
+	if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_EXT;
+		vcpu->arch.sie_block->eic = EXT_IRQ_SERVICE_SIG;
+		vcpu->arch.sie_block->eiparams = parm;
+		return 0;
+	}
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, parm,
+			   (u32 *)__LC_EXT_PARAMS);
+
+	return rc ? -EFAULT : 0;
+}
+
 static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	struct kvm_s390_ext_info ext;
-	int rc = 0;
 
 	spin_lock(&fi->lock);
-	if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs) ||
+	    !(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
 		spin_unlock(&fi->lock);
 		return 0;
 	}
 	ext = fi->srv_signal;
 	memset(&fi->srv_signal, 0, sizeof(ext));
 	clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+	clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		set_bit(IRQ_PEND_EXT_SERVICE, &fi->masked_irqs);
 	spin_unlock(&fi->lock);
 
 	VCPU_EVENT(vcpu, 4, "deliver: sclp parameter 0x%x",
@@ -924,16 +1021,31 @@
 	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
 					 ext.ext_params, 0);
 
-	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
-	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= put_guest_lc(vcpu, ext.ext_params,
-			   (u32 *)__LC_EXT_PARAMS);
+	return write_sclp(vcpu, ext.ext_params);
+}
 
-	return rc ? -EFAULT : 0;
+static int __must_check __deliver_service_ev(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct kvm_s390_ext_info ext;
+
+	spin_lock(&fi->lock);
+	if (!(test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs))) {
+		spin_unlock(&fi->lock);
+		return 0;
+	}
+	ext = fi->srv_signal;
+	/* only clear the event bit */
+	fi->srv_signal.ext_params &= ~SCCB_EVENT_PENDING;
+	clear_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+	spin_unlock(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "deliver: sclp parameter event");
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+					 ext.ext_params, 0);
+
+	return write_sclp(vcpu, SCCB_EVENT_PENDING);
 }
 
 static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
@@ -1028,6 +1140,15 @@
 {
 	int rc;
 
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->iictl = IICTL_CODE_IO;
+		vcpu->arch.sie_block->subchannel_id = io->subchannel_id;
+		vcpu->arch.sie_block->subchannel_nr = io->subchannel_nr;
+		vcpu->arch.sie_block->io_int_parm = io->io_int_parm;
+		vcpu->arch.sie_block->io_int_word = io->io_int_word;
+		return 0;
+	}
+
 	rc  = put_guest_lc(vcpu, io->subchannel_id, (u16 *)__LC_SUBCHANNEL_ID);
 	rc |= put_guest_lc(vcpu, io->subchannel_nr, (u16 *)__LC_SUBCHANNEL_NR);
 	rc |= put_guest_lc(vcpu, io->io_int_parm, (u32 *)__LC_IO_INT_PARM);
@@ -1329,6 +1450,9 @@
 		case IRQ_PEND_EXT_SERVICE:
 			rc = __deliver_service(vcpu);
 			break;
+		case IRQ_PEND_EXT_SERVICE_EV:
+			rc = __deliver_service_ev(vcpu);
+			break;
 		case IRQ_PEND_PFAULT_DONE:
 			rc = __deliver_pfault_done(vcpu);
 			break;
@@ -1421,7 +1545,7 @@
 	if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
 		return -EINVAL;
 
-	if (sclp.has_sigpif)
+	if (sclp.has_sigpif && !kvm_s390_pv_cpu_get_handle(vcpu))
 		return sca_inject_ext_call(vcpu, src_id);
 
 	if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs))
@@ -1477,8 +1601,7 @@
 	return 0;
 }
 
-static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
-				 struct kvm_s390_irq *irq)
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 
@@ -1682,9 +1805,6 @@
 	return inti;
 }
 
-#define SCCB_MASK 0xFFFFFFF8
-#define SCCB_EVENT_PENDING 0x3
-
 static int __inject_service(struct kvm *kvm,
 			     struct kvm_s390_interrupt_info *inti)
 {
@@ -1693,6 +1813,11 @@
 	kvm->stat.inject_service_signal++;
 	spin_lock(&fi->lock);
 	fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+
+	/* We always allow events, track them separately from the sccb ints */
+	if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
+		set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
+
 	/*
 	 * Early versions of the QEMU s390 bios will inject several
 	 * service interrupts after another without handling a
@@ -1774,7 +1899,14 @@
 	kvm->stat.inject_io++;
 	isc = int_word_to_isc(inti->io.io_int_word);
 
-	if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
+	/*
+	 * Do not make use of gisa in protected mode. We do not use the lock
+	 * checking variant as this is just a performance optimization and we
+	 * do not hold the lock here. This is ok as the code will pick
+	 * interrupts from both "lists" for delivery.
+	 */
+	if (!kvm_s390_pv_get_handle(kvm) &&
+	    gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
 		VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
 		gisa_set_ipm_gisc(gi->origin, isc);
 		kfree(inti);
@@ -1835,7 +1967,8 @@
 		break;
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 		if (!(type & KVM_S390_INT_IO_AI_MASK &&
-		      kvm->arch.gisa_int.origin))
+		      kvm->arch.gisa_int.origin) ||
+		      kvm_s390_pv_cpu_get_handle(dst_vcpu))
 			kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
 		break;
 	default:
@@ -1982,6 +2115,13 @@
 	return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
 }
 
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	return test_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+}
+
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
@@ -2007,7 +2147,7 @@
 		rc = __inject_sigp_stop(vcpu, irq);
 		break;
 	case KVM_S390_RESTART:
-		rc = __inject_sigp_restart(vcpu, irq);
+		rc = __inject_sigp_restart(vcpu);
 		break;
 	case KVM_S390_INT_CLOCK_COMP:
 		rc = __inject_ckc(vcpu);
@@ -2081,6 +2221,10 @@
 	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
 	int i;
 
+	mutex_lock(&kvm->lock);
+	if (!kvm_s390_pv_is_protected(kvm))
+		fi->masked_irqs = 0;
+	mutex_unlock(&kvm->lock);
 	spin_lock(&fi->lock);
 	fi->pending_irqs = 0;
 	memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
@@ -2147,7 +2291,8 @@
 			n++;
 		}
 	}
-	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+	if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
+	    test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) {
 		if (n == max_irqs) {
 			/* signal userspace to try again */
 			ret = -ENOMEM;
@@ -2328,9 +2473,6 @@
 	if (!adapter)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&adapter->maps);
-	init_rwsem(&adapter->maps_lock);
-	atomic_set(&adapter->nr_maps, 0);
 	adapter->id = adapter_info.id;
 	adapter->isc = adapter_info.isc;
 	adapter->maskable = adapter_info.maskable;
@@ -2355,87 +2497,12 @@
 	return ret;
 }
 
-static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
-{
-	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
-	struct s390_map_info *map;
-	int ret;
-
-	if (!adapter || !addr)
-		return -EINVAL;
-
-	map = kzalloc(sizeof(*map), GFP_KERNEL);
-	if (!map) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	INIT_LIST_HEAD(&map->list);
-	map->guest_addr = addr;
-	map->addr = gmap_translate(kvm->arch.gmap, addr);
-	if (map->addr == -EFAULT) {
-		ret = -EFAULT;
-		goto out;
-	}
-	ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page);
-	if (ret < 0)
-		goto out;
-	BUG_ON(ret != 1);
-	down_write(&adapter->maps_lock);
-	if (atomic_inc_return(&adapter->nr_maps) < MAX_S390_ADAPTER_MAPS) {
-		list_add_tail(&map->list, &adapter->maps);
-		ret = 0;
-	} else {
-		put_page(map->page);
-		ret = -EINVAL;
-	}
-	up_write(&adapter->maps_lock);
-out:
-	if (ret)
-		kfree(map);
-	return ret;
-}
-
-static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
-{
-	struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
-	struct s390_map_info *map, *tmp;
-	int found = 0;
-
-	if (!adapter || !addr)
-		return -EINVAL;
-
-	down_write(&adapter->maps_lock);
-	list_for_each_entry_safe(map, tmp, &adapter->maps, list) {
-		if (map->guest_addr == addr) {
-			found = 1;
-			atomic_dec(&adapter->nr_maps);
-			list_del(&map->list);
-			put_page(map->page);
-			kfree(map);
-			break;
-		}
-	}
-	up_write(&adapter->maps_lock);
-
-	return found ? 0 : -EINVAL;
-}
-
 void kvm_s390_destroy_adapters(struct kvm *kvm)
 {
 	int i;
-	struct s390_map_info *map, *tmp;
 
-	for (i = 0; i < MAX_S390_IO_ADAPTERS; i++) {
-		if (!kvm->arch.adapters[i])
-			continue;
-		list_for_each_entry_safe(map, tmp,
-					 &kvm->arch.adapters[i]->maps, list) {
-			list_del(&map->list);
-			put_page(map->page);
-			kfree(map);
-		}
+	for (i = 0; i < MAX_S390_IO_ADAPTERS; i++)
 		kfree(kvm->arch.adapters[i]);
-	}
 }
 
 static int modify_io_adapter(struct kvm_device *dev,
@@ -2457,11 +2524,14 @@
 		if (ret > 0)
 			ret = 0;
 		break;
+	/*
+	 * The following operations are no longer needed and therefore no-ops.
+	 * The gpa to hva translation is done when an IRQ route is set up. The
+	 * set_irq code uses get_user_pages_remote() to do the actual write.
+	 */
 	case KVM_S390_IO_ADAPTER_MAP:
-		ret = kvm_s390_adapter_map(dev->kvm, req.id, req.addr);
-		break;
 	case KVM_S390_IO_ADAPTER_UNMAP:
-		ret = kvm_s390_adapter_unmap(dev->kvm, req.id, req.addr);
+		ret = 0;
 		break;
 	default:
 		ret = -EINVAL;
@@ -2700,19 +2770,15 @@
 	return swap ? (bit ^ (BITS_PER_LONG - 1)) : bit;
 }
 
-static struct s390_map_info *get_map_info(struct s390_io_adapter *adapter,
-					  u64 addr)
+static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
 {
-	struct s390_map_info *map;
+	struct page *page = NULL;
 
-	if (!adapter)
-		return NULL;
-
-	list_for_each_entry(map, &adapter->maps, list) {
-		if (map->guest_addr == addr)
-			return map;
-	}
-	return NULL;
+	mmap_read_lock(kvm->mm);
+	get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
+			      &page, NULL, NULL);
+	mmap_read_unlock(kvm->mm);
+	return page;
 }
 
 static int adapter_indicators_set(struct kvm *kvm,
@@ -2721,30 +2787,35 @@
 {
 	unsigned long bit;
 	int summary_set, idx;
-	struct s390_map_info *info;
+	struct page *ind_page, *summary_page;
 	void *map;
 
-	info = get_map_info(adapter, adapter_int->ind_addr);
-	if (!info)
+	ind_page = get_map_page(kvm, adapter_int->ind_addr);
+	if (!ind_page)
 		return -1;
-	map = page_address(info->page);
-	bit = get_ind_bit(info->addr, adapter_int->ind_offset, adapter->swap);
-	set_bit(bit, map);
-	idx = srcu_read_lock(&kvm->srcu);
-	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
-	set_page_dirty_lock(info->page);
-	info = get_map_info(adapter, adapter_int->summary_addr);
-	if (!info) {
-		srcu_read_unlock(&kvm->srcu, idx);
+	summary_page = get_map_page(kvm, adapter_int->summary_addr);
+	if (!summary_page) {
+		put_page(ind_page);
 		return -1;
 	}
-	map = page_address(info->page);
-	bit = get_ind_bit(info->addr, adapter_int->summary_offset,
-			  adapter->swap);
+
+	idx = srcu_read_lock(&kvm->srcu);
+	map = page_address(ind_page);
+	bit = get_ind_bit(adapter_int->ind_addr,
+			  adapter_int->ind_offset, adapter->swap);
+	set_bit(bit, map);
+	mark_page_dirty(kvm, adapter_int->ind_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(ind_page);
+	map = page_address(summary_page);
+	bit = get_ind_bit(adapter_int->summary_addr,
+			  adapter_int->summary_offset, adapter->swap);
 	summary_set = test_and_set_bit(bit, map);
-	mark_page_dirty(kvm, info->guest_addr >> PAGE_SHIFT);
-	set_page_dirty_lock(info->page);
+	mark_page_dirty(kvm, adapter_int->summary_addr >> PAGE_SHIFT);
+	set_page_dirty_lock(summary_page);
 	srcu_read_unlock(&kvm->srcu, idx);
+
+	put_page(ind_page);
+	put_page(summary_page);
 	return summary_set ? 0 : 1;
 }
 
@@ -2766,9 +2837,7 @@
 	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
 	if (!adapter)
 		return -1;
-	down_read(&adapter->maps_lock);
 	ret = adapter_indicators_set(kvm, adapter, &e->adapter);
-	up_read(&adapter->maps_lock);
 	if ((ret > 0) && !adapter->masked) {
 		ret = kvm_s390_inject_airq(kvm, adapter);
 		if (ret == 0)
@@ -2819,23 +2888,27 @@
 			  struct kvm_kernel_irq_routing_entry *e,
 			  const struct kvm_irq_routing_entry *ue)
 {
-	int ret;
+	u64 uaddr;
 
 	switch (ue->type) {
+	/* we store the userspace addresses instead of the guest addresses */
 	case KVM_IRQ_ROUTING_S390_ADAPTER:
 		e->set = set_adapter_int;
-		e->adapter.summary_addr = ue->u.adapter.summary_addr;
-		e->adapter.ind_addr = ue->u.adapter.ind_addr;
+		uaddr =  gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr);
+		if (uaddr == -EFAULT)
+			return -EFAULT;
+		e->adapter.summary_addr = uaddr;
+		uaddr =  gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr);
+		if (uaddr == -EFAULT)
+			return -EFAULT;
+		e->adapter.ind_addr = uaddr;
 		e->adapter.summary_offset = ue->u.adapter.summary_offset;
 		e->adapter.ind_offset = ue->u.adapter.ind_offset;
 		e->adapter.adapter_id = ue->u.adapter.adapter_id;
-		ret = 0;
-		break;
+		return 0;
 	default:
-		ret = -EINVAL;
+		return -EINVAL;
 	}
-
-	return ret;
 }
 
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
@@ -2987,13 +3060,14 @@
 	int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus);
 	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
 	struct kvm_vcpu *vcpu;
+	u8 vcpu_isc_mask;
 
 	for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) {
 		vcpu = kvm_get_vcpu(kvm, vcpu_idx);
 		if (psw_ioint_disabled(vcpu))
 			continue;
-		deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
-		if (deliverable_mask) {
+		vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+		if (deliverable_mask & vcpu_isc_mask) {
 			/* lately kicked but not yet running */
 			if (test_and_set_bit(vcpu_idx, gi->kicked_mask))
 				return;
@@ -3016,7 +3090,7 @@
 		__airqs_kick_single_vcpu(kvm, pending_mask);
 		hrtimer_forward_now(timer, ns_to_ktime(gi->expires));
 		return HRTIMER_RESTART;
-	};
+	}
 
 	return HRTIMER_NORESTART;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9ed2fee..d8e9239 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2,7 +2,7 @@
 /*
  * hosting IBM Z kernel virtual machines (s390x)
  *
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -31,11 +31,11 @@
 #include <linux/bitmap.h>
 #include <linux/sched/signal.h>
 #include <linux/string.h>
+#include <linux/pgtable.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/stp.h>
-#include <asm/pgtable.h>
 #include <asm/gmap.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
@@ -44,6 +44,7 @@
 #include <asm/cpacf.h>
 #include <asm/timex.h>
 #include <asm/ap.h>
+#include <asm/uv.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -56,109 +57,109 @@
 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
 
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-
 struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ "userspace_handled", VCPU_STAT(exit_userspace) },
-	{ "exit_null", VCPU_STAT(exit_null) },
-	{ "exit_validity", VCPU_STAT(exit_validity) },
-	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
-	{ "exit_external_request", VCPU_STAT(exit_external_request) },
-	{ "exit_io_request", VCPU_STAT(exit_io_request) },
-	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
-	{ "exit_instruction", VCPU_STAT(exit_instruction) },
-	{ "exit_pei", VCPU_STAT(exit_pei) },
-	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
-	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
-	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
-	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
-	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
-	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
-	{ "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) },
-	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
-	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
-	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
-	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
-	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
-	{ "deliver_ckc", VCPU_STAT(deliver_ckc) },
-	{ "deliver_cputm", VCPU_STAT(deliver_cputm) },
-	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
-	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
-	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
-	{ "deliver_virtio", VCPU_STAT(deliver_virtio) },
-	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
-	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
-	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
-	{ "deliver_program", VCPU_STAT(deliver_program) },
-	{ "deliver_io", VCPU_STAT(deliver_io) },
-	{ "deliver_machine_check", VCPU_STAT(deliver_machine_check) },
-	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
-	{ "inject_ckc", VCPU_STAT(inject_ckc) },
-	{ "inject_cputm", VCPU_STAT(inject_cputm) },
-	{ "inject_external_call", VCPU_STAT(inject_external_call) },
-	{ "inject_float_mchk", VM_STAT(inject_float_mchk) },
-	{ "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) },
-	{ "inject_io", VM_STAT(inject_io) },
-	{ "inject_mchk", VCPU_STAT(inject_mchk) },
-	{ "inject_pfault_done", VM_STAT(inject_pfault_done) },
-	{ "inject_program", VCPU_STAT(inject_program) },
-	{ "inject_restart", VCPU_STAT(inject_restart) },
-	{ "inject_service_signal", VM_STAT(inject_service_signal) },
-	{ "inject_set_prefix", VCPU_STAT(inject_set_prefix) },
-	{ "inject_stop_signal", VCPU_STAT(inject_stop_signal) },
-	{ "inject_pfault_init", VCPU_STAT(inject_pfault_init) },
-	{ "inject_virtio", VM_STAT(inject_virtio) },
-	{ "instruction_epsw", VCPU_STAT(instruction_epsw) },
-	{ "instruction_gs", VCPU_STAT(instruction_gs) },
-	{ "instruction_io_other", VCPU_STAT(instruction_io_other) },
-	{ "instruction_lpsw", VCPU_STAT(instruction_lpsw) },
-	{ "instruction_lpswe", VCPU_STAT(instruction_lpswe) },
-	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
-	{ "instruction_ptff", VCPU_STAT(instruction_ptff) },
-	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
-	{ "instruction_sck", VCPU_STAT(instruction_sck) },
-	{ "instruction_sckpf", VCPU_STAT(instruction_sckpf) },
-	{ "instruction_spx", VCPU_STAT(instruction_spx) },
-	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
-	{ "instruction_stap", VCPU_STAT(instruction_stap) },
-	{ "instruction_iske", VCPU_STAT(instruction_iske) },
-	{ "instruction_ri", VCPU_STAT(instruction_ri) },
-	{ "instruction_rrbe", VCPU_STAT(instruction_rrbe) },
-	{ "instruction_sske", VCPU_STAT(instruction_sske) },
-	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
-	{ "instruction_essa", VCPU_STAT(instruction_essa) },
-	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
-	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
-	{ "instruction_tb", VCPU_STAT(instruction_tb) },
-	{ "instruction_tpi", VCPU_STAT(instruction_tpi) },
-	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
-	{ "instruction_tsch", VCPU_STAT(instruction_tsch) },
-	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
-	{ "instruction_sie", VCPU_STAT(instruction_sie) },
-	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
-	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
-	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
-	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
-	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
-	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
-	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
-	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
-	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
-	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
-	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
-	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
-	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
-	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
-	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
-	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
-	{ "instruction_diag_10", VCPU_STAT(diagnose_10) },
-	{ "instruction_diag_44", VCPU_STAT(diagnose_44) },
-	{ "instruction_diag_9c", VCPU_STAT(diagnose_9c) },
-	{ "instruction_diag_258", VCPU_STAT(diagnose_258) },
-	{ "instruction_diag_308", VCPU_STAT(diagnose_308) },
-	{ "instruction_diag_500", VCPU_STAT(diagnose_500) },
-	{ "instruction_diag_other", VCPU_STAT(diagnose_other) },
+	VCPU_STAT("userspace_handled", exit_userspace),
+	VCPU_STAT("exit_null", exit_null),
+	VCPU_STAT("exit_validity", exit_validity),
+	VCPU_STAT("exit_stop_request", exit_stop_request),
+	VCPU_STAT("exit_external_request", exit_external_request),
+	VCPU_STAT("exit_io_request", exit_io_request),
+	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
+	VCPU_STAT("exit_instruction", exit_instruction),
+	VCPU_STAT("exit_pei", exit_pei),
+	VCPU_STAT("exit_program_interruption", exit_program_interruption),
+	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
+	VCPU_STAT("exit_operation_exception", exit_operation_exception),
+	VCPU_STAT("halt_successful_poll", halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
+	VCPU_STAT("halt_wakeup", halt_wakeup),
+	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("instruction_lctlg", instruction_lctlg),
+	VCPU_STAT("instruction_lctl", instruction_lctl),
+	VCPU_STAT("instruction_stctl", instruction_stctl),
+	VCPU_STAT("instruction_stctg", instruction_stctg),
+	VCPU_STAT("deliver_ckc", deliver_ckc),
+	VCPU_STAT("deliver_cputm", deliver_cputm),
+	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
+	VCPU_STAT("deliver_external_call", deliver_external_call),
+	VCPU_STAT("deliver_service_signal", deliver_service_signal),
+	VCPU_STAT("deliver_virtio", deliver_virtio),
+	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
+	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
+	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
+	VCPU_STAT("deliver_program", deliver_program),
+	VCPU_STAT("deliver_io", deliver_io),
+	VCPU_STAT("deliver_machine_check", deliver_machine_check),
+	VCPU_STAT("exit_wait_state", exit_wait_state),
+	VCPU_STAT("inject_ckc", inject_ckc),
+	VCPU_STAT("inject_cputm", inject_cputm),
+	VCPU_STAT("inject_external_call", inject_external_call),
+	VM_STAT("inject_float_mchk", inject_float_mchk),
+	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
+	VM_STAT("inject_io", inject_io),
+	VCPU_STAT("inject_mchk", inject_mchk),
+	VM_STAT("inject_pfault_done", inject_pfault_done),
+	VCPU_STAT("inject_program", inject_program),
+	VCPU_STAT("inject_restart", inject_restart),
+	VM_STAT("inject_service_signal", inject_service_signal),
+	VCPU_STAT("inject_set_prefix", inject_set_prefix),
+	VCPU_STAT("inject_stop_signal", inject_stop_signal),
+	VCPU_STAT("inject_pfault_init", inject_pfault_init),
+	VM_STAT("inject_virtio", inject_virtio),
+	VCPU_STAT("instruction_epsw", instruction_epsw),
+	VCPU_STAT("instruction_gs", instruction_gs),
+	VCPU_STAT("instruction_io_other", instruction_io_other),
+	VCPU_STAT("instruction_lpsw", instruction_lpsw),
+	VCPU_STAT("instruction_lpswe", instruction_lpswe),
+	VCPU_STAT("instruction_pfmf", instruction_pfmf),
+	VCPU_STAT("instruction_ptff", instruction_ptff),
+	VCPU_STAT("instruction_stidp", instruction_stidp),
+	VCPU_STAT("instruction_sck", instruction_sck),
+	VCPU_STAT("instruction_sckpf", instruction_sckpf),
+	VCPU_STAT("instruction_spx", instruction_spx),
+	VCPU_STAT("instruction_stpx", instruction_stpx),
+	VCPU_STAT("instruction_stap", instruction_stap),
+	VCPU_STAT("instruction_iske", instruction_iske),
+	VCPU_STAT("instruction_ri", instruction_ri),
+	VCPU_STAT("instruction_rrbe", instruction_rrbe),
+	VCPU_STAT("instruction_sske", instruction_sske),
+	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
+	VCPU_STAT("instruction_essa", instruction_essa),
+	VCPU_STAT("instruction_stsi", instruction_stsi),
+	VCPU_STAT("instruction_stfl", instruction_stfl),
+	VCPU_STAT("instruction_tb", instruction_tb),
+	VCPU_STAT("instruction_tpi", instruction_tpi),
+	VCPU_STAT("instruction_tprot", instruction_tprot),
+	VCPU_STAT("instruction_tsch", instruction_tsch),
+	VCPU_STAT("instruction_sthyi", instruction_sthyi),
+	VCPU_STAT("instruction_sie", instruction_sie),
+	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
+	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
+	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
+	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
+	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
+	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
+	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
+	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
+	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
+	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
+	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
+	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
+	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
+	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
+	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
+	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
+	VCPU_STAT("instruction_diag_10", diagnose_10),
+	VCPU_STAT("instruction_diag_44", diagnose_44),
+	VCPU_STAT("instruction_diag_9c", diagnose_9c),
+	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
+	VCPU_STAT("instruction_diag_258", diagnose_258),
+	VCPU_STAT("instruction_diag_308", diagnose_308),
+	VCPU_STAT("instruction_diag_500", diagnose_500),
+	VCPU_STAT("instruction_diag_other", diagnose_other),
 	{ NULL }
 };
 
@@ -183,6 +184,11 @@
 module_param(halt_poll_max_steal, byte, 0644);
 MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
 
+/* if set to true, the GISA will be initialized and used if available */
+static bool use_gisa  = true;
+module_param(use_gisa, bool, 0644);
+MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
+
 /*
  * For now we handle at most 16 double words as this is what the s390 base
  * kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -219,6 +225,7 @@
 static struct gmap_notifier gmap_notifier;
 static struct gmap_notifier vsie_gmap_notifier;
 debug_info_t *kvm_s390_dbf;
+debug_info_t *kvm_s390_dbf_uv;
 
 /* Section: not file related */
 int kvm_arch_hardware_enable(void)
@@ -227,13 +234,15 @@
 	return 0;
 }
 
-int kvm_arch_check_processor_compat(void)
+int kvm_arch_check_processor_compat(void *opaque)
 {
 	return 0;
 }
 
+/* forward declarations */
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 			      unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
 
 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 {
@@ -292,7 +301,7 @@
 	.notifier_call = kvm_clock_sync,
 };
 
-int kvm_arch_hardware_setup(void)
+int kvm_arch_hardware_setup(void *opaque)
 {
 	gmap_notifier.notifier_call = kvm_gmap_notifier;
 	gmap_register_pte_notifier(&gmap_notifier);
@@ -453,16 +462,19 @@
 
 int kvm_arch_init(void *opaque)
 {
-	int rc;
+	int rc = -ENOMEM;
 
 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 	if (!kvm_s390_dbf)
 		return -ENOMEM;
 
-	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
-		rc = -ENOMEM;
-		goto out_debug_unreg;
-	}
+	kvm_s390_dbf_uv = debug_register("kvm-uv", 32, 1, 7 * sizeof(long));
+	if (!kvm_s390_dbf_uv)
+		goto out;
+
+	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view) ||
+	    debug_register_view(kvm_s390_dbf_uv, &debug_sprintf_view))
+		goto out;
 
 	kvm_s390_cpu_feat_init();
 
@@ -470,19 +482,17 @@
 	rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 	if (rc) {
 		pr_err("A FLIC registration call failed with rc=%d\n", rc);
-		goto out_debug_unreg;
+		goto out;
 	}
 
 	rc = kvm_s390_gib_init(GAL_ISC);
 	if (rc)
-		goto out_gib_destroy;
+		goto out;
 
 	return 0;
 
-out_gib_destroy:
-	kvm_s390_gib_destroy();
-out_debug_unreg:
-	debug_unregister(kvm_s390_dbf);
+out:
+	kvm_arch_exit();
 	return rc;
 }
 
@@ -490,6 +500,7 @@
 {
 	kvm_s390_gib_destroy();
 	debug_unregister(kvm_s390_dbf);
+	debug_unregister(kvm_s390_dbf_uv);
 }
 
 /* Section: device related */
@@ -532,6 +543,9 @@
 	case KVM_CAP_S390_CMMA_MIGRATION:
 	case KVM_CAP_S390_AIS:
 	case KVM_CAP_S390_AIS_MIGRATION:
+	case KVM_CAP_S390_VCPU_RESETS:
+	case KVM_CAP_SET_GUEST_DEBUG:
+	case KVM_CAP_S390_DIAG318:
 		r = 1;
 		break;
 	case KVM_CAP_S390_HPAGE_1M:
@@ -566,14 +580,16 @@
 	case KVM_CAP_S390_BPB:
 		r = test_facility(82);
 		break;
+	case KVM_CAP_S390_PROTECTED:
+		r = is_prot_virt_host();
+		break;
 	default:
 		r = 0;
 	}
 	return r;
 }
 
-static void kvm_s390_sync_dirty_log(struct kvm *kvm,
-				    struct kvm_memory_slot *memslot)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 	int i;
 	gfn_t cur_gfn, last_gfn;
@@ -614,9 +630,8 @@
 {
 	int r;
 	unsigned long n;
-	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
+	int is_dirty;
 
 	if (kvm_is_ucontrol(kvm))
 		return -EINVAL;
@@ -627,14 +642,7 @@
 	if (log->slot >= KVM_USER_MEM_SLOTS)
 		goto out;
 
-	slots = kvm_memslots(kvm);
-	memslot = id_to_memslot(slots, log->slot);
-	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
-		goto out;
-
-	kvm_s390_sync_dirty_log(kvm, memslot);
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
 	if (r)
 		goto out;
 
@@ -756,9 +764,9 @@
 			r = -EINVAL;
 		else {
 			r = 0;
-			down_write(&kvm->mm->mmap_sem);
+			mmap_write_lock(kvm->mm);
 			kvm->mm->context.allow_gmap_hpage_1m = 1;
-			up_write(&kvm->mm->mmap_sem);
+			mmap_write_unlock(kvm->mm);
 			/*
 			 * We might have to create fake 4k page
 			 * tables. To avoid that the hardware works on
@@ -1808,7 +1816,7 @@
 	if (!keys)
 		return -ENOMEM;
 
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -1822,7 +1830,7 @@
 			break;
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 
 	if (!r) {
 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
@@ -1866,7 +1874,7 @@
 		goto out;
 
 	i = 0;
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
         while (i < args->count) {
 		unlocked = false;
@@ -1884,7 +1892,7 @@
 
 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
 		if (r) {
-			r = fixup_user_fault(current, current->mm, hva,
+			r = fixup_user_fault(current->mm, hva,
 					     FAULT_FLAG_WRITE, &unlocked);
 			if (r)
 				break;
@@ -1893,7 +1901,7 @@
 			i++;
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 out:
 	kvfree(keys);
 	return r;
@@ -1998,6 +2006,9 @@
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	struct kvm_memory_slot *ms;
 
+	if (unlikely(!slots->used_slots))
+		return 0;
+
 	cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
 	ms = gfn_to_memslot(kvm, cur_gfn);
 	args->count = 0;
@@ -2079,14 +2090,14 @@
 	if (!values)
 		return -ENOMEM;
 
-	down_read(&kvm->mm->mmap_sem);
+	mmap_read_lock(kvm->mm);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	if (peek)
 		ret = kvm_s390_peek_cmma(kvm, args, values, bufsize);
 	else
 		ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
-	up_read(&kvm->mm->mmap_sem);
+	mmap_read_unlock(kvm->mm);
 
 	if (kvm->arch.migration_mode)
 		args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages);
@@ -2136,7 +2147,7 @@
 		goto out;
 	}
 
-	down_read(&kvm->mm->mmap_sem);
+	mmap_read_lock(kvm->mm);
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	for (i = 0; i < args->count; i++) {
 		hva = gfn_to_hva(kvm, args->start_gfn + i);
@@ -2151,18 +2162,206 @@
 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
 	}
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
-	up_read(&kvm->mm->mmap_sem);
+	mmap_read_unlock(kvm->mm);
 
 	if (!kvm->mm->context.uses_cmm) {
-		down_write(&kvm->mm->mmap_sem);
+		mmap_write_lock(kvm->mm);
 		kvm->mm->context.uses_cmm = 1;
-		up_write(&kvm->mm->mmap_sem);
+		mmap_write_unlock(kvm->mm);
 	}
 out:
 	vfree(bits);
 	return r;
 }
 
+static int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rcp, u16 *rrcp)
+{
+	struct kvm_vcpu *vcpu;
+	u16 rc, rrc;
+	int ret = 0;
+	int i;
+
+	/*
+	 * We ignore failures and try to destroy as many CPUs as possible.
+	 * At the same time we must not free the assigned resources when
+	 * this fails, as the ultravisor still has access to that memory.
+	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
+	 * behind.
+	 * We want to return the first failure rc and rrc, though.
+	 */
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		if (kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc) && !ret) {
+			*rcp = rc;
+			*rrcp = rrc;
+			ret = -EIO;
+		}
+		mutex_unlock(&vcpu->mutex);
+	}
+	return ret;
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int i, r = 0;
+	u16 dummy;
+
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+		mutex_unlock(&vcpu->mutex);
+		if (r)
+			break;
+	}
+	if (r)
+		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+	int r = 0;
+	u16 dummy;
+	void __user *argp = (void __user *)cmd->data;
+
+	switch (cmd->cmd) {
+	case KVM_PV_ENABLE: {
+		r = -EINVAL;
+		if (kvm_s390_pv_is_protected(kvm))
+			break;
+
+		/*
+		 *  FMT 4 SIE needs esca. As we never switch back to bsca from
+		 *  esca, we need no cleanup in the error cases below
+		 */
+		r = sca_switch_to_extended(kvm);
+		if (r)
+			break;
+
+		mmap_write_lock(current->mm);
+		r = gmap_mark_unmergeable();
+		mmap_write_unlock(current->mm);
+		if (r)
+			break;
+
+		r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc);
+		if (r)
+			break;
+
+		r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+		if (r)
+			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+
+		/* we need to block service interrupts from now on */
+		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	}
+	case KVM_PV_DISABLE: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+		/*
+		 * If a CPU could not be destroyed, destroy VM will also fail.
+		 * There is no point in trying to destroy it. Instead return
+		 * the rc and rrc from the first CPU that failed destroying.
+		 */
+		if (r)
+			break;
+		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+
+		/* no need to block service interrupts any more */
+		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	}
+	case KVM_PV_SET_SEC_PARMS: {
+		struct kvm_s390_pv_sec_parm parms = {};
+		void *hdr;
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&parms, argp, sizeof(parms)))
+			break;
+
+		/* Currently restricted to 8KB */
+		r = -EINVAL;
+		if (parms.length > PAGE_SIZE * 2)
+			break;
+
+		r = -ENOMEM;
+		hdr = vmalloc(parms.length);
+		if (!hdr)
+			break;
+
+		r = -EFAULT;
+		if (!copy_from_user(hdr, (void __user *)parms.origin,
+				    parms.length))
+			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+						      &cmd->rc, &cmd->rrc);
+
+		vfree(hdr);
+		break;
+	}
+	case KVM_PV_UNPACK: {
+		struct kvm_s390_pv_unp unp = {};
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&unp, argp, sizeof(unp)))
+			break;
+
+		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+				       &cmd->rc, &cmd->rrc);
+		break;
+	}
+	case KVM_PV_VERIFY: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+			     cmd->rrc);
+		break;
+	}
+	case KVM_PV_PREP_RESET: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
+			     cmd->rc, cmd->rrc);
+		break;
+	}
+	case KVM_PV_UNSHARE_ALL: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
+			     cmd->rc, cmd->rrc);
+		break;
+	}
+	default:
+		r = -ENOTTY;
+	}
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -2260,6 +2459,33 @@
 		mutex_unlock(&kvm->slots_lock);
 		break;
 	}
+	case KVM_S390_PV_COMMAND: {
+		struct kvm_pv_cmd args;
+
+		/* protvirt means user sigp */
+		kvm->arch.user_cpu_state_ctrl = 1;
+		r = 0;
+		if (!is_prot_virt_host()) {
+			r = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&args, argp, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		if (args.flags) {
+			r = -EINVAL;
+			break;
+		}
+		mutex_lock(&kvm->lock);
+		r = kvm_s390_handle_pv(kvm, &args);
+		mutex_unlock(&kvm->lock);
+		if (copy_to_user(argp, &args, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -2509,7 +2735,8 @@
 	kvm->arch.use_skf = sclp.has_skey;
 	spin_lock_init(&kvm->arch.start_stop_lock);
 	kvm_s390_vsie_init(kvm);
-	kvm_s390_gisa_init(kvm);
+	if (use_gisa)
+		kvm_s390_gisa_init(kvm);
 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
 	return 0;
@@ -2523,6 +2750,8 @@
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	u16 rc, rrc;
+
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
 	kvm_s390_clear_local_irqs(vcpu);
@@ -2535,10 +2764,10 @@
 
 	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
+	/* We can not hold the vcpu mutex here, we are already dying */
+	if (kvm_s390_pv_cpu_get_handle(vcpu))
+		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
 	free_page((unsigned long)(vcpu->arch.sie_block));
-
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
 static void kvm_free_vcpus(struct kvm *kvm)
@@ -2547,7 +2776,7 @@
 	struct kvm_vcpu *vcpu;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_arch_vcpu_destroy(vcpu);
+		kvm_vcpu_destroy(vcpu);
 
 	mutex_lock(&kvm->lock);
 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
@@ -2559,10 +2788,20 @@
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	u16 rc, rrc;
+
 	kvm_free_vcpus(kvm);
 	sca_dispose(kvm);
-	debug_unregister(kvm->arch.dbf);
 	kvm_s390_gisa_destroy(kvm);
+	/*
+	 * We are already at the end of life and kvm->lock is not taken.
+	 * This is ok as the file descriptor is closed by now and nobody
+	 * can mess with the pv state. To avoid lockdep_assert_held from
+	 * complaining we do not use kvm_s390_pv_is_protected.
+	 */
+	if (kvm_s390_pv_get_handle(kvm))
+		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)kvm->arch.sie_page2);
 	if (!kvm_is_ucontrol(kvm))
 		gmap_remove(kvm->arch.gmap);
@@ -2658,6 +2897,9 @@
 	unsigned int vcpu_idx;
 	u32 scaol, scaoh;
 
+	if (kvm->arch.use_esca)
+		return 0;
+
 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
 	if (!new_sca)
 		return -ENOMEM;
@@ -2709,39 +2951,6 @@
 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
 }
 
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
-	kvm_clear_async_pf_completion_queue(vcpu);
-	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
-				    KVM_SYNC_GPRS |
-				    KVM_SYNC_ACRS |
-				    KVM_SYNC_CRS |
-				    KVM_SYNC_ARCH0 |
-				    KVM_SYNC_PFAULT;
-	kvm_s390_set_prefix(vcpu, 0);
-	if (test_kvm_facility(vcpu->kvm, 64))
-		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
-	if (test_kvm_facility(vcpu->kvm, 82))
-		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
-	if (test_kvm_facility(vcpu->kvm, 133))
-		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
-	if (test_kvm_facility(vcpu->kvm, 156))
-		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
-	/* fprs can be synchronized via vrs, even if the guest has no vx. With
-	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
-	 */
-	if (MACHINE_HAS_VX)
-		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
-	else
-		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
-
-	if (kvm_is_ucontrol(vcpu->kvm))
-		return __kvm_ucontrol_vcpu_init(vcpu);
-
-	return 0;
-}
-
 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
 {
@@ -2850,33 +3059,6 @@
 
 }
 
-static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
-{
-	/* this equals initial cpu reset in pop, but we don't switch to ESA */
-	vcpu->arch.sie_block->gpsw.mask = 0UL;
-	vcpu->arch.sie_block->gpsw.addr = 0UL;
-	kvm_s390_set_prefix(vcpu, 0);
-	kvm_s390_set_cpu_timer(vcpu, 0);
-	vcpu->arch.sie_block->ckc       = 0UL;
-	vcpu->arch.sie_block->todpr     = 0;
-	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
-	vcpu->arch.sie_block->gcr[0]  = CR0_UNUSED_56 |
-					CR0_INTERRUPT_KEY_SUBMASK |
-					CR0_MEASUREMENT_ALERT_SUBMASK;
-	vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
-					CR14_UNUSED_33 |
-					CR14_EXTERNAL_DAMAGE_SUBMASK;
-	vcpu->run->s.regs.fpc = 0;
-	vcpu->arch.sie_block->gbea = 1;
-	vcpu->arch.sie_block->pp = 0;
-	vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
-	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
-	kvm_clear_async_pf_completion_queue(vcpu);
-	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
-		kvm_s390_vcpu_stop(vcpu);
-	kvm_s390_clear_local_irqs(vcpu);
-}
-
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 {
 	mutex_lock(&vcpu->kvm->lock);
@@ -2966,9 +3148,10 @@
 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
 }
 
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int rc = 0;
+	u16 uvrc, uvrrc;
 
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
 						    CPUSTAT_SM |
@@ -3036,29 +3219,33 @@
 
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
+	mutex_lock(&vcpu->kvm->lock);
+	if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+		if (rc)
+			kvm_s390_vcpu_unsetup_cmma(vcpu);
+	}
+	mutex_unlock(&vcpu->kvm->lock);
+
 	return rc;
 }
 
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-				      unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
-	struct kvm_vcpu *vcpu;
-	struct sie_page *sie_page;
-	int rc = -EINVAL;
-
 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
-		goto out;
+		return -EINVAL;
+	return 0;
+}
 
-	rc = -ENOMEM;
-
-	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
-	if (!vcpu)
-		goto out;
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
+{
+	struct sie_page *sie_page;
+	int rc;
 
 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
 	if (!sie_page)
-		goto out_free_cpu;
+		return -ENOMEM;
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
@@ -3067,31 +3254,65 @@
 	vcpu->arch.sie_block->mso = 0;
 	vcpu->arch.sie_block->msl = sclp.hamax;
 
-	vcpu->arch.sie_block->icpua = id;
+	vcpu->arch.sie_block->icpua = vcpu->vcpu_id;
 	spin_lock_init(&vcpu->arch.local_int.lock);
-	vcpu->arch.sie_block->gd = (u32)(u64)kvm->arch.gisa_int.origin;
+	vcpu->arch.sie_block->gd = (u32)(u64)vcpu->kvm->arch.gisa_int.origin;
 	if (vcpu->arch.sie_block->gd && sclp.has_gisaf)
 		vcpu->arch.sie_block->gd |= GISA_FORMAT1;
 	seqcount_init(&vcpu->arch.cputm_seqcount);
 
-	rc = kvm_vcpu_init(vcpu, kvm, id);
-	if (rc)
-		goto out_free_sie_block;
-	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
-		 vcpu->arch.sie_block);
-	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	kvm_clear_async_pf_completion_queue(vcpu);
+	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
+				    KVM_SYNC_GPRS |
+				    KVM_SYNC_ACRS |
+				    KVM_SYNC_CRS |
+				    KVM_SYNC_ARCH0 |
+				    KVM_SYNC_PFAULT |
+				    KVM_SYNC_DIAG318;
+	kvm_s390_set_prefix(vcpu, 0);
+	if (test_kvm_facility(vcpu->kvm, 64))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+	if (test_kvm_facility(vcpu->kvm, 82))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
+	if (test_kvm_facility(vcpu->kvm, 133))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
+	if (test_kvm_facility(vcpu->kvm, 156))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN;
+	/* fprs can be synchronized via vrs, even if the guest has no vx. With
+	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
+	 */
+	if (MACHINE_HAS_VX)
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
+	else
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
 
-	return vcpu;
+	if (kvm_is_ucontrol(vcpu->kvm)) {
+		rc = __kvm_ucontrol_vcpu_init(vcpu);
+		if (rc)
+			goto out_free_sie_block;
+	}
+
+	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
+
+	rc = kvm_s390_vcpu_setup(vcpu);
+	if (rc)
+		goto out_ucontrol_uninit;
+	return 0;
+
+out_ucontrol_uninit:
+	if (kvm_is_ucontrol(vcpu->kvm))
+		gmap_remove(vcpu->arch.gmap);
 out_free_sie_block:
 	free_page((unsigned long)(vcpu->arch.sie_block));
-out_free_cpu:
-	kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
-	return ERR_PTR(rc);
+	return rc;
 }
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
 	return kvm_s390_vcpu_has_irq(vcpu, 0);
 }
 
@@ -3291,10 +3512,76 @@
 	return r;
 }
 
-static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+static void kvm_arch_vcpu_ioctl_normal_reset(struct kvm_vcpu *vcpu)
 {
-	kvm_s390_vcpu_initial_reset(vcpu);
-	return 0;
+	vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_RI;
+	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+	memset(vcpu->run->s.regs.riccb, 0, sizeof(vcpu->run->s.regs.riccb));
+
+	kvm_clear_async_pf_completion_queue(vcpu);
+	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+		kvm_s390_vcpu_stop(vcpu);
+	kvm_s390_clear_local_irqs(vcpu);
+}
+
+static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+	/* Initial reset is a superset of the normal reset */
+	kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+
+	/*
+	 * This equals initial cpu reset in pop, but we don't switch to ESA.
+	 * We do not only reset the internal data, but also ...
+	 */
+	vcpu->arch.sie_block->gpsw.mask = 0;
+	vcpu->arch.sie_block->gpsw.addr = 0;
+	kvm_s390_set_prefix(vcpu, 0);
+	kvm_s390_set_cpu_timer(vcpu, 0);
+	vcpu->arch.sie_block->ckc = 0;
+	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
+	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
+	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;
+
+	/* ... the data in sync regs */
+	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
+	vcpu->run->s.regs.ckc = 0;
+	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
+	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
+	vcpu->run->psw_addr = 0;
+	vcpu->run->psw_mask = 0;
+	vcpu->run->s.regs.todpr = 0;
+	vcpu->run->s.regs.cputm = 0;
+	vcpu->run->s.regs.ckc = 0;
+	vcpu->run->s.regs.pp = 0;
+	vcpu->run->s.regs.gbea = 1;
+	vcpu->run->s.regs.fpc = 0;
+	/*
+	 * Do not reset these registers in the protected case, as some of
+	 * them are overlaid and they are not accessible in this case
+	 * anyway.
+	 */
+	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+		vcpu->arch.sie_block->gbea = 1;
+		vcpu->arch.sie_block->pp = 0;
+		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+		vcpu->arch.sie_block->todpr = 0;
+	}
+}
+
+static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_sync_regs *regs = &vcpu->run->s.regs;
+
+	/* Clear reset is a superset of the initial reset */
+	kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+
+	memset(&regs->gprs, 0, sizeof(regs->gprs));
+	memset(&regs->vrs, 0, sizeof(regs->vrs));
+	memset(&regs->acrs, 0, sizeof(regs->acrs));
+	memset(&regs->gscb, 0, sizeof(regs->gscb));
+
+	regs->etoken = 0;
+	regs->etoken_extension = 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -3468,14 +3755,20 @@
 
 	switch (mp_state->mp_state) {
 	case KVM_MP_STATE_STOPPED:
-		kvm_s390_vcpu_stop(vcpu);
+		rc = kvm_s390_vcpu_stop(vcpu);
 		break;
 	case KVM_MP_STATE_OPERATING:
-		kvm_s390_vcpu_start(vcpu);
+		rc = kvm_s390_vcpu_start(vcpu);
 		break;
 	case KVM_MP_STATE_LOAD:
+		if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
+			rc = -ENXIO;
+			break;
+		}
+		rc = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR_LOAD);
+		break;
 	case KVM_MP_STATE_CHECK_STOP:
-		/* fall through - CHECK_STOP and LOAD are not supported yet */
+		fallthrough;	/* CHECK_STOP and LOAD are not supported yet */
 	default:
 		rc = -ENXIO;
 	}
@@ -3633,11 +3926,13 @@
 	}
 }
 
-void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
 				     struct kvm_async_pf *work)
 {
 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+
+	return true;
 }
 
 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
@@ -3653,7 +3948,7 @@
 	/* s390 will always inject the page directly */
 }
 
-bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
 {
 	/*
 	 * s390 will always inject the page directly,
@@ -3662,33 +3957,31 @@
 	return true;
 }
 
-static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
 {
 	hva_t hva;
 	struct kvm_arch_async_pf arch;
-	int rc;
 
 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
-		return 0;
+		return false;
 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
 	    vcpu->arch.pfault_compare)
-		return 0;
+		return false;
 	if (psw_extint_disabled(vcpu))
-		return 0;
+		return false;
 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
-		return 0;
+		return false;
 	if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK))
-		return 0;
+		return false;
 	if (!vcpu->arch.gmap->pfault_enabled)
-		return 0;
+		return false;
 
 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
 	hva += current->thread.gmap_addr & ~PAGE_MASK;
 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
-		return 0;
+		return false;
 
-	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
-	return rc;
+	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
 }
 
 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
@@ -3708,9 +4001,6 @@
 	if (need_resched())
 		schedule();
 
-	if (test_cpu_flag(CIF_MCCK_PENDING))
-		s390_handle_mcck();
-
 	if (!kvm_is_ucontrol(vcpu->kvm)) {
 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
 		if (rc)
@@ -3825,9 +4115,11 @@
 	return vcpu_post_run_fault_in_sie(vcpu);
 }
 
+#define PSW_INT_MASK (PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_MCHECK)
 static int __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	int rc, exit_reason;
+	struct sie_page *sie_page = (struct sie_page *)vcpu->arch.sie_block;
 
 	/*
 	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
@@ -3849,8 +4141,28 @@
 		guest_enter_irqoff();
 		__disable_cpu_timer_accounting(vcpu);
 		local_irq_enable();
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			memcpy(sie_page->pv_grregs,
+			       vcpu->run->s.regs.gprs,
+			       sizeof(sie_page->pv_grregs));
+		}
 		exit_reason = sie64a(vcpu->arch.sie_block,
 				     vcpu->run->s.regs.gprs);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			memcpy(vcpu->run->s.regs.gprs,
+			       sie_page->pv_grregs,
+			       sizeof(sie_page->pv_grregs));
+			/*
+			 * We're not allowed to inject interrupts on intercepts
+			 * that leave the guest state in an "in-between" state
+			 * where the next SIE entry will do a continuation.
+			 * Fence interrupts in our "internal" PSW.
+			 */
+			if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
+			    vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
+				vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+			}
+		}
 		local_irq_disable();
 		__enable_cpu_timer_accounting(vcpu);
 		guest_exit_irqoff();
@@ -3864,8 +4176,9 @@
 	return rc;
 }
 
-static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void sync_regs_fmt2(struct kvm_vcpu *vcpu)
 {
+	struct kvm_run *kvm_run = vcpu->run;
 	struct runtime_instr_cb *riccb;
 	struct gs_cb *gscb;
 
@@ -3873,16 +4186,7 @@
 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
-	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
-		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
-	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
-		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
-		/* some control register changes require a tlb flush */
-		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-	}
 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
-		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
-		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
@@ -3894,6 +4198,10 @@
 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
 			kvm_clear_async_pf_completion_queue(vcpu);
 	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_DIAG318) {
+		vcpu->arch.diag318_info.val = kvm_run->s.regs.diag318;
+		vcpu->arch.sie_block->cpnc = vcpu->arch.diag318_info.cpnc;
+	}
 	/*
 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
 	 * we should enable RI here instead of doing the lazy enablement.
@@ -3923,20 +4231,6 @@
 		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
 		vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
 	}
-	save_access_regs(vcpu->arch.host_acrs);
-	restore_access_regs(vcpu->run->s.regs.acrs);
-	/* save host (userspace) fprs/vrs */
-	save_fpu_regs();
-	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
-	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
-	if (MACHINE_HAS_VX)
-		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
-	else
-		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
-	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-	if (test_fp_ctl(current->thread.fpu.fpc))
-		/* User space provided an invalid FPC, let's clear it */
-		current->thread.fpu.fpc = 0;
 	if (MACHINE_HAS_GS) {
 		preempt_disable();
 		__ctl_set_bit(2, 4);
@@ -3952,33 +4246,68 @@
 		preempt_enable();
 	}
 	/* SIE will load etoken directly from SDNX and therefore kvm_run */
+}
+
+static void sync_regs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
+
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+		/* some control register changes require a tlb flush */
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	}
+	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
+		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+	}
+	save_access_regs(vcpu->arch.host_acrs);
+	restore_access_regs(vcpu->run->s.regs.acrs);
+	/* save host (userspace) fprs/vrs */
+	save_fpu_regs();
+	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+	if (MACHINE_HAS_VX)
+		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+	else
+		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
+	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+	if (test_fp_ctl(current->thread.fpu.fpc))
+		/* User space provided an invalid FPC, let's clear it */
+		current->thread.fpu.fpc = 0;
+
+	/* Sync fmt2 only data */
+	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
+		sync_regs_fmt2(vcpu);
+	} else {
+		/*
+		 * In several places we have to modify our internal view to
+		 * not do things that are disallowed by the ultravisor. For
+		 * example we must not inject interrupts after specific exits
+		 * (e.g. 112 prefix page not secure). We do this by turning
+		 * off the machine check, external and I/O interrupt bits
+		 * of our PSW copy. To avoid getting validity intercepts, we
+		 * do only accept the condition code from userspace.
+		 */
+		vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
+		vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
+						   PSW_MASK_CC;
+	}
 
 	kvm_run->kvm_dirty_regs = 0;
 }
 
-static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs_fmt2(struct kvm_vcpu *vcpu)
 {
-	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
-	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
-	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
-	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
-	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
-	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+	struct kvm_run *kvm_run = vcpu->run;
+
 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
-	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
-	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
-	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
 	kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
-	save_access_regs(vcpu->run->s.regs.acrs);
-	restore_access_regs(vcpu->arch.host_acrs);
-	/* Save guest register state */
-	save_fpu_regs();
-	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-	/* Restore will be done lazily at return */
-	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
-	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+	kvm_run->s.regs.diag318 = vcpu->arch.diag318_info.val;
 	if (MACHINE_HAS_GS) {
 		preempt_disable();
 		__ctl_set_bit(2, 4);
@@ -3994,8 +4323,34 @@
 	/* SIE will save etoken directly into SDNX and therefore kvm_run */
 }
 
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void store_regs(struct kvm_vcpu *vcpu)
 {
+	struct kvm_run *kvm_run = vcpu->run;
+
+	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
+	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+	save_access_regs(vcpu->run->s.regs.acrs);
+	restore_access_regs(vcpu->arch.host_acrs);
+	/* Save guest register state */
+	save_fpu_regs();
+	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+	/* Restore will be done lazily at return */
+	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+	if (likely(!kvm_s390_pv_cpu_is_protected(vcpu)))
+		store_regs_fmt2(vcpu);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *kvm_run = vcpu->run;
 	int rc;
 
 	if (kvm_run->immediate_exit)
@@ -4015,6 +4370,10 @@
 
 	kvm_sigset_activate(vcpu);
 
+	/*
+	 * no need to check the return value of vcpu_start as it can only have
+	 * an error for protvirt, but protvirt means user cpu state
+	 */
 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
 		kvm_s390_vcpu_start(vcpu);
 	} else if (is_vcpu_stopped(vcpu)) {
@@ -4024,7 +4383,7 @@
 		goto out;
 	}
 
-	sync_regs(vcpu, kvm_run);
+	sync_regs(vcpu);
 	enable_cpu_timer_accounting(vcpu);
 
 	might_fault();
@@ -4046,7 +4405,7 @@
 	}
 
 	disable_cpu_timer_accounting(vcpu);
-	store_regs(vcpu, kvm_run);
+	store_regs(vcpu);
 
 	kvm_sigset_deactivate(vcpu);
 
@@ -4152,18 +4511,27 @@
 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
 }
 
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
 {
-	int i, online_vcpus, started_vcpus = 0;
+	int i, online_vcpus, r = 0, started_vcpus = 0;
 
 	if (!is_vcpu_stopped(vcpu))
-		return;
+		return 0;
 
 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
 	/* Only one cpu at a time may enter/leave the STOPPED state. */
 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
 
+	/* Let's tell the UV that we want to change into the operating state */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR);
+		if (r) {
+			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+			return r;
+		}
+	}
+
 	for (i = 0; i < online_vcpus; i++) {
 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
 			started_vcpus++;
@@ -4183,31 +4551,52 @@
 
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED);
 	/*
+	 * The real PSW might have changed due to a RESTART interpreted by the
+	 * ultravisor. We block all interrupts and let the next sie exit
+	 * refresh our view.
+	 */
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
+	/*
 	 * Another VCPU might have used IBS while we were offline.
 	 * Let's play safe and flush the VCPU at startup.
 	 */
 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
-	return;
+	return 0;
 }
 
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
 {
-	int i, online_vcpus, started_vcpus = 0;
+	int i, online_vcpus, r = 0, started_vcpus = 0;
 	struct kvm_vcpu *started_vcpu = NULL;
 
 	if (is_vcpu_stopped(vcpu))
-		return;
+		return 0;
 
 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
 	/* Only one cpu at a time may enter/leave the STOPPED state. */
 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
 
-	/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
+	/* Let's tell the UV that we want to change into the stopped state */
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP);
+		if (r) {
+			spin_unlock(&vcpu->kvm->arch.start_stop_lock);
+			return r;
+		}
+	}
+
+	/*
+	 * Set the VCPU to STOPPED and THEN clear the interrupt flag,
+	 * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders
+	 * have been fully processed. This will ensure that the VCPU
+	 * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
+	 */
+	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
 	kvm_s390_clear_stop_irq(vcpu);
 
-	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
 	__disable_ibs_on_vcpu(vcpu);
 
 	for (i = 0; i < online_vcpus; i++) {
@@ -4226,7 +4615,7 @@
 	}
 
 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
-	return;
+	return 0;
 }
 
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
@@ -4253,12 +4642,42 @@
 	return r;
 }
 
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	int r = 0;
+
+	if (mop->flags || !mop->size)
+		return -EINVAL;
+	if (mop->size + mop->sida_offset < mop->size)
+		return -EINVAL;
+	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+		return -E2BIG;
+	if (!kvm_s390_pv_cpu_is_protected(vcpu))
+		return -EINVAL;
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_SIDA_READ:
+		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+				 mop->sida_offset), mop->size))
+			r = -EFAULT;
+
+		break;
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+				   mop->sida_offset), uaddr, mop->size))
+			r = -EFAULT;
+		break;
+	}
+	return r;
+}
 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 				  struct kvm_s390_mem_op *mop)
 {
 	void __user *uaddr = (void __user *)mop->buf;
 	void *tmpbuf = NULL;
-	int r, srcu_idx;
+	int r = 0;
 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
 
@@ -4268,14 +4687,15 @@
 	if (mop->size > MEM_OP_MAX_SIZE)
 		return -E2BIG;
 
+	if (kvm_s390_pv_cpu_is_protected(vcpu))
+		return -EINVAL;
+
 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
 		tmpbuf = vmalloc(mop->size);
 		if (!tmpbuf)
 			return -ENOMEM;
 	}
 
-	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-
 	switch (mop->op) {
 	case KVM_S390_MEMOP_LOGICAL_READ:
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
@@ -4301,12 +4721,8 @@
 		}
 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
 		break;
-	default:
-		r = -EINVAL;
 	}
 
-	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-
 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 
@@ -4314,6 +4730,31 @@
 	return r;
 }
 
+static long kvm_s390_guest_memsida_op(struct kvm_vcpu *vcpu,
+				      struct kvm_s390_mem_op *mop)
+{
+	int r, srcu_idx;
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		r = kvm_s390_guest_mem_op(vcpu, mop);
+		break;
+	case KVM_S390_MEMOP_SIDA_READ:
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		/* we are locked against sida going away by the vcpu->mutex */
+		r = kvm_s390_guest_sida_op(vcpu, mop);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+	return r;
+}
+
 long kvm_arch_vcpu_async_ioctl(struct file *filp,
 			       unsigned int ioctl, unsigned long arg)
 {
@@ -4349,6 +4790,7 @@
 	void __user *argp = (void __user *)arg;
 	int idx;
 	long r;
+	u16 rc, rrc;
 
 	vcpu_load(vcpu);
 
@@ -4367,12 +4809,43 @@
 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
 		break;
 	}
+	case KVM_S390_CLEAR_RESET:
+		r = 0;
+		kvm_arch_vcpu_ioctl_clear_reset(vcpu);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+					  UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
+			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
+				   rc, rrc);
+		}
+		break;
 	case KVM_S390_INITIAL_RESET:
-		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+		r = 0;
+		kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+					  UVC_CMD_CPU_RESET_INITIAL,
+					  &rc, &rrc);
+			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
+				   rc, rrc);
+		}
+		break;
+	case KVM_S390_NORMAL_RESET:
+		r = 0;
+		kvm_arch_vcpu_ioctl_normal_reset(vcpu);
+		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+			r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+					  UVC_CMD_CPU_RESET, &rc, &rrc);
+			VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
+				   rc, rrc);
+		}
 		break;
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
 		struct kvm_one_reg reg;
+		r = -EINVAL;
+		if (kvm_s390_pv_cpu_is_protected(vcpu))
+			break;
 		r = -EFAULT;
 		if (copy_from_user(&reg, argp, sizeof(reg)))
 			break;
@@ -4435,7 +4908,7 @@
 		struct kvm_s390_mem_op mem_op;
 
 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
-			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+			r = kvm_s390_guest_memsida_op(vcpu, &mem_op);
 		else
 			r = -EFAULT;
 		break;
@@ -4495,12 +4968,6 @@
 	return VM_FAULT_SIGBUS;
 }
 
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
-
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot,
@@ -4521,12 +4988,15 @@
 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
 		return -EINVAL;
 
+	/* When we are protected, we should not change the memory slots */
+	if (kvm_s390_pv_get_handle(kvm))
+		return -EINVAL;
 	return 0;
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
 				const struct kvm_userspace_memory_region *mem,
-				const struct kvm_memory_slot *old,
+				struct kvm_memory_slot *old,
 				const struct kvm_memory_slot *new,
 				enum kvm_mr_change change)
 {
@@ -4542,7 +5012,7 @@
 					old->npages * PAGE_SIZE);
 		if (rc)
 			break;
-		/* FALLTHROUGH */
+		fallthrough;
 	case KVM_MR_CREATE:
 		rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
 				      mem->guest_phys_addr, mem->memory_size);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 63d94a5..a3e9b71 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -2,7 +2,7 @@
 /*
  * definition for kvm on s390
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -15,6 +15,7 @@
 #include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/lockdep.h>
 #include <asm/facility.h>
 #include <asm/processor.h>
 #include <asm/sclp.h>
@@ -25,6 +26,17 @@
 #define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
 
 extern debug_info_t *kvm_s390_dbf;
+extern debug_info_t *kvm_s390_dbf_uv;
+
+#define KVM_UV_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event((d_kvm)->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+	debug_sprintf_event(kvm_s390_dbf_uv, d_loglevel, \
+			    "%d: " d_string "\n", (d_kvm)->userspace_pid, \
+			    d_args); \
+} while (0)
+
 #define KVM_EVENT(d_loglevel, d_string, d_args...)\
 do { \
 	debug_sprintf_event(kvm_s390_dbf, d_loglevel, d_string "\n", \
@@ -196,6 +208,39 @@
 	return kvm->arch.user_cpu_state_ctrl != 0;
 }
 
+/* implemented in pv.c */
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+			      u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+		       unsigned long tweak, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+	return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pv.handle;
+}
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->lock);
+	return !!kvm_s390_pv_get_handle(kvm);
+}
+
+static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_held(&vcpu->mutex);
+	return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -286,8 +331,8 @@
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
-void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
-void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
+int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 bool kvm_s390_vcpu_sie_inhibited(struct kvm_vcpu *vcpu);
@@ -373,6 +418,7 @@
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
 extern struct kvm_device_ops kvm_flic_ops;
 int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
+int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
 int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
 			   void __user *buf, int len);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 560310e..3b1a498 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -2,7 +2,7 @@
 /*
  * handling privileged instructions
  *
- * Copyright IBM Corp. 2008, 2018
+ * Copyright IBM Corp. 2008, 2020
  *
  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  *               Christian Borntraeger <borntraeger@de.ibm.com>
@@ -13,6 +13,7 @@
 #include <linux/errno.h>
 #include <linux/compat.h>
 #include <linux/mm_types.h>
+#include <linux/pgtable.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/facility.h>
@@ -20,9 +21,7 @@
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
 #include <asm/sysinfo.h>
-#include <asm/pgtable.h>
 #include <asm/page-states.h>
-#include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/io.h>
 #include <asm/ptrace.h>
@@ -270,18 +269,18 @@
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 retry:
 	unlocked = false;
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	rc = get_guest_storage_key(current->mm, vmaddr, &key);
 
 	if (rc) {
-		rc = fixup_user_fault(current, current->mm, vmaddr,
+		rc = fixup_user_fault(current->mm, vmaddr,
 				      FAULT_FLAG_WRITE, &unlocked);
 		if (!rc) {
-			up_read(&current->mm->mmap_sem);
+			mmap_read_unlock(current->mm);
 			goto retry;
 		}
 	}
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 	if (rc == -EFAULT)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	if (rc < 0)
@@ -317,17 +316,17 @@
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 retry:
 	unlocked = false;
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	rc = reset_guest_reference_bit(current->mm, vmaddr);
 	if (rc < 0) {
-		rc = fixup_user_fault(current, current->mm, vmaddr,
+		rc = fixup_user_fault(current->mm, vmaddr,
 				      FAULT_FLAG_WRITE, &unlocked);
 		if (!rc) {
-			up_read(&current->mm->mmap_sem);
+			mmap_read_unlock(current->mm);
 			goto retry;
 		}
 	}
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 	if (rc == -EFAULT)
 		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	if (rc < 0)
@@ -385,19 +384,21 @@
 		if (kvm_is_error_hva(vmaddr))
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 
-		down_read(&current->mm->mmap_sem);
+		mmap_read_lock(current->mm);
 		rc = cond_set_guest_storage_key(current->mm, vmaddr, key, &oldkey,
 						m3 & SSKE_NQ, m3 & SSKE_MR,
 						m3 & SSKE_MC);
 
 		if (rc < 0) {
-			rc = fixup_user_fault(current, current->mm, vmaddr,
+			rc = fixup_user_fault(current->mm, vmaddr,
 					      FAULT_FLAG_WRITE, &unlocked);
 			rc = !rc ? -EAGAIN : rc;
 		}
-		up_read(&current->mm->mmap_sem);
+		mmap_read_unlock(current->mm);
 		if (rc == -EFAULT)
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (rc == -EAGAIN)
+			continue;
 		if (rc < 0)
 			return rc;
 		start += PAGE_SIZE;
@@ -874,7 +875,7 @@
 
 	operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
-	if (operand2 & 0xfff)
+	if (!kvm_s390_pv_cpu_is_protected(vcpu) && (operand2 & 0xfff))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	switch (fc) {
@@ -895,8 +896,13 @@
 		handle_stsi_3_2_2(vcpu, (void *) mem);
 		break;
 	}
-
-	rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+		memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
+		       PAGE_SIZE);
+		rc = 0;
+	} else {
+		rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
+	}
 	if (rc) {
 		rc = kvm_s390_inject_prog_cond(vcpu, rc);
 		goto out;
@@ -1086,15 +1092,15 @@
 
 			if (rc)
 				return rc;
-			down_read(&current->mm->mmap_sem);
+			mmap_read_lock(current->mm);
 			rc = cond_set_guest_storage_key(current->mm, vmaddr,
 							key, NULL, nq, mr, mc);
 			if (rc < 0) {
-				rc = fixup_user_fault(current, current->mm, vmaddr,
+				rc = fixup_user_fault(current->mm, vmaddr,
 						      FAULT_FLAG_WRITE, &unlocked);
 				rc = !rc ? -EAGAIN : rc;
 			}
-			up_read(&current->mm->mmap_sem);
+			mmap_read_unlock(current->mm);
 			if (rc == -EFAULT)
 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 			if (rc == -EAGAIN)
@@ -1117,7 +1123,7 @@
 }
 
 /*
- * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
+ * Must be called with relevant read locks held (kvm->mm->mmap_lock, kvm->srcu)
  */
 static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
 {
@@ -1215,9 +1221,9 @@
 		 * already correct, we do nothing and avoid the lock.
 		 */
 		if (vcpu->kvm->mm->context.uses_cmm == 0) {
-			down_write(&vcpu->kvm->mm->mmap_sem);
+			mmap_write_lock(vcpu->kvm->mm);
 			vcpu->kvm->mm->context.uses_cmm = 1;
-			up_write(&vcpu->kvm->mm->mmap_sem);
+			mmap_write_unlock(vcpu->kvm->mm);
 		}
 		/*
 		 * If we are here, we are supposed to have CMMA enabled in
@@ -1234,11 +1240,11 @@
 	} else {
 		int srcu_idx;
 
-		down_read(&vcpu->kvm->mm->mmap_sem);
+		mmap_read_lock(vcpu->kvm->mm);
 		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		i = __do_essa(vcpu, orc);
 		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
-		up_read(&vcpu->kvm->mm->mmap_sem);
+		mmap_read_unlock(vcpu->kvm->mm);
 		if (i < 0)
 			return i;
 		/* Account for the possible extra cbrl entry */
@@ -1246,10 +1252,10 @@
 	}
 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
 	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
-	down_read(&gmap->mm->mmap_sem);
+	mmap_read_lock(gmap->mm);
 	for (i = 0; i < entries; ++i)
 		__gmap_zap(gmap, cbrlo[i]);
-	up_read(&gmap->mm->mmap_sem);
+	mmap_read_unlock(gmap->mm);
 	return 0;
 }
 
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 0000000..8228878
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Protected Virtual Machines
+ *
+ * Copyright IBM Corp. 2019, 2020
+ *    Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	if (!kvm_s390_pv_cpu_get_handle(vcpu))
+		return 0;
+
+	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+		     vcpu->vcpu_id, *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
+
+	/* Intended memory leak for something that should never happen. */
+	if (!cc)
+		free_pages(vcpu->arch.pv.stor_base,
+			   get_order(uv_info.guest_cpu_stor_len));
+
+	free_page(sida_origin(vcpu->arch.sie_block));
+	vcpu->arch.sie_block->pv_handle_cpu = 0;
+	vcpu->arch.sie_block->pv_handle_config = 0;
+	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+	vcpu->arch.sie_block->sdf = 0;
+	/*
+	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
+	 * Use the reset value of gbea to avoid leaking the kernel pointer of
+	 * the just freed sida.
+	 */
+	vcpu->arch.sie_block->gbea = 1;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+	return cc ? EIO : 0;
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_csc uvcb = {
+		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
+		.header.len = sizeof(uvcb),
+	};
+	int cc;
+
+	if (kvm_s390_pv_cpu_get_handle(vcpu))
+		return -EINVAL;
+
+	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+						   get_order(uv_info.guest_cpu_stor_len));
+	if (!vcpu->arch.pv.stor_base)
+		return -ENOMEM;
+
+	/* Input */
+	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+	uvcb.num = vcpu->arch.sie_block->icpua;
+	uvcb.state_origin = (u64)vcpu->arch.sie_block;
+	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+	/* Alloc Secure Instruction Data Area Designation */
+	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!vcpu->arch.sie_block->sidad) {
+		free_pages(vcpu->arch.pv.stor_base,
+			   get_order(uv_info.guest_cpu_stor_len));
+		return -ENOMEM;
+	}
+
+	cc = uv_call(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(vcpu->kvm, 3,
+		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+		     uvcb.header.rrc);
+
+	if (cc) {
+		u16 dummy;
+
+		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+		return -EIO;
+	}
+
+	/* Output */
+	vcpu->arch.pv.handle = uvcb.cpu_handle;
+	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+	vcpu->arch.sie_block->sdf = 2;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+/* only free resources when the destroy was successful */
+static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+	vfree(kvm->arch.pv.stor_var);
+	free_pages(kvm->arch.pv.stor_base,
+		   get_order(uv_info.guest_base_stor_len));
+	memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+	unsigned long base = uv_info.guest_base_stor_len;
+	unsigned long virt = uv_info.guest_virt_var_stor_len;
+	unsigned long npages = 0, vlen = 0;
+	struct kvm_memory_slot *memslot;
+
+	kvm->arch.pv.stor_var = NULL;
+	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+	if (!kvm->arch.pv.stor_base)
+		return -ENOMEM;
+
+	/*
+	 * Calculate current guest storage for allocation of the
+	 * variable storage, which is based on the length in MB.
+	 *
+	 * Slots are sorted by GFN
+	 */
+	mutex_lock(&kvm->slots_lock);
+	memslot = kvm_memslots(kvm)->memslots;
+	npages = memslot->base_gfn + memslot->npages;
+	mutex_unlock(&kvm->slots_lock);
+
+	kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+	/* Allocate variable storage */
+	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+	vlen += uv_info.guest_virt_base_stor_len;
+	kvm->arch.pv.stor_var = vzalloc(vlen);
+	if (!kvm->arch.pv.stor_var)
+		goto out_err;
+	return 0;
+
+out_err:
+	kvm_s390_pv_dealloc_vm(kvm);
+	return -ENOMEM;
+}
+
+/* this should not fail, but if it does, we must not free the donated memory */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	/* make all pages accessible before destroying the guest */
+	s390_reset_acc(kvm->mm);
+
+	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	atomic_set(&kvm->mm->context.is_protected, 0);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
+	/* Intended memory leak on "impossible" error */
+	if (!cc)
+		kvm_s390_pv_dealloc_vm(kvm);
+	return cc ? -EIO : 0;
+}
+
+int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_cgc uvcb = {
+		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
+		.header.len = sizeof(uvcb)
+	};
+	int cc, ret;
+	u16 dummy;
+
+	ret = kvm_s390_pv_alloc_vm(kvm);
+	if (ret)
+		return ret;
+
+	/* Inputs */
+	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+	uvcb.guest_asce = kvm->arch.gmap->asce;
+	uvcb.guest_sca = (unsigned long)kvm->arch.sca;
+	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+	cc = uv_call_sched(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+
+	/* Outputs */
+	kvm->arch.pv.handle = uvcb.guest_handle;
+
+	if (cc) {
+		if (uvcb.header.rc & UVC_RC_NEED_DESTROY)
+			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
+		else
+			kvm_s390_pv_dealloc_vm(kvm);
+		return -EIO;
+	}
+	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+	return 0;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+			      u16 *rrc)
+{
+	struct uv_cb_ssc uvcb = {
+		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+		.header.len = sizeof(uvcb),
+		.sec_header_origin = (u64)hdr,
+		.sec_header_len = length,
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+	};
+	int cc = uv_call(0, (u64)&uvcb);
+
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+		     *rc, *rrc);
+	if (!cc)
+		atomic_set(&kvm->mm->context.is_protected, 1);
+	return cc ? -EINVAL : 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
+		      u64 offset, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_unp uvcb = {
+		.header.cmd = UVC_CMD_UNPACK_IMG,
+		.header.len = sizeof(uvcb),
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+		.gaddr = addr,
+		.tweak[0] = tweak,
+		.tweak[1] = offset,
+	};
+	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+
+	if (ret && ret != -EAGAIN)
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+			     uvcb.gaddr, *rc, *rrc);
+	return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+		       unsigned long tweak, u16 *rc, u16 *rrc)
+{
+	u64 offset = 0;
+	int ret = 0;
+
+	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+		return -EINVAL;
+
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+		     addr, size);
+
+	while (offset < size) {
+		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
+		if (ret == -EAGAIN) {
+			cond_resched();
+			if (fatal_signal_pending(current))
+				break;
+			continue;
+		}
+		if (ret)
+			break;
+		addr += PAGE_SIZE;
+		offset += PAGE_SIZE;
+	}
+	if (!ret)
+		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+	return ret;
+}
+
+int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
+{
+	struct uv_cb_cpu_set_state uvcb = {
+		.header.cmd	= UVC_CMD_CPU_SET_STATE,
+		.header.len	= sizeof(uvcb),
+		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
+		.state		= state,
+	};
+	int cc;
+
+	cc = uv_call(0, (u64)&uvcb);
+	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
+		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
+	if (cc)
+		return -EINVAL;
+	return 0;
+}
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 683036c..3dc921e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -288,6 +288,34 @@
 	if (!dst_vcpu)
 		return SIGP_CC_NOT_OPERATIONAL;
 
+	/*
+	 * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders
+	 * are processed asynchronously. Until the affected VCPU finishes
+	 * its work and calls back into KVM to clear the (RESTART or STOP)
+	 * interrupt, we need to return any new non-reset orders "busy".
+	 *
+	 * This is important because a single VCPU could issue:
+	 *  1) SIGP STOP $DESTINATION
+	 *  2) SIGP SENSE $DESTINATION
+	 *
+	 * If the SIGP SENSE would not be rejected as "busy", it could
+	 * return an incorrect answer as to whether the VCPU is STOPPED
+	 * or OPERATING.
+	 */
+	if (order_code != SIGP_INITIAL_CPU_RESET &&
+	    order_code != SIGP_CPU_RESET) {
+		/*
+		 * Lockless check. Both SIGP STOP and SIGP (RE)START
+		 * properly synchronize everything while processing
+		 * their orders, while the guest cannot observe a
+		 * difference when issuing other orders from two
+		 * different VCPUs.
+		 */
+		if (kvm_s390_is_stop_irq_pending(dst_vcpu) ||
+		    kvm_s390_is_restart_irq_pending(dst_vcpu))
+			return SIGP_CC_BUSY;
+	}
+
 	switch (order_code) {
 	case SIGP_SENSE:
 		vcpu->stat.instruction_sigp_sense++;
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 4f6c22d..3fbf708 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -416,11 +416,6 @@
 		memcpy((void *)((u64)scb_o + 0xc0),
 		       (void *)((u64)scb_s + 0xc0), 0xf0 - 0xc0);
 		break;
-	case ICPT_PARTEXEC:
-		/* MVPG only */
-		memcpy((void *)((u64)scb_o + 0xc0),
-		       (void *)((u64)scb_s + 0xc0), 0xd0 - 0xc0);
-		break;
 	}
 
 	if (scb_s->ihcpu != 0xffffU)
@@ -548,6 +543,7 @@
 		scb_s->ecd |= scb_o->ecd & ECD_ETOKENF;
 
 	scb_s->hpid = HPID_VSIE;
+	scb_s->cpnc = scb_o->cpnc;
 
 	prepare_ibc(vcpu, vsie_page);
 	rc = shadow_crycb(vcpu, vsie_page);
@@ -618,10 +614,10 @@
 	/* with mso/msl, the prefix lies at offset *mso* */
 	prefix += scb_s->mso;
 
-	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
 	if (!rc && (scb_s->ecb & ECB_TE))
 		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-					   prefix + PAGE_SIZE);
+					   prefix + PAGE_SIZE, NULL);
 	/*
 	 * We don't have to mprotect, we will be called for all unshadows.
 	 * SIE will detect if protection applies and trigger a validity.
@@ -912,7 +908,7 @@
 				    current->thread.gmap_addr, 1);
 
 	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				   current->thread.gmap_addr);
+				   current->thread.gmap_addr, NULL);
 	if (rc > 0) {
 		rc = inject_fault(vcpu, rc,
 				  current->thread.gmap_addr,
@@ -934,7 +930,7 @@
 {
 	if (vsie_page->fault_addr)
 		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				      vsie_page->fault_addr);
+				      vsie_page->fault_addr, NULL);
 	vsie_page->fault_addr = 0;
 }
 
@@ -982,6 +978,98 @@
 }
 
 /*
+ * Get a register for a nested guest.
+ * @vcpu the vcpu of the guest
+ * @vsie_page the vsie_page for the nested guest
+ * @reg the register number, the upper 4 bits are ignored.
+ * returns: the value of the register.
+ */
+static u64 vsie_get_register(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, u8 reg)
+{
+	/* no need to validate the parameter and/or perform error handling */
+	reg &= 0xf;
+	switch (reg) {
+	case 15:
+		return vsie_page->scb_s.gg15;
+	case 14:
+		return vsie_page->scb_s.gg14;
+	default:
+		return vcpu->run->s.regs.gprs[reg];
+	}
+}
+
+static int vsie_handle_mvpg(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
+{
+	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
+	unsigned long pei_dest, pei_src, src, dest, mask, prefix;
+	u64 *pei_block = &vsie_page->scb_o->mcic;
+	int edat, rc_dest, rc_src;
+	union ctlreg0 cr0;
+
+	cr0.val = vcpu->arch.sie_block->gcr[0];
+	edat = cr0.edat && test_kvm_facility(vcpu->kvm, 8);
+	mask = _kvm_s390_logical_to_effective(&scb_s->gpsw, PAGE_MASK);
+	prefix = scb_s->prefix << GUEST_PREFIX_SHIFT;
+
+	dest = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 20) & mask;
+	dest = _kvm_s390_real_to_abs(prefix, dest) + scb_s->mso;
+	src = vsie_get_register(vcpu, vsie_page, scb_s->ipb >> 16) & mask;
+	src = _kvm_s390_real_to_abs(prefix, src) + scb_s->mso;
+
+	rc_dest = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, dest, &pei_dest);
+	rc_src = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, src, &pei_src);
+	/*
+	 * Either everything went well, or something non-critical went wrong
+	 * e.g. because of a race. In either case, simply retry.
+	 */
+	if (rc_dest == -EAGAIN || rc_src == -EAGAIN || (!rc_dest && !rc_src)) {
+		retry_vsie_icpt(vsie_page);
+		return -EAGAIN;
+	}
+	/* Something more serious went wrong, propagate the error */
+	if (rc_dest < 0)
+		return rc_dest;
+	if (rc_src < 0)
+		return rc_src;
+
+	/* The only possible suppressing exception: just deliver it */
+	if (rc_dest == PGM_TRANSLATION_SPEC || rc_src == PGM_TRANSLATION_SPEC) {
+		clear_vsie_icpt(vsie_page);
+		rc_dest = kvm_s390_inject_program_int(vcpu, PGM_TRANSLATION_SPEC);
+		WARN_ON_ONCE(rc_dest);
+		return 1;
+	}
+
+	/*
+	 * Forward the PEI intercept to the guest if it was a page fault, or
+	 * also for segment and region table faults if EDAT applies.
+	 */
+	if (edat) {
+		rc_dest = rc_dest == PGM_ASCE_TYPE ? rc_dest : 0;
+		rc_src = rc_src == PGM_ASCE_TYPE ? rc_src : 0;
+	} else {
+		rc_dest = rc_dest != PGM_PAGE_TRANSLATION ? rc_dest : 0;
+		rc_src = rc_src != PGM_PAGE_TRANSLATION ? rc_src : 0;
+	}
+	if (!rc_dest && !rc_src) {
+		pei_block[0] = pei_dest;
+		pei_block[1] = pei_src;
+		return 1;
+	}
+
+	retry_vsie_icpt(vsie_page);
+
+	/*
+	 * The host has edat, and the guest does not, or it was an ASCE type
+	 * exception. The host needs to inject the appropriate DAT interrupts
+	 * into the guest.
+	 */
+	if (rc_dest)
+		return inject_fault(vcpu, rc_dest, dest, 1);
+	return inject_fault(vcpu, rc_src, src, 0);
+}
+
+/*
  * Run the vsie on a shadow scb and a shadow gmap, without any further
  * sanity checks, handling SIE faults.
  *
@@ -1000,11 +1088,6 @@
 
 	handle_last_fault(vcpu, vsie_page);
 
-	if (need_resched())
-		schedule();
-	if (test_cpu_flag(CIF_MCCK_PENDING))
-		s390_handle_mcck();
-
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 
 	/* save current guest state of bp isolation override */
@@ -1072,6 +1155,10 @@
 		if ((scb_s->ipa & 0xf000) != 0xf000)
 			scb_s->ipa += 0x1000;
 		break;
+	case ICPT_PARTEXEC:
+		if (scb_s->ipa == 0xb254)
+			rc = vsie_handle_mvpg(vcpu, vsie_page);
+		break;
 	}
 	return rc;
 }
@@ -1185,6 +1272,7 @@
 		    kvm_s390_vcpu_has_irq(vcpu, 0) ||
 		    kvm_s390_vcpu_sie_inhibited(vcpu))
 			break;
+		cond_resched();
 	}
 
 	if (rc == -EFAULT) {
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index d7c218e..6783339 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -11,3 +11,8 @@
 # Instrumenting memory accesses to __user data (in different address space)
 # produce false positives
 KASAN_SANITIZE_uaccess.o := n
+
+obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o
+CFLAGS_test_unwind.o += -fno-optimize-sibling-calls
+
+lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index d4aa107..8c0c68e 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -46,6 +46,7 @@
 	set_clock_comparator(end);
 	set_cpu_flag(CIF_IGNORE_IRQ);
 	psw_idle(&idle, psw_mask);
+	trace_hardirqs_off();
 	clear_cpu_flag(CIF_IGNORE_IRQ);
 	set_clock_comparator(S390_lowcore.clock_comparator);
 	__ctl_load(cr0, 0, 0);
diff --git a/arch/s390/lib/error-inject.c b/arch/s390/lib/error-inject.c
new file mode 100644
index 0000000..8c9d4da
--- /dev/null
+++ b/arch/s390/lib/error-inject.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <asm/ptrace.h>
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+	/*
+	 * Emulate 'br 14'. 'regs' is captured by kprobes on entry to some
+	 * kernel function.
+	 */
+	regs->psw.addr = regs->gprs[14];
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 30a7c8c..9b2dab5 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -74,7 +74,7 @@
 {
 	int owner;
 
-	asm volatile(
+	asm_inline volatile(
 		ALTERNATIVE("", ".long 0xb2fa0040", 49)	/* NIAI 4 */
 		"	l	%0,%1\n"
 		: "=d" (owner) : "Q" (*lock) : "memory");
@@ -85,7 +85,7 @@
 {
 	int expected = old;
 
-	asm volatile(
+	asm_inline volatile(
 		ALTERNATIVE("", ".long 0xb2fa0080", 49)	/* NIAI 8 */
 		"	cs	%0,%3,%1\n"
 		: "=d" (old), "=Q" (*lock)
@@ -242,7 +242,6 @@
 
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
-	/* Use classic spinlocks + niai if the steal time is >= 10% */
 	if (test_cpu_flag(CIF_DEDICATED_CPU))
 		arch_spin_lock_queued(lp);
 	else
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index 0e30e6e..db4e539 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -246,14 +246,13 @@
 #ifdef __HAVE_ARCH_STRRCHR
 char *strrchr(const char *s, int c)
 {
-       size_t len = __strend(s) - s;
+	ssize_t len = __strend(s) - s;
 
-       if (len)
-	       do {
-		       if (s[len] == (char) c)
-			       return (char *) s + len;
-	       } while (--len > 0);
-       return NULL;
+	do {
+		if (s[len] == (char)c)
+			return (char *)s + len;
+	} while (--len >= 0);
+	return NULL;
 }
 EXPORT_SYMBOL(strrchr);
 #endif
@@ -333,7 +332,7 @@
  * memcmp - Compare two areas of memory
  * @s1: One area of memory
  * @s2: Another area of memory
- * @count: The size of the area.
+ * @n: The size of the area.
  */
 #ifdef __HAVE_ARCH_MEMCMP
 int memcmp(const void *s1, const void *s2, size_t n)
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
new file mode 100644
index 0000000..b0b67e6
--- /dev/null
+++ b/arch/s390/lib/test_unwind.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test module for unwind_for_each_frame
+ */
+
+#define pr_fmt(fmt) "test_unwind: " fmt
+#include <asm/unwind.h>
+#include <linux/completion.h>
+#include <linux/kallsyms.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <linux/wait.h>
+#include <asm/irq.h>
+#include <asm/delay.h>
+
+#define BT_BUF_SIZE (PAGE_SIZE * 4)
+
+/*
+ * To avoid printk line limit split backtrace by lines
+ */
+static void print_backtrace(char *bt)
+{
+	char *p;
+
+	while (true) {
+		p = strsep(&bt, "\n");
+		if (!p)
+			break;
+		pr_err("%s\n", p);
+	}
+}
+
+/*
+ * Calls unwind_for_each_frame(task, regs, sp) and verifies that the result
+ * contains unwindme_func2 followed by unwindme_func1.
+ */
+static noinline int test_unwind(struct task_struct *task, struct pt_regs *regs,
+				unsigned long sp)
+{
+	int frame_count, prev_is_func2, seen_func2_func1;
+	const int max_frames = 128;
+	struct unwind_state state;
+	size_t bt_pos = 0;
+	int ret = 0;
+	char *bt;
+
+	bt = kmalloc(BT_BUF_SIZE, GFP_ATOMIC);
+	if (!bt) {
+		pr_err("failed to allocate backtrace buffer\n");
+		return -ENOMEM;
+	}
+	/* Unwind. */
+	frame_count = 0;
+	prev_is_func2 = 0;
+	seen_func2_func1 = 0;
+	unwind_for_each_frame(&state, task, regs, sp) {
+		unsigned long addr = unwind_get_return_address(&state);
+		char sym[KSYM_SYMBOL_LEN];
+
+		if (frame_count++ == max_frames)
+			break;
+		if (state.reliable && !addr) {
+			pr_err("unwind state reliable but addr is 0\n");
+			kfree(bt);
+			return -EINVAL;
+		}
+		sprint_symbol(sym, addr);
+		if (bt_pos < BT_BUF_SIZE) {
+			bt_pos += snprintf(bt + bt_pos, BT_BUF_SIZE - bt_pos,
+					   state.reliable ? " [%-7s%px] %pSR\n" :
+							    "([%-7s%px] %pSR)\n",
+					   stack_type_name(state.stack_info.type),
+					   (void *)state.sp, (void *)state.ip);
+			if (bt_pos >= BT_BUF_SIZE)
+				pr_err("backtrace buffer is too small\n");
+		}
+		frame_count += 1;
+		if (prev_is_func2 && str_has_prefix(sym, "unwindme_func1"))
+			seen_func2_func1 = 1;
+		prev_is_func2 = str_has_prefix(sym, "unwindme_func2");
+	}
+
+	/* Check the results. */
+	if (unwind_error(&state)) {
+		pr_err("unwind error\n");
+		ret = -EINVAL;
+	}
+	if (!seen_func2_func1) {
+		pr_err("unwindme_func2 and unwindme_func1 not found\n");
+		ret = -EINVAL;
+	}
+	if (frame_count == max_frames) {
+		pr_err("Maximum number of frames exceeded\n");
+		ret = -EINVAL;
+	}
+	if (ret)
+		print_backtrace(bt);
+	kfree(bt);
+	return ret;
+}
+
+/* State of the task being unwound. */
+struct unwindme {
+	int flags;
+	int ret;
+	struct task_struct *task;
+	struct completion task_ready;
+	wait_queue_head_t task_wq;
+	unsigned long sp;
+};
+
+static struct unwindme *unwindme;
+
+/* Values of unwindme.flags. */
+#define UWM_DEFAULT		0x0
+#define UWM_THREAD		0x1	/* Unwind a separate task. */
+#define UWM_REGS		0x2	/* Pass regs to test_unwind(). */
+#define UWM_SP			0x4	/* Pass sp to test_unwind(). */
+#define UWM_CALLER		0x8	/* Unwind starting from caller. */
+#define UWM_SWITCH_STACK	0x10	/* Use CALL_ON_STACK. */
+#define UWM_IRQ			0x20	/* Unwind from irq context. */
+#define UWM_PGM			0x40	/* Unwind from program check handler. */
+
+static __always_inline unsigned long get_psw_addr(void)
+{
+	unsigned long psw_addr;
+
+	asm volatile(
+		"basr	%[psw_addr],0\n"
+		: [psw_addr] "=d" (psw_addr));
+	return psw_addr;
+}
+
+#ifdef CONFIG_KPROBES
+static int pgm_pre_handler(struct kprobe *p, struct pt_regs *regs)
+{
+	struct unwindme *u = unwindme;
+
+	u->ret = test_unwind(NULL, (u->flags & UWM_REGS) ? regs : NULL,
+			     (u->flags & UWM_SP) ? u->sp : 0);
+	return 0;
+}
+#endif
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func4(struct unwindme *u)
+{
+	if (!(u->flags & UWM_CALLER))
+		u->sp = current_frame_address();
+	if (u->flags & UWM_THREAD) {
+		complete(&u->task_ready);
+		wait_event(u->task_wq, kthread_should_park());
+		kthread_parkme();
+		return 0;
+#ifdef CONFIG_KPROBES
+	} else if (u->flags & UWM_PGM) {
+		struct kprobe kp;
+		int ret;
+
+		unwindme = u;
+		memset(&kp, 0, sizeof(kp));
+		kp.symbol_name = "do_report_trap";
+		kp.pre_handler = pgm_pre_handler;
+		ret = register_kprobe(&kp);
+		if (ret < 0) {
+			pr_err("register_kprobe failed %d\n", ret);
+			return -EINVAL;
+		}
+
+		/*
+		 * Trigger operation exception; use insn notation to bypass
+		 * llvm's integrated assembler sanity checks.
+		 */
+		asm volatile(
+			"	.insn	e,0x0000\n"	/* illegal opcode */
+			"0:	nopr	%%r7\n"
+			EX_TABLE(0b, 0b)
+			:);
+
+		unregister_kprobe(&kp);
+		unwindme = NULL;
+		return u->ret;
+#endif
+	} else {
+		struct pt_regs regs;
+
+		memset(&regs, 0, sizeof(regs));
+		regs.psw.addr = get_psw_addr();
+		regs.gprs[15] = current_stack_pointer();
+		return test_unwind(NULL,
+				   (u->flags & UWM_REGS) ? &regs : NULL,
+				   (u->flags & UWM_SP) ? u->sp : 0);
+	}
+}
+
+/* This function may or may not appear in the backtrace. */
+static noinline int unwindme_func3(struct unwindme *u)
+{
+	u->sp = current_frame_address();
+	return unwindme_func4(u);
+}
+
+/* This function must appear in the backtrace. */
+static noinline int unwindme_func2(struct unwindme *u)
+{
+	unsigned long flags;
+	int rc;
+
+	if (u->flags & UWM_SWITCH_STACK) {
+		local_irq_save(flags);
+		local_mcck_disable();
+		rc = CALL_ON_STACK(unwindme_func3, S390_lowcore.nodat_stack, 1, u);
+		local_mcck_enable();
+		local_irq_restore(flags);
+		return rc;
+	} else {
+		return unwindme_func3(u);
+	}
+}
+
+/* This function must follow unwindme_func2 in the backtrace. */
+static noinline int unwindme_func1(void *u)
+{
+	return unwindme_func2((struct unwindme *)u);
+}
+
+static void unwindme_irq_handler(struct ext_code ext_code,
+				       unsigned int param32,
+				       unsigned long param64)
+{
+	struct unwindme *u = READ_ONCE(unwindme);
+
+	if (u && u->task == current) {
+		unwindme = NULL;
+		u->task = NULL;
+		u->ret = unwindme_func1(u);
+	}
+}
+
+static int test_unwind_irq(struct unwindme *u)
+{
+	preempt_disable();
+	if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
+		pr_info("Couldn't register external interrupt handler");
+		return -1;
+	}
+	u->task = current;
+	unwindme = u;
+	udelay(1);
+	unregister_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler);
+	preempt_enable();
+	return u->ret;
+}
+
+/* Spawns a task and passes it to test_unwind(). */
+static int test_unwind_task(struct unwindme *u)
+{
+	struct task_struct *task;
+	int ret;
+
+	/* Initialize thread-related fields. */
+	init_completion(&u->task_ready);
+	init_waitqueue_head(&u->task_wq);
+
+	/*
+	 * Start the task and wait until it reaches unwindme_func4() and sleeps
+	 * in (task_ready, unwind_done] range.
+	 */
+	task = kthread_run(unwindme_func1, u, "%s", __func__);
+	if (IS_ERR(task)) {
+		pr_err("kthread_run() failed\n");
+		return PTR_ERR(task);
+	}
+	/*
+	 * Make sure the task reaches unwindme_func4 before parking it;
+	 * otherwise we might park it before the kthread function has run.
+	 */
+	wait_for_completion(&u->task_ready);
+	kthread_park(task);
+	/* Unwind. */
+	ret = test_unwind(task, NULL, (u->flags & UWM_SP) ? u->sp : 0);
+	kthread_stop(task);
+	return ret;
+}
+
+static int test_unwind_flags(int flags)
+{
+	struct unwindme u;
+
+	u.flags = flags;
+	if (u.flags & UWM_THREAD)
+		return test_unwind_task(&u);
+	else if (u.flags & UWM_IRQ)
+		return test_unwind_irq(&u);
+	else
+		return unwindme_func1(&u);
+}
+
+static int test_unwind_init(void)
+{
+	int ret = 0;
+
+#define TEST(flags)							\
+do {									\
+	pr_info("[ RUN      ] " #flags "\n");				\
+	if (!test_unwind_flags((flags))) {				\
+		pr_info("[       OK ] " #flags "\n");			\
+	} else {							\
+		pr_err("[  FAILED  ] " #flags "\n");			\
+		ret = -EINVAL;						\
+	}								\
+} while (0)
+
+	TEST(UWM_DEFAULT);
+	TEST(UWM_SP);
+	TEST(UWM_REGS);
+	TEST(UWM_SWITCH_STACK);
+	TEST(UWM_SP | UWM_REGS);
+	TEST(UWM_CALLER | UWM_SP);
+	TEST(UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+	TEST(UWM_THREAD);
+	TEST(UWM_THREAD | UWM_SP);
+	TEST(UWM_THREAD | UWM_CALLER | UWM_SP);
+	TEST(UWM_IRQ);
+	TEST(UWM_IRQ | UWM_SWITCH_STACK);
+	TEST(UWM_IRQ | UWM_SP);
+	TEST(UWM_IRQ | UWM_REGS);
+	TEST(UWM_IRQ | UWM_SP | UWM_REGS);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS);
+	TEST(UWM_IRQ | UWM_CALLER | UWM_SP | UWM_REGS | UWM_SWITCH_STACK);
+#ifdef CONFIG_KPROBES
+	TEST(UWM_PGM);
+	TEST(UWM_PGM | UWM_SP);
+	TEST(UWM_PGM | UWM_REGS);
+	TEST(UWM_PGM | UWM_SP | UWM_REGS);
+#endif
+#undef TEST
+
+	return ret;
+}
+
+static void test_unwind_exit(void)
+{
+}
+
+module_init(test_unwind_init);
+module_exit(test_unwind_exit);
+MODULE_LICENSE("GPL");
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 3175413..cd67e94 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -8,7 +8,7 @@
 
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
-obj-$(CONFIG_S390_PTDUMP)	+= dump_pagetables.o
+obj-$(CONFIG_PTDUMP_CORE)	+= dump_pagetables.o
 obj-$(CONFIG_PGSTE)		+= gmap.o
 
 KASAN_SANITIZE_kasan_init.o	:= n
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index a51c892..1141c8d 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -19,10 +19,8 @@
 #include <linux/swap.h>
 #include <linux/kthread.h>
 #include <linux/oom.h>
-#include <linux/suspend.h>
 #include <linux/uaccess.h>
 
-#include <asm/pgalloc.h>
 #include <asm/diag.h>
 
 #ifdef CONFIG_CMM_IUCV
@@ -49,7 +47,6 @@
 static volatile long cmm_timed_pages_target;
 static long cmm_timeout_pages;
 static long cmm_timeout_seconds;
-static int cmm_suspended;
 
 static struct cmm_page_array *cmm_page_list;
 static struct cmm_page_array *cmm_timed_page_list;
@@ -151,9 +148,9 @@
 
 	while (1) {
 		rc = wait_event_interruptible(cmm_thread_wait,
-			(!cmm_suspended && (cmm_pages != cmm_pages_target ||
-			 cmm_timed_pages != cmm_timed_pages_target)) ||
-			 kthread_should_stop());
+			cmm_pages != cmm_pages_target ||
+			cmm_timed_pages != cmm_timed_pages_target ||
+			kthread_should_stop());
 		if (kthread_should_stop() || rc == -ERESTARTSYS) {
 			cmm_pages_target = cmm_pages;
 			cmm_timed_pages_target = cmm_timed_pages;
@@ -191,7 +188,7 @@
 			del_timer(&cmm_timer);
 		return;
 	}
-	mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
+	mod_timer(&cmm_timer, jiffies + msecs_to_jiffies(cmm_timeout_seconds * MSEC_PER_SEC));
 }
 
 static void cmm_timer_fn(struct timer_list *unused)
@@ -247,7 +244,7 @@
 }
 
 static int cmm_pages_handler(struct ctl_table *ctl, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long nr = cmm_get_pages();
 	struct ctl_table ctl_entry = {
@@ -266,7 +263,7 @@
 }
 
 static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
-				   void __user *buffer, size_t *lenp,
+				   void *buffer, size_t *lenp,
 				   loff_t *ppos)
 {
 	long nr = cmm_get_timed_pages();
@@ -286,7 +283,7 @@
 }
 
 static int cmm_timeout_handler(struct ctl_table *ctl, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos)
+			       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char buf[64], *p;
 	long nr, seconds;
@@ -299,8 +296,7 @@
 
 	if (write) {
 		len = min(*lenp, sizeof(buf));
-		if (copy_from_user(buf, buffer, len))
-			return -EFAULT;
+		memcpy(buf, buffer, len);
 		buf[len - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
 		nr = simple_strtoul(p, &p, 0);
@@ -313,8 +309,7 @@
 			      cmm_timeout_pages, cmm_timeout_seconds);
 		if (len > *lenp)
 			len = *lenp;
-		if (copy_to_user(buffer, buf, len))
-			return -EFAULT;
+		memcpy(buffer, buf, len);
 		*lenp = len;
 		*ppos += len;
 	}
@@ -390,38 +385,6 @@
 
 static struct ctl_table_header *cmm_sysctl_header;
 
-static int cmm_suspend(void)
-{
-	cmm_suspended = 1;
-	cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list);
-	cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list);
-	return 0;
-}
-
-static int cmm_resume(void)
-{
-	cmm_suspended = 0;
-	cmm_kick_thread();
-	return 0;
-}
-
-static int cmm_power_event(struct notifier_block *this,
-			   unsigned long event, void *ptr)
-{
-	switch (event) {
-	case PM_POST_HIBERNATION:
-		return cmm_resume();
-	case PM_HIBERNATION_PREPARE:
-		return cmm_suspend();
-	default:
-		return NOTIFY_DONE;
-	}
-}
-
-static struct notifier_block cmm_power_notifier = {
-	.notifier_call = cmm_power_event,
-};
-
 static int __init cmm_init(void)
 {
 	int rc = -ENOMEM;
@@ -446,16 +409,11 @@
 	rc = register_oom_notifier(&cmm_oom_nb);
 	if (rc < 0)
 		goto out_oom_notify;
-	rc = register_pm_notifier(&cmm_power_notifier);
-	if (rc)
-		goto out_pm;
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (!IS_ERR(cmm_thread_ptr))
 		return 0;
 
 	rc = PTR_ERR(cmm_thread_ptr);
-	unregister_pm_notifier(&cmm_power_notifier);
-out_pm:
 	unregister_oom_notifier(&cmm_oom_nb);
 out_oom_notify:
 #ifdef CONFIG_CMM_IUCV
@@ -475,7 +433,6 @@
 #ifdef CONFIG_CMM_IUCV
 	smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target);
 #endif
-	unregister_pm_notifier(&cmm_power_notifier);
 	unregister_oom_notifier(&cmm_oom_nb);
 	kthread_stop(cmm_thread_ptr);
 	del_timer_sync(&cmm_timer);
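
Annotation (not part of the patch): the cmm hunks above drop the hibernation notifier plumbing and re-arm the timer with msecs_to_jiffies() rather than multiplying seconds by HZ directly. As a rough illustration only, a self-rearming timer using that conversion might look like the following sketch; the module, interval, and function names here are invented for the example.

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/time64.h>

static struct timer_list demo_timer;            /* hypothetical example timer */
static unsigned long demo_timeout_seconds = 5;  /* assumed re-arm interval */

static void demo_timer_fn(struct timer_list *unused)
{
	/* Re-arm via msecs_to_jiffies(), as the cmm hunk above now does,
	 * instead of open-coding "seconds * HZ". */
	mod_timer(&demo_timer, jiffies +
		  msecs_to_jiffies(demo_timeout_seconds * MSEC_PER_SEC));
}

static int __init demo_init(void)
{
	timer_setup(&demo_timer, demo_timer_fn, 0);
	mod_timer(&demo_timer, jiffies +
		  msecs_to_jiffies(demo_timeout_seconds * MSEC_PER_SEC));
	return 0;
}

static void __exit demo_exit(void)
{
	del_timer_sync(&demo_timer);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
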
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 5d67b81..8f9ff7e 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -1,12 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/set_memory.h>
+#include <linux/ptdump.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
-#include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/kasan.h>
+#include <asm/ptdump.h>
 #include <asm/kasan.h>
 #include <asm/sections.h>
-#include <asm/pgtable.h>
 
 static unsigned long max_addr;
 
@@ -16,263 +17,234 @@
 };
 
 enum address_markers_idx {
-	IDENTITY_NR = 0,
+	IDENTITY_BEFORE_NR = 0,
+	IDENTITY_BEFORE_END_NR,
 	KERNEL_START_NR,
 	KERNEL_END_NR,
+	IDENTITY_AFTER_NR,
+	IDENTITY_AFTER_END_NR,
 #ifdef CONFIG_KASAN
 	KASAN_SHADOW_START_NR,
 	KASAN_SHADOW_END_NR,
 #endif
 	VMEMMAP_NR,
+	VMEMMAP_END_NR,
 	VMALLOC_NR,
+	VMALLOC_END_NR,
 	MODULES_NR,
+	MODULES_END_NR,
 };
 
 static struct addr_marker address_markers[] = {
-	[IDENTITY_NR]		= {0, "Identity Mapping"},
+	[IDENTITY_BEFORE_NR]	= {0, "Identity Mapping Start"},
+	[IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"},
 	[KERNEL_START_NR]	= {(unsigned long)_stext, "Kernel Image Start"},
 	[KERNEL_END_NR]		= {(unsigned long)_end, "Kernel Image End"},
+	[IDENTITY_AFTER_NR]	= {(unsigned long)_end, "Identity Mapping Start"},
+	[IDENTITY_AFTER_END_NR]	= {0, "Identity Mapping End"},
 #ifdef CONFIG_KASAN
 	[KASAN_SHADOW_START_NR]	= {KASAN_SHADOW_START, "Kasan Shadow Start"},
 	[KASAN_SHADOW_END_NR]	= {KASAN_SHADOW_END, "Kasan Shadow End"},
 #endif
-	[VMEMMAP_NR]		= {0, "vmemmap Area"},
-	[VMALLOC_NR]		= {0, "vmalloc Area"},
-	[MODULES_NR]		= {0, "Modules Area"},
+	[VMEMMAP_NR]		= {0, "vmemmap Area Start"},
+	[VMEMMAP_END_NR]	= {0, "vmemmap Area End"},
+	[VMALLOC_NR]		= {0, "vmalloc Area Start"},
+	[VMALLOC_END_NR]	= {0, "vmalloc Area End"},
+	[MODULES_NR]		= {0, "Modules Area Start"},
+	[MODULES_END_NR]	= {0, "Modules Area End"},
 	{ -1, NULL }
 };
 
 struct pg_state {
+	struct ptdump_state ptdump;
+	struct seq_file *seq;
 	int level;
 	unsigned int current_prot;
+	bool check_wx;
+	unsigned long wx_pages;
 	unsigned long start_address;
-	unsigned long current_address;
 	const struct addr_marker *marker;
 };
 
+#define pt_dump_seq_printf(m, fmt, args...)	\
+({						\
+	struct seq_file *__m = (m);		\
+						\
+	if (__m)				\
+		seq_printf(__m, fmt, ##args);	\
+})
+
+#define pt_dump_seq_puts(m, fmt)		\
+({						\
+	struct seq_file *__m = (m);		\
+						\
+	if (__m)				\
+		seq_printf(__m, fmt);		\
+})
+
 static void print_prot(struct seq_file *m, unsigned int pr, int level)
 {
 	static const char * const level_name[] =
 		{ "ASCE", "PGD", "PUD", "PMD", "PTE" };
 
-	seq_printf(m, "%s ", level_name[level]);
+	pt_dump_seq_printf(m, "%s ", level_name[level]);
 	if (pr & _PAGE_INVALID) {
-		seq_printf(m, "I\n");
+		pt_dump_seq_printf(m, "I\n");
 		return;
 	}
-	seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
-	seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
+	pt_dump_seq_puts(m, (pr & _PAGE_PROTECT) ? "RO " : "RW ");
+	pt_dump_seq_puts(m, (pr & _PAGE_NOEXEC) ? "NX\n" : "X\n");
 }
 
-static void note_page(struct seq_file *m, struct pg_state *st,
-		     unsigned int new_prot, int level)
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
 {
-	static const char units[] = "KMGTPE";
+#ifdef CONFIG_DEBUG_WX
+	if (!st->check_wx)
+		return;
+	if (st->current_prot & _PAGE_INVALID)
+		return;
+	if (st->current_prot & _PAGE_PROTECT)
+		return;
+	if (st->current_prot & _PAGE_NOEXEC)
+		return;
+	/* The first lowcore page is currently still W+X. */
+	if (addr == PAGE_SIZE)
+		return;
+	WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+		  (void *)st->start_address);
+	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+#endif /* CONFIG_DEBUG_WX */
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
+{
 	int width = sizeof(unsigned long) * 2;
+	static const char units[] = "KMGTPE";
 	const char *unit = units;
-	unsigned int prot, cur;
 	unsigned long delta;
+	struct pg_state *st;
+	struct seq_file *m;
+	unsigned int prot;
 
-	/*
-	 * If we have a "break" in the series, we need to flush the state
-	 * that we have now. "break" is either changing perms, levels or
-	 * address space marker.
-	 */
-	prot = new_prot;
-	cur = st->current_prot;
-
-	if (!st->level) {
-		/* First entry */
-		st->current_prot = new_prot;
+	st = container_of(pt_st, struct pg_state, ptdump);
+	m = st->seq;
+	prot = val & (_PAGE_PROTECT | _PAGE_NOEXEC);
+	if (level == 4 && (val & _PAGE_INVALID))
+		prot = _PAGE_INVALID;
+	/* For pmd_none() & friends val gets passed as zero. */
+	if (level != 4 && !val)
+		prot = _PAGE_INVALID;
+	/* Final flush from generic code. */
+	if (level == -1)
+		addr = max_addr;
+	if (st->level == -1) {
+		pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
+		st->start_address = addr;
+		st->current_prot = prot;
 		st->level = level;
-		st->marker = address_markers;
-		seq_printf(m, "---[ %s ]---\n", st->marker->name);
-	} else if (prot != cur || level != st->level ||
-		   st->current_address >= st->marker[1].start_address) {
-		/* Print the actual finished series */
-		seq_printf(m, "0x%0*lx-0x%0*lx ",
-			   width, st->start_address,
-			   width, st->current_address);
-		delta = (st->current_address - st->start_address) >> 10;
+	} else if (prot != st->current_prot || level != st->level ||
+		   addr >= st->marker[1].start_address) {
+		note_prot_wx(st, addr);
+		pt_dump_seq_printf(m, "0x%0*lx-0x%0*lx ",
+				   width, st->start_address,
+				   width, addr);
+		delta = (addr - st->start_address) >> 10;
 		while (!(delta & 0x3ff) && unit[1]) {
 			delta >>= 10;
 			unit++;
 		}
-		seq_printf(m, "%9lu%c ", delta, *unit);
+		pt_dump_seq_printf(m, "%9lu%c ", delta, *unit);
 		print_prot(m, st->current_prot, st->level);
-		while (st->current_address >= st->marker[1].start_address) {
+		while (addr >= st->marker[1].start_address) {
 			st->marker++;
-			seq_printf(m, "---[ %s ]---\n", st->marker->name);
+			pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name);
 		}
-		st->start_address = st->current_address;
-		st->current_prot = new_prot;
+		st->start_address = addr;
+		st->current_prot = prot;
 		st->level = level;
 	}
 }
 
-#ifdef CONFIG_KASAN
-static void note_kasan_early_shadow_page(struct seq_file *m,
-						struct pg_state *st)
+#ifdef CONFIG_DEBUG_WX
+void ptdump_check_wx(void)
 {
-	unsigned int prot;
+	struct pg_state st = {
+		.ptdump = {
+			.note_page = note_page,
+			.range = (struct ptdump_range[]) {
+				{.start = 0, .end = max_addr},
+				{.start = 0, .end = 0},
+			}
+		},
+		.seq = NULL,
+		.level = -1,
+		.current_prot = 0,
+		.check_wx = true,
+		.wx_pages = 0,
+		.start_address = 0,
+		.marker = (struct addr_marker[]) {
+			{ .start_address =  0, .name = NULL},
+			{ .start_address = -1, .name = NULL},
+		},
+	};
 
-	prot = pte_val(*kasan_early_shadow_pte) &
-		(_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
-	note_page(m, st, prot, 4);
-}
-#endif
-
-/*
- * The actual page table walker functions. In order to keep the
- * implementation of print_prot() short, we only check and pass
- * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region,
- * segment or page table entry is invalid or read-only.
- * After all it's just a hint that the current level being walked
- * contains an invalid or read-only entry.
- */
-static void walk_pte_level(struct seq_file *m, struct pg_state *st,
-			   pmd_t *pmd, unsigned long addr)
-{
-	unsigned int prot;
-	pte_t *pte;
-	int i;
-
-	for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
-		st->current_address = addr;
-		pte = pte_offset_kernel(pmd, addr);
-		prot = pte_val(*pte) &
-			(_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
-		note_page(m, st, prot, 4);
-		addr += PAGE_SIZE;
-	}
-}
-
-static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
-			   pud_t *pud, unsigned long addr)
-{
-	unsigned int prot;
-	pmd_t *pmd;
-	int i;
-
-#ifdef CONFIG_KASAN
-	if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_early_shadow_pmd)) {
-		note_kasan_early_shadow_page(m, st);
+	if (!MACHINE_HAS_NX)
 		return;
-	}
-#endif
-
-	pmd = pmd_offset(pud, addr);
-	for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++, pmd++) {
-		st->current_address = addr;
-		if (!pmd_none(*pmd)) {
-			if (pmd_large(*pmd)) {
-				prot = pmd_val(*pmd) &
-					(_SEGMENT_ENTRY_PROTECT |
-					 _SEGMENT_ENTRY_NOEXEC);
-				note_page(m, st, prot, 3);
-			} else
-				walk_pte_level(m, st, pmd, addr);
-		} else
-			note_page(m, st, _PAGE_INVALID, 3);
-		addr += PMD_SIZE;
-	}
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	if (st.wx_pages)
+		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
+	else
+		pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n");
 }
+#endif /* CONFIG_DEBUG_WX */
 
-static void walk_pud_level(struct seq_file *m, struct pg_state *st,
-			   p4d_t *p4d, unsigned long addr)
-{
-	unsigned int prot;
-	pud_t *pud;
-	int i;
-
-#ifdef CONFIG_KASAN
-	if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_early_shadow_pud)) {
-		note_kasan_early_shadow_page(m, st);
-		return;
-	}
-#endif
-
-	pud = pud_offset(p4d, addr);
-	for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++, pud++) {
-		st->current_address = addr;
-		if (!pud_none(*pud))
-			if (pud_large(*pud)) {
-				prot = pud_val(*pud) &
-					(_REGION_ENTRY_PROTECT |
-					 _REGION_ENTRY_NOEXEC);
-				note_page(m, st, prot, 2);
-			} else
-				walk_pmd_level(m, st, pud, addr);
-		else
-			note_page(m, st, _PAGE_INVALID, 2);
-		addr += PUD_SIZE;
-	}
-}
-
-static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
-			   pgd_t *pgd, unsigned long addr)
-{
-	p4d_t *p4d;
-	int i;
-
-#ifdef CONFIG_KASAN
-	if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_early_shadow_p4d)) {
-		note_kasan_early_shadow_page(m, st);
-		return;
-	}
-#endif
-
-	p4d = p4d_offset(pgd, addr);
-	for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++, p4d++) {
-		st->current_address = addr;
-		if (!p4d_none(*p4d))
-			walk_pud_level(m, st, p4d, addr);
-		else
-			note_page(m, st, _PAGE_INVALID, 2);
-		addr += P4D_SIZE;
-	}
-}
-
-static void walk_pgd_level(struct seq_file *m)
-{
-	unsigned long addr = 0;
-	struct pg_state st;
-	pgd_t *pgd;
-	int i;
-
-	memset(&st, 0, sizeof(st));
-	for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
-		st.current_address = addr;
-		pgd = pgd_offset_k(addr);
-		if (!pgd_none(*pgd))
-			walk_p4d_level(m, &st, pgd, addr);
-		else
-			note_page(m, &st, _PAGE_INVALID, 1);
-		addr += PGDIR_SIZE;
-		cond_resched();
-	}
-	/* Flush out the last page */
-	st.current_address = max_addr;
-	note_page(m, &st, 0, 0);
-}
-
+#ifdef CONFIG_PTDUMP_DEBUGFS
 static int ptdump_show(struct seq_file *m, void *v)
 {
-	walk_pgd_level(m);
+	struct pg_state st = {
+		.ptdump = {
+			.note_page = note_page,
+			.range = (struct ptdump_range[]) {
+				{.start = 0, .end = max_addr},
+				{.start = 0, .end = 0},
+			}
+		},
+		.seq = m,
+		.level = -1,
+		.current_prot = 0,
+		.check_wx = false,
+		.wx_pages = 0,
+		.start_address = 0,
+		.marker = address_markers,
+	};
+
+	get_online_mems();
+	mutex_lock(&cpa_mutex);
+	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+	mutex_unlock(&cpa_mutex);
+	put_online_mems();
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
 
-static int ptdump_open(struct inode *inode, struct file *filp)
+/*
+ * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple
+ * insertion sort to preserve the original order of markers with the same
+ * start address.
+ */
+static void sort_address_markers(void)
 {
-	return single_open(filp, ptdump_show, NULL);
-}
+	struct addr_marker tmp;
+	int i, j;
 
-static const struct file_operations ptdump_fops = {
-	.open		= ptdump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+	for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) {
+		tmp = address_markers[i];
+		for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--)
+			address_markers[j + 1] = address_markers[j];
+		address_markers[j + 1] = tmp;
+	}
+}
 
 static int pt_dump_init(void)
 {
@@ -283,10 +255,17 @@
 	 */
 	max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
 	max_addr = 1UL << (max_addr * 11 + 31);
+	address_markers[IDENTITY_AFTER_END_NR].start_address = memory_end;
 	address_markers[MODULES_NR].start_address = MODULES_VADDR;
+	address_markers[MODULES_END_NR].start_address = MODULES_END;
 	address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
+	address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
 	address_markers[VMALLOC_NR].start_address = VMALLOC_START;
+	address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+	sort_address_markers();
+#ifdef CONFIG_PTDUMP_DEBUGFS
 	debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+#endif /* CONFIG_PTDUMP_DEBUGFS */
 	return 0;
 }
 device_initcall(pt_dump_init);
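
Annotation (not part of the patch): the sort_address_markers() comment above explains why an insertion sort is used here; the heapsort in lib/sort.c is not stable, and markers that share a start address (for example "Kernel Image End" and the following "Identity Mapping Start") must keep their original order. A small, self-contained userspace sketch of the same idea, with marker values invented for the demo:

#include <stdio.h>

struct marker {
	unsigned long start;
	const char *name;
};

/* Stable insertion sort: markers with equal start addresses keep their
 * original relative order, which is exactly the property the patch needs. */
static void sort_markers(struct marker *m, int n)
{
	struct marker tmp;
	int i, j;

	for (i = 1; i < n; i++) {
		tmp = m[i];
		for (j = i - 1; j >= 0 && m[j].start > tmp.start; j--)
			m[j + 1] = m[j];
		m[j + 1] = tmp;
	}
}

int main(void)
{
	struct marker m[] = {
		{ 0x1000, "Kernel Image End" },
		{ 0x1000, "Identity Mapping Start" },
		{ 0x0000, "Identity Mapping End" },
	};
	int i;

	sort_markers(m, 3);
	for (i = 0; i < 3; i++)
		printf("%#lx %s\n", m[i].start, m[i].name);
	return 0;
}
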
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index fd0dae9..5060956 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -20,9 +20,9 @@
 #include <linux/ctype.h>
 #include <linux/ioport.h>
 #include <linux/refcount.h>
+#include <linux/pgtable.h>
 #include <asm/diag.h>
 #include <asm/page.h>
-#include <asm/pgtable.h>
 #include <asm/ebcdic.h>
 #include <asm/errno.h>
 #include <asm/extmem.h>
@@ -313,15 +313,10 @@
 		goto out_free;
 	}
 
-	rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
-
-	if (rc)
-		goto out_free;
-
 	seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 	if (seg->res == NULL) {
 		rc = -ENOMEM;
-		goto out_shared;
+		goto out_free;
 	}
 	seg->res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
 	seg->res->start = seg->start_addr;
@@ -335,12 +330,17 @@
 	if (rc == SEG_TYPE_SC ||
 	    ((rc == SEG_TYPE_SR || rc == SEG_TYPE_ER) && !do_nonshared))
 		seg->res->flags |= IORESOURCE_READONLY;
+
+	/* Check for overlapping resources before adding the mapping. */
 	if (request_resource(&iomem_resource, seg->res)) {
 		rc = -EBUSY;
-		kfree(seg->res);
-		goto out_shared;
+		goto out_free_resource;
 	}
 
+	rc = vmem_add_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
+	if (rc)
+		goto out_resource;
+
 	if (do_nonshared)
 		diag_cc = dcss_diag(&loadnsr_scode, seg->dcss_name,
 				&start_addr, &end_addr);
@@ -351,14 +351,14 @@
 		dcss_diag(&purgeseg_scode, seg->dcss_name,
 				&dummy, &dummy);
 		rc = diag_cc;
-		goto out_resource;
+		goto out_mapping;
 	}
 	if (diag_cc > 1) {
 		pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr);
 		rc = dcss_diag_translate_rc(end_addr);
 		dcss_diag(&purgeseg_scode, seg->dcss_name,
 				&dummy, &dummy);
-		goto out_resource;
+		goto out_mapping;
 	}
 	seg->start_addr = start_addr;
 	seg->end = end_addr;
@@ -377,11 +377,12 @@
 			(void*) seg->end, segtype_string[seg->vm_segtype]);
 	}
 	goto out;
+ out_mapping:
+	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
  out_resource:
 	release_resource(seg->res);
+ out_free_resource:
 	kfree(seg->res);
- out_shared:
-	vmem_remove_mapping(seg->start_addr, seg->end - seg->start_addr + 1);
  out_free:
 	kfree(seg);
  out:
@@ -400,8 +401,7 @@
  * -EIO     : could not perform query or load diagnose
  * -ENOENT  : no such segment
  * -EOPNOTSUPP: multi-part segment cannot be used with linux
- * -ENOSPC  : segment cannot be used (overlaps with storage)
- * -EBUSY   : segment can temporarily not be used (overlaps with dcss)
+ * -EBUSY   : segment cannot be used (overlaps with dcss or storage)
  * -ERANGE  : segment cannot be used (exceeds kernel mapping range)
  * -EPERM   : segment is currently loaded with incompatible permissions
  * -ENOMEM  : out of memory
@@ -626,10 +626,6 @@
 		pr_err("DCSS %s has multiple page ranges and cannot be "
 		       "loaded or queried\n", seg_name);
 		break;
-	case -ENOSPC:
-		pr_err("DCSS %s overlaps with used storage and cannot "
-		       "be loaded\n", seg_name);
-		break;
 	case -EBUSY:
 		pr_err("%s needs used memory resources and cannot be "
 		       "loaded or queried\n", seg_name);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 7b0bb47..ed517fa 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -33,22 +33,22 @@
 #include <linux/hugetlb.h>
 #include <asm/asm-offsets.h>
 #include <asm/diag.h>
-#include <asm/pgtable.h>
 #include <asm/gmap.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
 #include <asm/facility.h>
+#include <asm/uv.h>
 #include "../kernel/entry.h"
 
 #define __FAIL_ADDR_MASK -4096L
 #define __SUBCODE_MASK 0x0600
 #define __PF_RES_FIELD 0x8000000000000000ULL
 
-#define VM_FAULT_BADCONTEXT	0x010000
-#define VM_FAULT_BADMAP		0x020000
-#define VM_FAULT_BADACCESS	0x040000
-#define VM_FAULT_SIGNAL		0x080000
-#define VM_FAULT_PFAULT		0x100000
+#define VM_FAULT_BADCONTEXT	((__force vm_fault_t) 0x010000)
+#define VM_FAULT_BADMAP		((__force vm_fault_t) 0x020000)
+#define VM_FAULT_BADACCESS	((__force vm_fault_t) 0x040000)
+#define VM_FAULT_SIGNAL		((__force vm_fault_t) 0x080000)
+#define VM_FAULT_PFAULT		((__force vm_fault_t) 0x100000)
 
 enum fault_type {
 	KERNEL_FAULT,
@@ -105,7 +105,7 @@
 {
 	unsigned long dummy;
 
-	return probe_kernel_address((unsigned long *)p, dummy);
+	return get_kernel_nofault(dummy, (unsigned long *)p);
 }
 
 static void dump_pagetable(unsigned long asce, unsigned long address)
@@ -122,7 +122,7 @@
 		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (address & _REGION2_INDEX) >> _REGION2_SHIFT;
 		if (bad_address(table))
@@ -131,7 +131,7 @@
 		if (*table & _REGION_ENTRY_INVALID)
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (address & _REGION3_INDEX) >> _REGION3_SHIFT;
 		if (bad_address(table))
@@ -140,7 +140,7 @@
 		if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
 			goto out;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (address & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 		if (bad_address(table))
@@ -255,10 +255,8 @@
 
 	/* Are we prepared to handle this kernel fault?  */
 	fixup = s390_search_extables(regs->psw.addr);
-	if (fixup) {
-		regs->psw.addr = extable_fixup(fixup);
+	if (fixup && ex_handle(fixup, regs))
 		return;
-	}
 
 	/*
 	 * Oops. The kernel tried to access some bad page. We'll have to
@@ -327,7 +325,7 @@
 	case VM_FAULT_BADACCESS:
 		if (access == VM_EXEC && signal_return(regs) == 0)
 			break;
-		/* fallthrough */
+		fallthrough;
 	case VM_FAULT_BADMAP:
 		/* Bad memory access. Check if it is kernel or user space. */
 		if (user_mode(regs)) {
@@ -337,9 +335,8 @@
 			do_sigsegv(regs, si_code);
 			break;
 		}
-		/* fallthrough */
+		fallthrough;
 	case VM_FAULT_BADCONTEXT:
-		/* fallthrough */
 	case VM_FAULT_PFAULT:
 		do_no_context(regs);
 		break;
@@ -377,7 +374,7 @@
  * routines.
  *
  * interruption code (int_code):
- *   04       Protection           ->  Write-Protection  (suprression)
+ *   04       Protection           ->  Write-Protection  (suppression)
  *   10       Segment translation  ->  Not present       (nullification)
  *   11       Page translation     ->  Not present       (nullification)
  *   3b       Region third trans.  ->  Not present       (nullification)
@@ -429,12 +426,12 @@
 
 	address = trans_exc_code & __FAIL_ADDR_MASK;
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-	flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+	flags = FAULT_FLAG_DEFAULT;
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 	if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
 		flags |= FAULT_FLAG_WRITE;
-	down_read(&mm->mmap_sem);
+	mmap_read_lock(mm);
 
 	gmap = NULL;
 	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
@@ -479,9 +476,8 @@
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
-	/* No reason to continue if interrupted by SIGKILL. */
-	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+	fault = handle_mm_fault(vma, address, flags, regs);
+	if (fault_signal_pending(fault, regs)) {
 		fault = VM_FAULT_SIGNAL;
 		if (flags & FAULT_FLAG_RETRY_NOWAIT)
 			goto out_up;
@@ -490,36 +486,19 @@
 	if (unlikely(fault & VM_FAULT_ERROR))
 		goto out_up;
 
-	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
-	 */
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-				      regs, address);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-				      regs, address);
-		}
 		if (fault & VM_FAULT_RETRY) {
 			if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
 			    (flags & FAULT_FLAG_RETRY_NOWAIT)) {
 				/* FAULT_FLAG_RETRY_NOWAIT has been set,
-				 * mmap_sem has not been released */
+				 * mmap_lock has not been released */
 				current->thread.gmap_pfault = 1;
 				fault = VM_FAULT_PFAULT;
 				goto out_up;
 			}
-			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
-			 * of starvation. */
-			flags &= ~(FAULT_FLAG_ALLOW_RETRY |
-				   FAULT_FLAG_RETRY_NOWAIT);
+			flags &= ~FAULT_FLAG_RETRY_NOWAIT;
 			flags |= FAULT_FLAG_TRIED;
-			down_read(&mm->mmap_sem);
+			mmap_read_lock(mm);
 			goto retry;
 		}
 	}
@@ -537,7 +516,7 @@
 	}
 	fault = 0;
 out_up:
-	up_read(&mm->mmap_sem);
+	mmap_read_unlock(mm);
 out:
 	return fault;
 }
@@ -584,7 +563,7 @@
 	int access;
 	vm_fault_t fault;
 
-	access = VM_READ | VM_EXEC | VM_WRITE;
+	access = VM_ACCESS_FLAGS;
 	fault = do_exception(regs, access);
 	if (unlikely(fault))
 		do_fault_error(regs, access, fault);
@@ -816,3 +795,124 @@
 early_initcall(pfault_irq_init);
 
 #endif /* CONFIG_PFAULT */
+
+#if IS_ENABLED(CONFIG_PGSTE)
+void do_secure_storage_access(struct pt_regs *regs)
+{
+	unsigned long addr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct page *page;
+	int rc;
+
+	/*
+	 * bit 61 tells us if the address is valid, if it's not we
+	 * have a major problem and should stop the kernel or send a
+	 * SIGSEGV to the process. Unfortunately bit 61 is not
+	 * reliable without the misc UV feature so we need to check
+	 * for that as well.
+	 */
+	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+	    !test_bit_inv(61, &regs->int_parm_long)) {
+		/*
+		 * When this happens, userspace did something that it
+		 * was not supposed to do, e.g. branching into secure
+		 * memory. Trigger a segmentation fault.
+		 */
+		if (user_mode(regs)) {
+			send_sig(SIGSEGV, current, 0);
+			return;
+		}
+
+		/*
+		 * The kernel should never run into this case and we
+		 * have no way out of this situation.
+		 */
+		panic("Unexpected PGM 0x3d with TEID bit 61=0");
+	}
+
+	switch (get_fault_type(regs)) {
+	case USER_FAULT:
+		mm = current->mm;
+		mmap_read_lock(mm);
+		vma = find_vma(mm, addr);
+		if (!vma) {
+			mmap_read_unlock(mm);
+			do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+			break;
+		}
+		page = follow_page(vma, addr, FOLL_WRITE | FOLL_GET);
+		if (IS_ERR_OR_NULL(page)) {
+			mmap_read_unlock(mm);
+			break;
+		}
+		if (arch_make_page_accessible(page))
+			send_sig(SIGSEGV, current, 0);
+		put_page(page);
+		mmap_read_unlock(mm);
+		break;
+	case KERNEL_FAULT:
+		page = phys_to_page(addr);
+		if (unlikely(!try_get_page(page)))
+			break;
+		rc = arch_make_page_accessible(page);
+		put_page(page);
+		if (rc)
+			BUG();
+		break;
+	case VDSO_FAULT:
+	case GMAP_FAULT:
+	default:
+		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+		WARN_ON_ONCE(1);
+	}
+}
+NOKPROBE_SYMBOL(do_secure_storage_access);
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+	unsigned long gaddr = regs->int_parm_long & __FAIL_ADDR_MASK;
+	struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+
+	if (get_fault_type(regs) != GMAP_FAULT) {
+		do_fault_error(regs, VM_READ | VM_WRITE, VM_FAULT_BADMAP);
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	if (gmap_convert_to_secure(gmap, gaddr) == -EINVAL)
+		send_sig(SIGSEGV, current, 0);
+}
+NOKPROBE_SYMBOL(do_non_secure_storage_access);
+
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+	/*
+	 * Either KVM messed up the secure guest mapping or the same
+	 * page is mapped into multiple secure guests.
+	 *
+	 * This exception is only triggered when a guest 2 is running
+	 * and can therefore never occur in kernel context.
+	 */
+	printk_ratelimited(KERN_WARNING
+			   "Secure storage violation in task: %s, pid %d\n",
+			   current->comm, current->pid);
+	send_sig(SIGSEGV, current, 0);
+}
+
+#else
+void do_secure_storage_access(struct pt_regs *regs)
+{
+	default_trap_handler(regs);
+}
+
+void do_non_secure_storage_access(struct pt_regs *regs)
+{
+	default_trap_handler(regs);
+}
+
+void do_secure_storage_violation(struct pt_regs *regs)
+{
+	default_trap_handler(regs);
+}
+#endif
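
Annotation (not part of the patch): several hunks in fault.c replace "/* fallthrough */" comments with the kernel's fallthrough pseudo-keyword, which expands to the compiler's fallthrough attribute where available. A minimal userspace approximation, assuming a GCC- or Clang-style __fallthrough__ attribute; the switch contents are invented for the demo:

#include <stdio.h>

#if defined(__has_attribute)
# if __has_attribute(__fallthrough__)
#  define fallthrough __attribute__((__fallthrough__))
# endif
#endif
#ifndef fallthrough
# define fallthrough do {} while (0)   /* fallback for older compilers */
#endif

static const char *classify(int level)
{
	switch (level) {
	case 3:
		printf("checking region third...\n");
		fallthrough;    /* explicit marker keeps -Wimplicit-fallthrough quiet */
	case 2:
		printf("checking segment...\n");
		fallthrough;
	case 1:
		return "walked down to page table";
	default:
		return "unknown level";
	}
}

int main(void)
{
	puts(classify(3));
	return 0;
}
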
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 4fa7a56..f2d19d4 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -17,8 +17,8 @@
 #include <linux/swapops.h>
 #include <linux/ksm.h>
 #include <linux/mman.h>
+#include <linux/pgtable.h>
 
-#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/gmap.h>
 #include <asm/tlb.h>
@@ -300,7 +300,7 @@
 EXPORT_SYMBOL_GPL(gmap_get_enabled);
 
 /*
- * gmap_alloc_table is assumed to be called with mmap_sem held
+ * gmap_alloc_table is assumed to be called with mmap_lock held
  */
 static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 			    unsigned long init, unsigned long gaddr)
@@ -405,10 +405,10 @@
 		return -EINVAL;
 
 	flush = 0;
-	down_write(&gmap->mm->mmap_sem);
+	mmap_write_lock(gmap->mm);
 	for (off = 0; off < len; off += PMD_SIZE)
 		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
-	up_write(&gmap->mm->mmap_sem);
+	mmap_write_unlock(gmap->mm);
 	if (flush)
 		gmap_flush_tlb(gmap);
 	return 0;
@@ -438,7 +438,7 @@
 		return -EINVAL;
 
 	flush = 0;
-	down_write(&gmap->mm->mmap_sem);
+	mmap_write_lock(gmap->mm);
 	for (off = 0; off < len; off += PMD_SIZE) {
 		/* Remove old translation */
 		flush |= __gmap_unmap_by_gaddr(gmap, to + off);
@@ -448,7 +448,7 @@
 				      (void *) from + off))
 			break;
 	}
-	up_write(&gmap->mm->mmap_sem);
+	mmap_write_unlock(gmap->mm);
 	if (flush)
 		gmap_flush_tlb(gmap);
 	if (off >= len)
@@ -466,7 +466,7 @@
  * Returns user space address which corresponds to the guest address or
  * -EFAULT if no such mapping exists.
  * This function does not establish potentially missing page table entries.
- * The mmap_sem of the mm that belongs to the address space must be held
+ * The mmap_lock of the mm that belongs to the address space must be held
  * when this function gets called.
  *
  * Note: Can also be called for shadow gmaps.
@@ -495,9 +495,9 @@
 {
 	unsigned long rc;
 
-	down_read(&gmap->mm->mmap_sem);
+	mmap_read_lock(gmap->mm);
 	rc = __gmap_translate(gmap, gaddr);
-	up_read(&gmap->mm->mmap_sem);
+	mmap_read_unlock(gmap->mm);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_translate);
@@ -534,7 +534,7 @@
  *
  * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
  * if the vm address is already mapped to a different guest segment.
- * The mmap_sem of the mm that belongs to the address space must be held
+ * The mmap_lock of the mm that belongs to the address space must be held
  * when this function gets called.
  */
 int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
@@ -640,7 +640,7 @@
 	int rc;
 	bool unlocked;
 
-	down_read(&gmap->mm->mmap_sem);
+	mmap_read_lock(gmap->mm);
 
 retry:
 	unlocked = false;
@@ -649,13 +649,13 @@
 		rc = vmaddr;
 		goto out_up;
 	}
-	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
 			     &unlocked)) {
 		rc = -EFAULT;
 		goto out_up;
 	}
 	/*
-	 * In the case that fixup_user_fault unlocked the mmap_sem during
+	 * In the case that fixup_user_fault unlocked the mmap_lock during
 	 * faultin redo __gmap_translate to not race with a map/unmap_segment.
 	 */
 	if (unlocked)
@@ -663,13 +663,13 @@
 
 	rc = __gmap_link(gmap, gaddr, vmaddr);
 out_up:
-	up_read(&gmap->mm->mmap_sem);
+	mmap_read_unlock(gmap->mm);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_fault);
 
 /*
- * this function is assumed to be called with mmap_sem held
+ * this function is assumed to be called with mmap_lock held
  */
 void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
 {
@@ -684,9 +684,10 @@
 		vmaddr |= gaddr & ~PMD_MASK;
 		/* Get pointer to the page table entry */
 		ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-		if (likely(ptep))
+		if (likely(ptep)) {
 			ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
-		pte_unmap_unlock(ptep, ptl);
+			pte_unmap_unlock(ptep, ptl);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(__gmap_zap);
@@ -696,7 +697,7 @@
 	unsigned long gaddr, vmaddr, size;
 	struct vm_area_struct *vma;
 
-	down_read(&gmap->mm->mmap_sem);
+	mmap_read_lock(gmap->mm);
 	for (gaddr = from; gaddr < to;
 	     gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
 		/* Find the vm address for the guest address */
@@ -719,7 +720,7 @@
 		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
 		zap_page_range(vma, vmaddr, size);
 	}
-	up_read(&gmap->mm->mmap_sem);
+	mmap_read_unlock(gmap->mm);
 }
 EXPORT_SYMBOL_GPL(gmap_discard);
 
@@ -788,19 +789,19 @@
 					     unsigned long gaddr, int level)
 {
 	const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
-	unsigned long *table;
+	unsigned long *table = gmap->table;
 
-	if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
-		return NULL;
 	if (gmap_is_shadow(gmap) && gmap->removed)
 		return NULL;
 
+	if (WARN_ON_ONCE(level > (asce_type >> 2) + 1))
+		return NULL;
+
 	if (asce_type != _ASCE_TYPE_REGION1 &&
 	    gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
 		return NULL;
 
-	table = gmap->table;
-	switch (gmap->asce & _ASCE_TYPE_MASK) {
+	switch (asce_type) {
 	case _ASCE_TYPE_REGION1:
 		table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT;
 		if (level == 4)
@@ -808,7 +809,7 @@
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* Fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
 		if (level == 3)
@@ -816,7 +817,7 @@
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* Fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
 		if (level == 2)
@@ -824,7 +825,7 @@
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
 		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-		/* Fallthrough */
+		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 		if (level == 1)
@@ -879,10 +880,10 @@
 
 	BUG_ON(gmap_is_shadow(gmap));
 	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
-	if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
 		return -EFAULT;
 	if (unlocked)
-		/* lost mmap_sem, caller has to retry __gmap_translate */
+		/* lost mmap_lock, caller has to retry __gmap_translate */
 		return 0;
 	/* Connect the page tables */
 	return __gmap_link(gmap, gaddr, vmaddr);
@@ -953,7 +954,7 @@
  * -EAGAIN if a fixup is needed
  * -EINVAL if unsupported notifier bits have been specified
  *
- * Expected to be called with sg->mm->mmap_sem in read and
+ * Expected to be called with sg->mm->mmap_lock in read and
  * guest_table_lock held.
  */
 static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
@@ -999,7 +1000,7 @@
  * Returns 0 if successfully protected, -ENOMEM if out of memory and
  * -EAGAIN if a fixup is needed.
  *
- * Expected to be called with sg->mm->mmap_sem in read
+ * Expected to be called with sg->mm->mmap_lock in read
  */
 static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
 			    pmd_t *pmdp, int prot, unsigned long bits)
@@ -1035,7 +1036,7 @@
  * Returns 0 if successfully protected, -ENOMEM if out of memory and
  * -EFAULT if gaddr is invalid (or mapping for shadows is missing).
  *
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
  */
 static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
 			      unsigned long len, int prot, unsigned long bits)
@@ -1106,9 +1107,9 @@
 		return -EINVAL;
 	if (!MACHINE_HAS_ESOP && prot == PROT_READ)
 		return -EINVAL;
-	down_read(&gmap->mm->mmap_sem);
+	mmap_read_lock(gmap->mm);
 	rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
-	up_read(&gmap->mm->mmap_sem);
+	mmap_read_unlock(gmap->mm);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(gmap_mprotect_notify);
@@ -1124,7 +1125,7 @@
  * if reading using the virtual address failed. -EINVAL if called on a gmap
  * shadow.
  *
- * Called with gmap->mm->mmap_sem in read.
+ * Called with gmap->mm->mmap_lock in read.
  */
 int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
 {
@@ -1696,11 +1697,11 @@
 	}
 	spin_unlock(&parent->shadow_lock);
 	/* protect after insertion, so it will get properly invalidated */
-	down_read(&parent->mm->mmap_sem);
+	mmap_read_lock(parent->mm);
 	rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
 				((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
 				PROT_READ, GMAP_NOTIFY_SHADOW);
-	up_read(&parent->mm->mmap_sem);
+	mmap_read_unlock(parent->mm);
 	spin_lock(&parent->shadow_lock);
 	new->initialized = true;
 	if (rc) {
@@ -1729,7 +1730,7 @@
  * shadow table structure is incomplete, -ENOMEM if out of memory and
  * -EFAULT if an address in the parent gmap could not be resolved.
  *
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
  */
 int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		    int fake)
@@ -1813,7 +1814,7 @@
  * shadow table structure is incomplete, -ENOMEM if out of memory and
  * -EFAULT if an address in the parent gmap could not be resolved.
  *
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
  */
 int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 		    int fake)
@@ -1897,7 +1898,7 @@
  * shadow table structure is incomplete, -ENOMEM if out of memory and
  * -EFAULT if an address in the parent gmap could not be resolved.
  *
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
  */
 int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		    int fake)
@@ -1981,7 +1982,7 @@
  * Returns 0 if the shadow page table was found and -EAGAIN if the page
  * table was not found.
  *
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
  */
 int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
 			   unsigned long *pgt, int *dat_protection,
@@ -2021,7 +2022,7 @@
  * shadow table structure is incomplete, -ENOMEM if out of memory,
  * -EFAULT if an address in the parent gmap could not be resolved and
  *
- * Called with gmap->mm->mmap_sem in read
+ * Called with gmap->mm->mmap_lock in read
  */
 int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 		    int fake)
@@ -2100,7 +2101,7 @@
  * shadow table structure is incomplete, -ENOMEM if out of memory and
  * -EFAULT if an address in the parent gmap could not be resolved.
  *
- * Called with sg->mm->mmap_sem in read.
+ * Called with sg->mm->mmap_lock in read.
  */
 int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
 {
@@ -2556,16 +2557,33 @@
 	/* Fail if the page tables are 2K */
 	if (!mm_alloc_pgste(mm))
 		return -EINVAL;
-	down_write(&mm->mmap_sem);
+	mmap_write_lock(mm);
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
 	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
-	up_write(&mm->mmap_sem);
+	mmap_write_unlock(mm);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
+int gmap_mark_unmergeable(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int ret;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+				  MADV_UNMERGEABLE, &vma->vm_flags);
+		if (ret)
+			return ret;
+	}
+	mm->def_flags &= ~VM_MERGEABLE;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
+
 /*
  * Enable storage key handling from now on and initialize the storage
  * keys with the default key.
@@ -2611,28 +2629,22 @@
 int s390_enable_skey(void)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
 	int rc = 0;
 
-	down_write(&mm->mmap_sem);
+	mmap_write_lock(mm);
 	if (mm_uses_skeys(mm))
 		goto out_up;
 
 	mm->context.uses_skeys = 1;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
-				MADV_UNMERGEABLE, &vma->vm_flags)) {
-			mm->context.uses_skeys = 0;
-			rc = -ENOMEM;
-			goto out_up;
-		}
+	rc = gmap_mark_unmergeable();
+	if (rc) {
+		mm->context.uses_skeys = 0;
+		goto out_up;
 	}
-	mm->def_flags &= ~VM_MERGEABLE;
-
 	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
 
 out_up:
-	up_write(&mm->mmap_sem);
+	mmap_write_unlock(mm);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(s390_enable_skey);
@@ -2653,8 +2665,45 @@
 
 void s390_reset_cmma(struct mm_struct *mm)
 {
-	down_write(&mm->mmap_sem);
+	mmap_write_lock(mm);
 	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
-	up_write(&mm->mmap_sem);
+	mmap_write_unlock(mm);
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);
+
+/*
+ * make inaccessible pages accessible again
+ */
+static int __s390_reset_acc(pte_t *ptep, unsigned long addr,
+			    unsigned long next, struct mm_walk *walk)
+{
+	pte_t pte = READ_ONCE(*ptep);
+
+	if (pte_present(pte))
+		WARN_ON_ONCE(uv_destroy_page(pte_val(pte) & PAGE_MASK));
+	return 0;
+}
+
+static const struct mm_walk_ops reset_acc_walk_ops = {
+	.pte_entry		= __s390_reset_acc,
+};
+
+#include <linux/sched/mm.h>
+void s390_reset_acc(struct mm_struct *mm)
+{
+	if (!mm_is_protected(mm))
+		return;
+	/*
+	 * we might be called during
+	 * reset:                             we walk the pages and clear
+	 * close of all kvm file descriptors: we walk the pages and clear
+	 * exit of process on fd closure:     vma already gone, do nothing
+	 */
+	if (!mmget_not_zero(mm))
+		return;
+	mmap_read_lock(mm);
+	walk_page_range(mm, 0, TASK_SIZE, &reset_acc_walk_ops, NULL);
+	mmap_read_unlock(mm);
+	mmput(mm);
+}
+EXPORT_SYMBOL_GPL(s390_reset_acc);
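
Annotation (not part of the patch): the gmap.c conversion above is largely mechanical; down_read()/up_read() on mm->mmap_sem become mmap_read_lock()/mmap_read_unlock(), and the write-side calls change accordingly. A hypothetical kernel-code fragment (not from the patch, 5.10-era VMA list assumed) showing the new wrappers in use:

#include <linux/mm.h>
#include <linux/sched.h>

/* Count the VMAs of the current task under the mmap_lock read side. */
static int demo_count_vmas(void)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int count = 0;

	mmap_read_lock(mm);
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		count++;
	mmap_read_unlock(mm);
	return count;
}
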
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index ff8234b..3b5a4d2 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -254,25 +254,15 @@
 	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
 }
 
-static __init int setup_hugepagesz(char *opt)
+bool __init arch_hugetlb_valid_size(unsigned long size)
 {
-	unsigned long size;
-	char *string = opt;
-
-	size = memparse(opt, &opt);
-	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
-		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
-		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else {
-		hugetlb_bad_size();
-		pr_err("hugepagesz= specifies an unsupported page size %s\n",
-			string);
-		return 0;
-	}
-	return 1;
+	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)
+		return true;
+	else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE)
+		return true;
+	else
+		return false;
 }
-__setup("hugepagesz=", setup_hugepagesz);
 
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,
@@ -329,7 +319,6 @@
 	struct hstate *h = hstate_file(file);
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	int rc;
 
 	if (len & ~huge_page_mask(h))
 		return -EINVAL;
@@ -356,15 +345,9 @@
 	else
 		addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
 				pgoff, flags);
-	if (addr & ~PAGE_MASK)
+	if (offset_in_page(addr))
 		return addr;
 
 check_asce_limit:
-	if (addr + len > current->mm->context.asce_limit &&
-	    addr + len <= TASK_SIZE) {
-		rc = crst_table_upgrade(mm, addr + len);
-		if (rc)
-			return (unsigned long) rc;
-	}
-	return addr;
+	return check_asce_limit(mm, addr, len);
 }
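
Annotation (not part of the patch): hugetlb_get_unmapped_area() above now tests the vm_unmapped_area() result with offset_in_page() instead of "addr & ~PAGE_MASK"; both expressions are non-zero for unaligned addresses and for negative error codes stored in an unsigned long, so the early return still catches -ENOMEM and friends. A small userspace demonstration of that property, with a 4 KiB page size assumed:

#include <stdio.h>

#define DEMO_PAGE_SIZE 4096UL
#define DEMO_PAGE_MASK (~(DEMO_PAGE_SIZE - 1))

/* Local stand-in for the kernel's offset_in_page() helper. */
static unsigned long offset_in_page(unsigned long p)
{
	return p & ~DEMO_PAGE_MASK;
}

int main(void)
{
	printf("%lu\n", offset_in_page(0x1000));             /* 0: page aligned      */
	printf("%lu\n", offset_in_page(0x1234));             /* 0x234: unaligned     */
	printf("%lu\n", offset_in_page((unsigned long)-12)); /* -ENOMEM -> non-zero  */
	return 0;
}
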
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 5521f59..9d5960b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -33,8 +33,8 @@
 #include <linux/dma-direct.h>
 #include <asm/processor.h>
 #include <linux/uaccess.h>
-#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/ptdump.h>
 #include <asm/dma.h>
 #include <asm/lowcore.h>
 #include <asm/tlb.h>
@@ -46,8 +46,9 @@
 #include <asm/kasan.h>
 #include <asm/dma-mapping.h>
 #include <asm/uv.h>
+#include <linux/virtio_config.h>
 
-pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
 
 unsigned long empty_zero_page, zero_page_mask;
 EXPORT_SYMBOL(empty_zero_page);
@@ -116,12 +117,12 @@
 	__load_psw_mask(psw.mask);
 	kasan_free_early_identity();
 
-	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+	zone_dma_bits = 31;
 	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
 	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
-	free_area_init_nodes(max_zone_pfns);
+	free_area_init(max_zone_pfns);
 }
 
 void mark_rodata_ro(void)
@@ -130,6 +131,7 @@
 
 	set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
 	pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
+	debug_checkwx();
 }
 
 int set_memory_encrypted(unsigned long addr, int numpages)
@@ -161,6 +163,16 @@
 	return is_prot_virt_guest();
 }
 
+#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
+
+int arch_has_restricted_virtio_memory_access(void)
+{
+	return is_prot_virt_guest();
+}
+EXPORT_SYMBOL(arch_has_restricted_virtio_memory_access);
+
+#endif
+
 /* protected virtualization */
 static void pv_init(void)
 {
@@ -267,20 +279,23 @@
 #endif /* CONFIG_CMA */
 
 int arch_add_memory(int nid, u64 start, u64 size,
-		struct mhp_restrictions *restrictions)
+		    struct mhp_params *params)
 {
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
 	int rc;
 
-	if (WARN_ON_ONCE(restrictions->altmap))
+	if (WARN_ON_ONCE(params->altmap))
+		return -EINVAL;
+
+	if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
 		return -EINVAL;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
 
-	rc = __add_pages(nid, start_pfn, size_pages, restrictions);
+	rc = __add_pages(nid, start_pfn, size_pages, params);
 	if (rc)
 		vmem_remove_mapping(start, size);
 	return rc;
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 5182e08..e9a9b7b 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -2,8 +2,8 @@
 #include <linux/kasan.h>
 #include <linux/sched/task.h>
 #include <linux/memblock.h>
+#include <linux/pgtable.h>
 #include <asm/pgalloc.h>
-#include <asm/pgtable.h>
 #include <asm/kasan.h>
 #include <asm/mem_detect.h>
 #include <asm/processor.h>
@@ -11,7 +11,9 @@
 #include <asm/facility.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/uv.h>
 
+unsigned long kasan_vmax;
 static unsigned long segment_pos __initdata;
 static unsigned long segment_low __initdata;
 static unsigned long pgalloc_pos __initdata;
@@ -82,7 +84,8 @@
 enum populate_mode {
 	POPULATE_ONE2ONE,
 	POPULATE_MAP,
-	POPULATE_ZERO_SHADOW
+	POPULATE_ZERO_SHADOW,
+	POPULATE_SHALLOW
 };
 static void __init kasan_early_vmemmap_populate(unsigned long address,
 						unsigned long end,
@@ -98,8 +101,12 @@
 	pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
 	if (!has_nx)
 		pgt_prot_zero &= ~_PAGE_NOEXEC;
-	pgt_prot = pgprot_val(PAGE_KERNEL_EXEC);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC);
+	pgt_prot = pgprot_val(PAGE_KERNEL);
+	sgt_prot = pgprot_val(SEGMENT_KERNEL);
+	if (!has_nx || mode == POPULATE_ONE2ONE) {
+		pgt_prot &= ~_PAGE_NOEXEC;
+		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
+	}
 
 	/*
 	 * The first 1MB of 1:1 mapping is mapped with 4KB pages
@@ -119,6 +126,12 @@
 			pgd_populate(&init_mm, pg_dir, p4_dir);
 		}
 
+		if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+		    mode == POPULATE_SHALLOW) {
+			address = (address + P4D_SIZE) & P4D_MASK;
+			continue;
+		}
+
 		p4_dir = p4d_offset(pg_dir, address);
 		if (p4d_none(*p4_dir)) {
 			if (mode == POPULATE_ZERO_SHADOW &&
@@ -133,6 +146,12 @@
 			p4d_populate(&init_mm, p4_dir, pu_dir);
 		}
 
+		if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+		    mode == POPULATE_SHALLOW) {
+			address = (address + PUD_SIZE) & PUD_MASK;
+			continue;
+		}
+
 		pu_dir = pud_offset(p4_dir, address);
 		if (pud_none(*pu_dir)) {
 			if (mode == POPULATE_ZERO_SHADOW &&
@@ -194,6 +213,9 @@
 				page = kasan_early_shadow_page;
 				pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
 				break;
+			case POPULATE_SHALLOW:
+				/* should never happen */
+				break;
 			}
 		}
 		address += PAGE_SIZE;
@@ -235,14 +257,31 @@
 	}
 }
 
+static bool __init has_uv_sec_stor_limit(void)
+{
+	/*
+	 * keep these conditions in line with setup_uv()
+	 */
+	if (!is_prot_virt_host())
+		return false;
+
+	if (is_prot_virt_guest())
+		return false;
+
+	if (!test_facility(158))
+		return false;
+
+	return !!uv_info.max_sec_stor_addr;
+}
+
 void __init kasan_early_init(void)
 {
 	unsigned long untracked_mem_end;
 	unsigned long shadow_alloc_size;
+	unsigned long vmax_unlimited;
 	unsigned long initrd_end;
 	unsigned long asce_type;
 	unsigned long memsize;
-	unsigned long vmax;
 	unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
 	pte_t pte_z;
 	pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
@@ -270,7 +309,9 @@
 		BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
 		crst_table_init((unsigned long *)early_pg_dir,
 				_REGION2_ENTRY_EMPTY);
-		untracked_mem_end = vmax = _REGION1_SIZE;
+		untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION1_SIZE;
+		if (has_uv_sec_stor_limit())
+			kasan_vmax = min(vmax_unlimited, uv_info.max_sec_stor_addr);
 		asce_type = _ASCE_TYPE_REGION2;
 	} else {
 		/* 3 level paging */
@@ -278,7 +319,7 @@
 		BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
 		crst_table_init((unsigned long *)early_pg_dir,
 				_REGION3_ENTRY_EMPTY);
-		untracked_mem_end = vmax = _REGION2_SIZE;
+		untracked_mem_end = kasan_vmax = vmax_unlimited = _REGION2_SIZE;
 		asce_type = _ASCE_TYPE_REGION3;
 	}
 
@@ -312,25 +353,56 @@
 	init_mm.pgd = early_pg_dir;
 	/*
 	 * Current memory layout:
-	 * +- 0 -------------+	 +- shadow start -+
-	 * | 1:1 ram mapping |	/| 1/8 ram	  |
-	 * +- end of ram ----+ / +----------------+
-	 * | ... gap ...     |/  |	kasan	  |
-	 * +- shadow start --+	 |	zero	  |
-	 * | 1/8 addr space  |	 |	page	  |
-	 * +- shadow end    -+	 |	mapping	  |
-	 * | ... gap ...     |\  |    (untracked) |
-	 * +- modules vaddr -+ \ +----------------+
-	 * | 2Gb	     |	\|	unmapped  | allocated per module
-	 * +-----------------+	 +- shadow end ---+
+	 * +- 0 -------------+	   +- shadow start -+
+	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
+	 * |		     |	 / |		    |
+	 * +- end of ram ----+	/  +----------------+
+	 * | ... gap ...     | /   |		    |
+	 * |		     |/    |	kasan	    |
+	 * +- shadow start --+	   |	zero	    |
+	 * | 1/8 addr space  |	   |	page	    |
+	 * +- shadow end    -+	   |	mapping	    |
+	 * | ... gap ...     |\    |  (untracked)   |
+	 * +- vmalloc area  -+ \   |		    |
+	 * | vmalloc_size    |	\  |		    |
+	 * +- modules vaddr -+	 \ +----------------+
+	 * | 2Gb	     |	  \|	  unmapped  | allocated per module
+	 * +-----------------+	   +- shadow end ---+
+	 *
+	 * Current memory layout (KASAN_VMALLOC):
+	 * +- 0 -------------+	   +- shadow start -+
+	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
+	 * |		     |	 / |		    |
+	 * +- end of ram ----+	/  +----------------+
+	 * | ... gap ...     | /   |	kasan	    |
+	 * |		     |/    |	zero	    |
+	 * +- shadow start --+	   |	page	    |
+	 * | 1/8 addr space  |	   |	mapping     |
+	 * +- shadow end    -+	   |  (untracked)   |
+	 * | ... gap ...     |\    |		    |
+	 * +- vmalloc area  -+ \   +- vmalloc area -+
+	 * | vmalloc_size    |	\  |shallow populate|
+	 * +- modules vaddr -+	 \ +- modules area -+
+	 * | 2Gb	     |	  \|shallow populate|
+	 * +-----------------+	   +- shadow end ---+
 	 */
 	/* populate kasan shadow (for identity mapping and zero page mapping) */
 	kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
 	if (IS_ENABLED(CONFIG_MODULES))
-		untracked_mem_end = vmax - MODULES_LEN;
+		untracked_mem_end = kasan_vmax - MODULES_LEN;
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		untracked_mem_end = kasan_vmax - vmalloc_size - MODULES_LEN;
+		/* shallowly populate kasan shadow for vmalloc and modules */
+		kasan_early_vmemmap_populate(__sha(untracked_mem_end),
+					     __sha(kasan_vmax), POPULATE_SHALLOW);
+	}
+	/* populate kasan shadow for untracked memory */
 	kasan_early_vmemmap_populate(__sha(max_physmem_end),
 				     __sha(untracked_mem_end),
 				     POPULATE_ZERO_SHADOW);
+	kasan_early_vmemmap_populate(__sha(kasan_vmax),
+				     __sha(vmax_unlimited),
+				     POPULATE_ZERO_SHADOW);
 	/* memory allocated for identity mapping structs will be freed later */
 	pgalloc_freeable = pgalloc_pos;
 	/* populate identity mapping */
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index cbc718b..e54f928 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -17,7 +17,6 @@
 #include <linux/random.h>
 #include <linux/compat.h>
 #include <linux/security.h>
-#include <asm/pgalloc.h>
 #include <asm/elf.h>
 
 static unsigned long stack_maxrandom_size(void)
@@ -72,14 +71,13 @@
 	return PAGE_ALIGN(STACK_TOP - gap - rnd);
 }
 
-unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
-		unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
+				     unsigned long len, unsigned long pgoff,
+				     unsigned long flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
-	int rc;
 
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
@@ -105,30 +103,20 @@
 		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	addr = vm_unmapped_area(&info);
-	if (addr & ~PAGE_MASK)
+	if (offset_in_page(addr))
 		return addr;
 
 check_asce_limit:
-	if (addr + len > current->mm->context.asce_limit &&
-	    addr + len <= TASK_SIZE) {
-		rc = crst_table_upgrade(mm, addr + len);
-		if (rc)
-			return (unsigned long) rc;
-	}
-
-	return addr;
+	return check_asce_limit(mm, addr, len);
 }
 
-unsigned long
-arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
-			  const unsigned long len, const unsigned long pgoff,
-			  const unsigned long flags)
+unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+					     unsigned long len, unsigned long pgoff,
+					     unsigned long flags)
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
-	int rc;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -163,25 +151,18 @@
 	 * can happen with large stack limits and large mmap()
 	 * allocations.
 	 */
-	if (addr & ~PAGE_MASK) {
+	if (offset_in_page(addr)) {
 		VM_BUG_ON(addr != -ENOMEM);
 		info.flags = 0;
 		info.low_limit = TASK_UNMAPPED_BASE;
 		info.high_limit = TASK_SIZE;
 		addr = vm_unmapped_area(&info);
-		if (addr & ~PAGE_MASK)
+		if (offset_in_page(addr))
 			return addr;
 	}
 
 check_asce_limit:
-	if (addr + len > current->mm->context.asce_limit &&
-	    addr + len <= TASK_SIZE) {
-		rc = crst_table_upgrade(mm, addr + len);
-		if (rc)
-			return (unsigned long) rc;
-	}
-
-	return addr;
+	return check_asce_limit(mm, addr, len);
 }
 
 /*
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index fc14189..567c69f 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -183,9 +183,9 @@
 
 void __init cmma_init_nodat(void)
 {
-	struct memblock_region *reg;
 	struct page *page;
 	unsigned long start, end, ix;
+	int i;
 
 	if (cmma_flag < 2)
 		return;
@@ -193,9 +193,7 @@
 	mark_kernel_pgd();
 
 	/* Set all kernel pages not used for page tables to stable/no-dat */
-	for_each_memblock(memory, reg) {
-		start = memblock_region_memory_base_pfn(reg);
-		end = memblock_region_memory_end_pfn(reg);
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
 		page = pfn_to_page(start);
 		for (ix = start; ix < end; ix++, page++) {
 			if (__test_and_clear_bit(PG_arch_1, &page->flags))
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index f8c6faa..ed8e5b3 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -7,7 +7,6 @@
 #include <linux/mm.h>
 #include <asm/cacheflush.h>
 #include <asm/facility.h>
-#include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/page.h>
 #include <asm/set_memory.h>
@@ -86,7 +85,7 @@
 {
 	pte_t *ptep, new;
 
-	ptep = pte_offset(pmdp, addr);
+	ptep = pte_offset_kernel(pmdp, addr);
 	do {
 		new = *ptep;
 		if (pte_none(new))
@@ -279,7 +278,7 @@
 	return rc;
 }
 
-static DEFINE_MUTEX(cpa_mutex);
+DEFINE_MUTEX(cpa_mutex);
 
 static int change_page_attr(unsigned long addr, unsigned long end,
 			    unsigned long flags)
@@ -338,19 +337,11 @@
 {
 	unsigned long address;
 	int nr, i, j;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
 	pte_t *pte;
 
 	for (i = 0; i < numpages;) {
 		address = page_to_phys(page + i);
-		pgd = pgd_offset_k(address);
-		p4d = p4d_offset(pgd, address);
-		pud = pud_offset(p4d, address);
-		pmd = pmd_offset(pud, address);
-		pte = pte_offset_kernel(pmd, address);
+		pte = virt_to_kpte(address);
 		nr = (unsigned long)pte >> ilog2(sizeof(long));
 		nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
 		nr = min(numpages - i, nr);
@@ -367,20 +358,4 @@
 	}
 }
 
-#ifdef CONFIG_HIBERNATION
-bool kernel_page_present(struct page *page)
-{
-	unsigned long addr;
-	int cc;
-
-	addr = page_to_phys(page);
-	asm volatile(
-		"	lra	%1,0(%1)\n"
-		"	ipm	%0\n"
-		"	srl	%0,28"
-		: "=d" (cc), "+a" (addr) : : "cc");
-	return cc == 0;
-}
-#endif /* CONFIG_HIBERNATION */
-
 #endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 46071be..6d99b1b 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -89,67 +89,65 @@
 
 int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 {
-	unsigned long *table, *pgd;
-	int rc, notify;
+	unsigned long *pgd = NULL, *p4d = NULL, *__pgd;
+	unsigned long asce_limit = mm->context.asce_limit;
 
 	/* upgrade should only happen from 3 to 4, 3 to 5, or 4 to 5 levels */
-	VM_BUG_ON(mm->context.asce_limit < _REGION2_SIZE);
-	rc = 0;
-	notify = 0;
-	while (mm->context.asce_limit < end) {
-		table = crst_table_alloc(mm);
-		if (!table) {
-			rc = -ENOMEM;
-			break;
-		}
-		spin_lock_bh(&mm->page_table_lock);
-		pgd = (unsigned long *) mm->pgd;
-		if (mm->context.asce_limit == _REGION2_SIZE) {
-			crst_table_init(table, _REGION2_ENTRY_EMPTY);
-			p4d_populate(mm, (p4d_t *) table, (pud_t *) pgd);
-			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = _REGION1_SIZE;
-			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
-			mm_inc_nr_puds(mm);
-		} else {
-			crst_table_init(table, _REGION1_ENTRY_EMPTY);
-			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
-			mm->pgd = (pgd_t *) table;
-			mm->context.asce_limit = -PAGE_SIZE;
-			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-				_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
-		}
-		notify = 1;
-		spin_unlock_bh(&mm->page_table_lock);
+	VM_BUG_ON(asce_limit < _REGION2_SIZE);
+
+	if (end <= asce_limit)
+		return 0;
+
+	if (asce_limit == _REGION2_SIZE) {
+		p4d = crst_table_alloc(mm);
+		if (unlikely(!p4d))
+			goto err_p4d;
+		crst_table_init(p4d, _REGION2_ENTRY_EMPTY);
 	}
-	if (notify)
-		on_each_cpu(__crst_table_upgrade, mm, 0);
-	return rc;
-}
-
-void crst_table_downgrade(struct mm_struct *mm)
-{
-	pgd_t *pgd;
-
-	/* downgrade should only happen from 3 to 2 levels (compat only) */
-	VM_BUG_ON(mm->context.asce_limit != _REGION2_SIZE);
-
-	if (current->active_mm == mm) {
-		clear_user_asce();
-		__tlb_flush_mm(mm);
+	if (end > _REGION1_SIZE) {
+		pgd = crst_table_alloc(mm);
+		if (unlikely(!pgd))
+			goto err_pgd;
+		crst_table_init(pgd, _REGION1_ENTRY_EMPTY);
 	}
 
-	pgd = mm->pgd;
-	mm_dec_nr_pmds(mm);
-	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-	mm->context.asce_limit = _REGION3_SIZE;
-	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-	crst_table_free(mm, (unsigned long *) pgd);
+	spin_lock_bh(&mm->page_table_lock);
 
-	if (current->active_mm == mm)
-		set_user_asce(mm);
+	/*
+	 * This routine gets called with mmap_lock lock held and there is
+	 * no reason to optimize for the case of otherwise. However, if
+	 * that would ever change, the below check will let us know.
+	 */
+	VM_BUG_ON(asce_limit != mm->context.asce_limit);
+
+	if (p4d) {
+		__pgd = (unsigned long *) mm->pgd;
+		p4d_populate(mm, (p4d_t *) p4d, (pud_t *) __pgd);
+		mm->pgd = (pgd_t *) p4d;
+		mm->context.asce_limit = _REGION1_SIZE;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		mm_inc_nr_puds(mm);
+	}
+	if (pgd) {
+		__pgd = (unsigned long *) mm->pgd;
+		pgd_populate(mm, (pgd_t *) pgd, (p4d_t *) __pgd);
+		mm->pgd = (pgd_t *) pgd;
+		mm->context.asce_limit = TASK_SIZE_MAX;
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			_ASCE_USER_BITS | _ASCE_TYPE_REGION1;
+	}
+
+	spin_unlock_bh(&mm->page_table_lock);
+
+	on_each_cpu(__crst_table_upgrade, mm, 0);
+
+	return 0;
+
+err_pgd:
+	crst_table_free(mm, p4d);
+err_p4d:
+	return -ENOMEM;
 }
 
 static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
@@ -255,13 +253,15 @@
 		/* Free 2K page table fragment of a 4K page */
 		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
 		spin_lock_bh(&mm->context.lock);
-		mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+		mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
 		mask >>= 24;
 		if (mask & 3)
 			list_add(&page->lru, &mm->context.pgtable_list);
 		else
 			list_del(&page->lru);
 		spin_unlock_bh(&mm->context.lock);
+		mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24));
+		mask >>= 24;
 		if (mask != 0)
 			return;
 	} else {
@@ -316,7 +316,7 @@
 		mask >>= 24;
 		if (mask != 0)
 			break;
-		/* fallthrough */
+		fallthrough;
 	case 3:		/* 4K page table with pgstes */
 		if (mask & 3)
 			atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -541,7 +541,7 @@
 		base_region2_walk(table, 0, _REGION1_SIZE, 0);
 		break;
 	case _ASCE_TYPE_REGION1:
-		base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+		base_region1_walk(table, 0, TASK_SIZE_MAX, 0);
 		break;
 	}
 	base_crst_free(table);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 9ebd012..fabaedd 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -19,13 +19,31 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
 
+pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	/*
+	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
+	 * once at init and only read afterwards.
+	 */
+	return __pgprot(pgprot_val(prot) | mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
+
+pgprot_t pgprot_writethrough(pgprot_t prot)
+{
+	/*
+	 * mio_wb_bit_mask may be set on a different CPU, but it is only set
+	 * once at init and only read afterwards.
+	 */
+	return __pgprot(pgprot_val(prot) & ~mio_wb_bit_mask);
+}
+EXPORT_SYMBOL_GPL(pgprot_writethrough);
+
 static inline void ptep_ipte_local(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, int nodat)
 {
@@ -970,6 +988,7 @@
 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 			unsigned long *oldpte, unsigned long *oldpgste)
 {
+	struct vm_area_struct *vma;
 	unsigned long pgstev;
 	spinlock_t *ptl;
 	pgste_t pgste;
@@ -979,6 +998,10 @@
 	WARN_ON_ONCE(orc > ESSA_MAX);
 	if (unlikely(orc > ESSA_MAX))
 		return -EINVAL;
+
+	vma = find_vma(mm, hva);
+	if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
@@ -1071,10 +1094,14 @@
 int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
 			unsigned long bits, unsigned long value)
 {
+	struct vm_area_struct *vma;
 	spinlock_t *ptl;
 	pgste_t new;
 	pte_t *ptep;
 
+	vma = find_vma(mm, hva);
+	if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
@@ -1099,9 +1126,13 @@
  */
 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
 {
+	struct vm_area_struct *vma;
 	spinlock_t *ptl;
 	pte_t *ptep;
 
+	vma = find_vma(mm, hva);
+	if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index f810930..b239f2b 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -13,7 +13,6 @@
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
-#include <asm/pgtable.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
@@ -21,14 +20,6 @@
 
 static DEFINE_MUTEX(vmem_mutex);
 
-struct memory_segment {
-	struct list_head list;
-	unsigned long start;
-	unsigned long size;
-};
-
-static LIST_HEAD(mem_segs);
-
 static void __ref *vmem_alloc_pages(unsigned int order)
 {
 	unsigned long size = PAGE_SIZE << order;
@@ -38,6 +29,15 @@
 	return (void *) memblock_phys_alloc(size, size);
 }
 
+static void vmem_free_pages(unsigned long addr, int order)
+{
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+		return;
+	free_pages(addr, order);
+}
+
 void *vmem_crst_alloc(unsigned long val)
 {
 	unsigned long *table;
@@ -63,332 +63,487 @@
 	return pte;
 }
 
-/*
- * Add a physical memory range to the 1:1 mapping.
- */
-static int vmem_add_mem(unsigned long start, unsigned long size)
+static void vmem_pte_free(unsigned long *table)
 {
-	unsigned long pgt_prot, sgt_prot, r3_prot;
-	unsigned long pages4k, pages1m, pages2g;
-	unsigned long end = start + size;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
+	/* We don't expect boot memory to be removed ever. */
+	if (!slab_is_available() ||
+	    WARN_ON_ONCE(PageReserved(virt_to_page(table))))
+		return;
+	page_table_free(&init_mm, table);
+}
 
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	r3_prot = pgprot_val(REGION3_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-		r3_prot &= ~_REGION_ENTRY_NOEXEC;
+#define PAGE_UNUSED 0xFD
+
+/*
+ * The unused vmemmap range, which was not yet memset(PAGE_UNUSED) ranges
+ * from unused_pmd_start to next PMD_SIZE boundary.
+ */
+static unsigned long unused_pmd_start;
+
+static void vmemmap_flush_unused_pmd(void)
+{
+	if (!unused_pmd_start)
+		return;
+	memset(__va(unused_pmd_start), PAGE_UNUSED,
+	       ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
+	unused_pmd_start = 0;
+}
+
+static void __vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * As we expect to add in the same granularity as we remove, it's
+	 * sufficient to mark only some piece used to block the memmap page from
+	 * getting removed (just in case the memmap never gets initialized,
+	 * e.g., because the memory block never gets onlined).
+	 */
+	memset(__va(start), 0, sizeof(struct page));
+}
+
+static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
+{
+	/*
+	 * We only optimize if the new used range directly follows the
+	 * previously unused range (esp., when populating consecutive sections).
+	 */
+	if (unused_pmd_start == start) {
+		unused_pmd_start = end;
+		if (likely(IS_ALIGNED(unused_pmd_start, PMD_SIZE)))
+			unused_pmd_start = 0;
+		return;
 	}
-	pages4k = pages1m = pages2g = 0;
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-		pu_dir = pud_offset(p4_dir, address);
-		if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
-		    !(address & ~PUD_MASK) && (address + PUD_SIZE <= end) &&
-		     !debug_pagealloc_enabled()) {
-			pud_val(*pu_dir) = address | r3_prot;
-			address += PUD_SIZE;
-			pages2g++;
-			continue;
-		}
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-		pm_dir = pmd_offset(pu_dir, address);
-		if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
-		    !(address & ~PMD_MASK) && (address + PMD_SIZE <= end) &&
-		    !debug_pagealloc_enabled()) {
-			pmd_val(*pm_dir) = address | sgt_prot;
-			address += PMD_SIZE;
-			pages1m++;
-			continue;
-		}
-		if (pmd_none(*pm_dir)) {
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		}
+	vmemmap_flush_unused_pmd();
+	__vmemmap_use_sub_pmd(start, end);
+}
 
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_val(*pt_dir) = address | pgt_prot;
-		address += PAGE_SIZE;
-		pages4k++;
+static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
+{
+	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+	vmemmap_flush_unused_pmd();
+
+	/* Could be our memmap page is filled with PAGE_UNUSED already ... */
+	__vmemmap_use_sub_pmd(start, end);
+
+	/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
+	if (!IS_ALIGNED(start, PMD_SIZE))
+		memset(page, PAGE_UNUSED, start - __pa(page));
+	/*
+	 * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
+	 * consecutive sections. Remember for the last added PMD the last
+	 * unused range in the populated PMD.
+	 */
+	if (!IS_ALIGNED(end, PMD_SIZE))
+		unused_pmd_start = end;
+}
+
+/* Returns true if the PMD is completely unused and can be freed. */
+static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
+{
+	void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+
+	vmemmap_flush_unused_pmd();
+	memset(__va(start), PAGE_UNUSED, end - start);
+	return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long prot, pages = 0;
+	int ret = -ENOMEM;
+	pte_t *pte;
+
+	prot = pgprot_val(PAGE_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_PAGE_NOEXEC;
+
+	pte = pte_offset_kernel(pmd, addr);
+	for (; addr < end; addr += PAGE_SIZE, pte++) {
+		if (!add) {
+			if (pte_none(*pte))
+				continue;
+			if (!direct)
+				vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+			pte_clear(&init_mm, addr, pte);
+		} else if (pte_none(*pte)) {
+			if (!direct) {
+				void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+
+				if (!new_page)
+					goto out;
+				pte_val(*pte) = __pa(new_page) | prot;
+			} else {
+				pte_val(*pte) = addr | prot;
+			}
+		} else {
+			continue;
+		}
+		pages++;
 	}
 	ret = 0;
 out:
-	update_page_count(PG_DIRECT_MAP_4K, pages4k);
-	update_page_count(PG_DIRECT_MAP_1M, pages1m);
-	update_page_count(PG_DIRECT_MAP_2G, pages2g);
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_4K, add ? pages : -pages);
 	return ret;
 }
 
+static void try_free_pte_table(pmd_t *pmd, unsigned long start)
+{
+	pte_t *pte;
+	int i;
+
+	/* We can safely assume this is fully in 1:1 mapping & vmemmap area */
+	pte = pte_offset_kernel(pmd, start);
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		if (!pte_none(*pte))
+			return;
+	}
+	vmem_pte_free(__va(pmd_deref(*pmd)));
+	pmd_clear(pmd);
+}
+
+/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
+static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
+				  unsigned long end, bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	prot = pgprot_val(SEGMENT_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_SEGMENT_ENTRY_NOEXEC;
+
+	pmd = pmd_offset(pud, addr);
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+		if (!add) {
+			if (pmd_none(*pmd))
+				continue;
+			if (pmd_large(*pmd) && !add) {
+				if (IS_ALIGNED(addr, PMD_SIZE) &&
+				    IS_ALIGNED(next, PMD_SIZE)) {
+					if (!direct)
+						vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+					pages++;
+				} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
+					vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+					pmd_clear(pmd);
+				}
+				continue;
+			}
+		} else if (pmd_none(*pmd)) {
+			if (IS_ALIGNED(addr, PMD_SIZE) &&
+			    IS_ALIGNED(next, PMD_SIZE) &&
+			    MACHINE_HAS_EDAT1 && addr && direct &&
+			    !debug_pagealloc_enabled()) {
+				pmd_val(*pmd) = addr | prot;
+				pages++;
+				continue;
+			} else if (!direct && MACHINE_HAS_EDAT1) {
+				void *new_page;
+
+				/*
+				 * Use 1MB frames for vmemmap if available. We
+				 * always use large frames even if they are only
+				 * partially used. Otherwise we would have also
+				 * page tables since vmemmap_populate gets
+				 * called for each section separately.
+				 */
+				new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+				if (new_page) {
+					pmd_val(*pmd) = __pa(new_page) | prot;
+					if (!IS_ALIGNED(addr, PMD_SIZE) ||
+					    !IS_ALIGNED(next, PMD_SIZE)) {
+						vmemmap_use_new_sub_pmd(addr, next);
+					}
+					continue;
+				}
+			}
+			pte = vmem_pte_alloc();
+			if (!pte)
+				goto out;
+			pmd_populate(&init_mm, pmd, pte);
+		} else if (pmd_large(*pmd)) {
+			if (!direct)
+				vmemmap_use_sub_pmd(addr, next);
+			continue;
+		}
+		ret = modify_pte_table(pmd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pte_table(pmd, addr & PMD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_1M, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pmd_table(pud_t *pud, unsigned long start)
+{
+	const unsigned long end = start + PUD_SIZE;
+	pmd_t *pmd;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+		return;
+#endif
+	pmd = pmd_offset(pud, start);
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
+		if (!pmd_none(*pmd))
+			return;
+	vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+	pud_clear(pud);
+}
+
+static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next, prot, pages = 0;
+	int ret = -ENOMEM;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	prot = pgprot_val(REGION3_KERNEL);
+	if (!MACHINE_HAS_NX)
+		prot &= ~_REGION_ENTRY_NOEXEC;
+	pud = pud_offset(p4d, addr);
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!add) {
+			if (pud_none(*pud))
+				continue;
+			if (pud_large(*pud)) {
+				if (IS_ALIGNED(addr, PUD_SIZE) &&
+				    IS_ALIGNED(next, PUD_SIZE)) {
+					pud_clear(pud);
+					pages++;
+				}
+				continue;
+			}
+		} else if (pud_none(*pud)) {
+			if (IS_ALIGNED(addr, PUD_SIZE) &&
+			    IS_ALIGNED(next, PUD_SIZE) &&
+			    MACHINE_HAS_EDAT2 && addr && direct &&
+			    !debug_pagealloc_enabled()) {
+				pud_val(*pud) = addr | prot;
+				pages++;
+				continue;
+			}
+			pmd = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+			if (!pmd)
+				goto out;
+			pud_populate(&init_mm, pud, pmd);
+		} else if (pud_large(*pud)) {
+			continue;
+		}
+		ret = modify_pmd_table(pud, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pmd_table(pud, addr & PUD_MASK);
+	}
+	ret = 0;
+out:
+	if (direct)
+		update_page_count(PG_DIRECT_MAP_2G, add ? pages : -pages);
+	return ret;
+}
+
+static void try_free_pud_table(p4d_t *p4d, unsigned long start)
+{
+	const unsigned long end = start + P4D_SIZE;
+	pud_t *pud;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+		return;
+#endif
+
+	pud = pud_offset(p4d, start);
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		if (!pud_none(*pud))
+			return;
+	}
+	vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+	p4d_clear(p4d);
+}
+
+static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
+			    bool add, bool direct)
+{
+	unsigned long next;
+	int ret = -ENOMEM;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	p4d = p4d_offset(pgd, addr);
+	for (; addr < end; addr = next, p4d++) {
+		next = p4d_addr_end(addr, end);
+		if (!add) {
+			if (p4d_none(*p4d))
+				continue;
+		} else if (p4d_none(*p4d)) {
+			pud = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
+			if (!pud)
+				goto out;
+			p4d_populate(&init_mm, p4d, pud);
+		}
+		ret = modify_pud_table(p4d, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_pud_table(p4d, addr & P4D_MASK);
+	}
+	ret = 0;
+out:
+	return ret;
+}
+
+static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
+{
+	const unsigned long end = start + PGDIR_SIZE;
+	p4d_t *p4d;
+	int i;
+
+	/* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+	if (end > VMALLOC_START)
+		return;
+#ifdef CONFIG_KASAN
+	if (start < KASAN_SHADOW_END && KASAN_SHADOW_START > end)
+		return;
+#endif
+
+	p4d = p4d_offset(pgd, start);
+	for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
+		if (!p4d_none(*p4d))
+			return;
+	}
+	vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+	pgd_clear(pgd);
+}
+
+static int modify_pagetable(unsigned long start, unsigned long end, bool add,
+			    bool direct)
+{
+	unsigned long addr, next;
+	int ret = -ENOMEM;
+	pgd_t *pgd;
+	p4d_t *p4d;
+
+	if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
+		return -EINVAL;
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+		pgd = pgd_offset_k(addr);
+
+		if (!add) {
+			if (pgd_none(*pgd))
+				continue;
+		} else if (pgd_none(*pgd)) {
+			p4d = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
+			if (!p4d)
+				goto out;
+			pgd_populate(&init_mm, pgd, p4d);
+		}
+		ret = modify_p4d_table(pgd, addr, next, add, direct);
+		if (ret)
+			goto out;
+		if (!add)
+			try_free_p4d_table(pgd, addr & PGDIR_MASK);
+	}
+	ret = 0;
+out:
+	if (!add)
+		flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
+static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, true, direct);
+}
+
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	return modify_pagetable(start, end, false, direct);
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+	return add_pagetable(start, start + size, true);
+}
+
 /*
  * Remove a physical memory range from the 1:1 mapping.
- * Currently only invalidates page table entries.
  */
 static void vmem_remove_range(unsigned long start, unsigned long size)
 {
-	unsigned long pages4k, pages1m, pages2g;
-	unsigned long end = start + size;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-
-	pages4k = pages1m = pages2g = 0;
-	while (address < end) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			address += PGDIR_SIZE;
-			continue;
-		}
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			address += P4D_SIZE;
-			continue;
-		}
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			address += PUD_SIZE;
-			continue;
-		}
-		if (pud_large(*pu_dir)) {
-			pud_clear(pu_dir);
-			address += PUD_SIZE;
-			pages2g++;
-			continue;
-		}
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			address += PMD_SIZE;
-			continue;
-		}
-		if (pmd_large(*pm_dir)) {
-			pmd_clear(pm_dir);
-			address += PMD_SIZE;
-			pages1m++;
-			continue;
-		}
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		pte_clear(&init_mm, address, pt_dir);
-		address += PAGE_SIZE;
-		pages4k++;
-	}
-	flush_tlb_kernel_range(start, end);
-	update_page_count(PG_DIRECT_MAP_4K, -pages4k);
-	update_page_count(PG_DIRECT_MAP_1M, -pages1m);
-	update_page_count(PG_DIRECT_MAP_2G, -pages2g);
+	remove_pagetable(start, start + size, true);
 }
 
 /*
  * Add a backed mem_map array to the virtual mem_map array.
  */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
-		struct vmem_altmap *altmap)
+			       struct vmem_altmap *altmap)
 {
-	unsigned long pgt_prot, sgt_prot;
-	unsigned long address = start;
-	pgd_t *pg_dir;
-	p4d_t *p4_dir;
-	pud_t *pu_dir;
-	pmd_t *pm_dir;
-	pte_t *pt_dir;
-	int ret = -ENOMEM;
-
-	pgt_prot = pgprot_val(PAGE_KERNEL);
-	sgt_prot = pgprot_val(SEGMENT_KERNEL);
-	if (!MACHINE_HAS_NX) {
-		pgt_prot &= ~_PAGE_NOEXEC;
-		sgt_prot &= ~_SEGMENT_ENTRY_NOEXEC;
-	}
-	for (address = start; address < end;) {
-		pg_dir = pgd_offset_k(address);
-		if (pgd_none(*pg_dir)) {
-			p4_dir = vmem_crst_alloc(_REGION2_ENTRY_EMPTY);
-			if (!p4_dir)
-				goto out;
-			pgd_populate(&init_mm, pg_dir, p4_dir);
-		}
-
-		p4_dir = p4d_offset(pg_dir, address);
-		if (p4d_none(*p4_dir)) {
-			pu_dir = vmem_crst_alloc(_REGION3_ENTRY_EMPTY);
-			if (!pu_dir)
-				goto out;
-			p4d_populate(&init_mm, p4_dir, pu_dir);
-		}
-
-		pu_dir = pud_offset(p4_dir, address);
-		if (pud_none(*pu_dir)) {
-			pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
-			if (!pm_dir)
-				goto out;
-			pud_populate(&init_mm, pu_dir, pm_dir);
-		}
-
-		pm_dir = pmd_offset(pu_dir, address);
-		if (pmd_none(*pm_dir)) {
-			/* Use 1MB frames for vmemmap if available. We always
-			 * use large frames even if they are only partially
-			 * used.
-			 * Otherwise we would have also page tables since
-			 * vmemmap_populate gets called for each section
-			 * separately. */
-			if (MACHINE_HAS_EDAT1) {
-				void *new_page;
-
-				new_page = vmemmap_alloc_block(PMD_SIZE, node);
-				if (!new_page)
-					goto out;
-				pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
-				address = (address + PMD_SIZE) & PMD_MASK;
-				continue;
-			}
-			pt_dir = vmem_pte_alloc();
-			if (!pt_dir)
-				goto out;
-			pmd_populate(&init_mm, pm_dir, pt_dir);
-		} else if (pmd_large(*pm_dir)) {
-			address = (address + PMD_SIZE) & PMD_MASK;
-			continue;
-		}
-
-		pt_dir = pte_offset_kernel(pm_dir, address);
-		if (pte_none(*pt_dir)) {
-			void *new_page;
-
-			new_page = vmemmap_alloc_block(PAGE_SIZE, node);
-			if (!new_page)
-				goto out;
-			pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
-		}
-		address += PAGE_SIZE;
-	}
-	ret = 0;
-out:
-	return ret;
-}
-
-void vmemmap_free(unsigned long start, unsigned long end,
-		struct vmem_altmap *altmap)
-{
-}
-
-/*
- * Add memory segment to the segment list if it doesn't overlap with
- * an already present segment.
- */
-static int insert_memory_segment(struct memory_segment *seg)
-{
-	struct memory_segment *tmp;
-
-	if (seg->start + seg->size > VMEM_MAX_PHYS ||
-	    seg->start + seg->size < seg->start)
-		return -ERANGE;
-
-	list_for_each_entry(tmp, &mem_segs, list) {
-		if (seg->start >= tmp->start + tmp->size)
-			continue;
-		if (seg->start + seg->size <= tmp->start)
-			continue;
-		return -ENOSPC;
-	}
-	list_add(&seg->list, &mem_segs);
-	return 0;
-}
-
-/*
- * Remove memory segment from the segment list.
- */
-static void remove_memory_segment(struct memory_segment *seg)
-{
-	list_del(&seg->list);
-}
-
-static void __remove_shared_memory(struct memory_segment *seg)
-{
-	remove_memory_segment(seg);
-	vmem_remove_range(seg->start, seg->size);
-}
-
-int vmem_remove_mapping(unsigned long start, unsigned long size)
-{
-	struct memory_segment *seg;
 	int ret;
 
 	mutex_lock(&vmem_mutex);
-
-	ret = -ENOENT;
-	list_for_each_entry(seg, &mem_segs, list) {
-		if (seg->start == start && seg->size == size)
-			break;
-	}
-
-	if (seg->start != start || seg->size != size)
-		goto out;
-
-	ret = 0;
-	__remove_shared_memory(seg);
-	kfree(seg);
-out:
+	/* We don't care about the node, just use NUMA_NO_NODE on allocations */
+	ret = add_pagetable(start, end, false);
+	if (ret)
+		remove_pagetable(start, end, false);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
 
+void vmemmap_free(unsigned long start, unsigned long end,
+		  struct vmem_altmap *altmap)
+{
+	mutex_lock(&vmem_mutex);
+	remove_pagetable(start, end, false);
+	mutex_unlock(&vmem_mutex);
+}
+
+void vmem_remove_mapping(unsigned long start, unsigned long size)
+{
+	mutex_lock(&vmem_mutex);
+	vmem_remove_range(start, size);
+	mutex_unlock(&vmem_mutex);
+}
+
 int vmem_add_mapping(unsigned long start, unsigned long size)
 {
-	struct memory_segment *seg;
 	int ret;
 
+	if (start + size > VMEM_MAX_PHYS ||
+	    start + size < start)
+		return -ERANGE;
+
 	mutex_lock(&vmem_mutex);
-	ret = -ENOMEM;
-	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-	if (!seg)
-		goto out;
-	seg->start = start;
-	seg->size = size;
-
-	ret = insert_memory_segment(seg);
+	ret = vmem_add_range(start, size);
 	if (ret)
-		goto out_free;
-
-	ret = vmem_add_mem(start, size);
-	if (ret)
-		goto out_remove;
-	goto out;
-
-out_remove:
-	__remove_shared_memory(seg);
-out_free:
-	kfree(seg);
-out:
+		vmem_remove_range(start, size);
 	mutex_unlock(&vmem_mutex);
 	return ret;
 }
@@ -400,10 +555,11 @@
  */
 void __init vmem_map_init(void)
 {
-	struct memblock_region *reg;
+	phys_addr_t base, end;
+	u64 i;
 
-	for_each_memblock(memory, reg)
-		vmem_add_mem(reg->base, reg->size);
+	for_each_mem_range(i, &base, &end)
+		vmem_add_range(base, end - base);
 	__set_memory((unsigned long)_stext,
 		     (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
 		     SET_MEMORY_RO | SET_MEMORY_X);
@@ -422,27 +578,3 @@
 	pr_info("Write protected kernel read-only data: %luk\n",
 		(unsigned long)(__end_rodata - _stext) >> 10);
 }
-
-/*
- * Convert memblock.memory  to a memory segment list so there is a single
- * list that contains all memory segments.
- */
-static int __init vmem_convert_memory_chunk(void)
-{
-	struct memblock_region *reg;
-	struct memory_segment *seg;
-
-	mutex_lock(&vmem_mutex);
-	for_each_memblock(memory, reg) {
-		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
-		if (!seg)
-			panic("Out of memory...\n");
-		seg->start = reg->base;
-		seg->size = reg->size;
-		insert_memory_segment(seg);
-	}
-	mutex_unlock(&vmem_mutex);
-	return 0;
-}
-
-core_initcall(vmem_convert_memory_chunk);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 2d29966..cd0cbda 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -24,6 +24,7 @@
 #include <linux/init.h>
 #include <linux/bpf.h>
 #include <linux/mm.h>
+#include <linux/kernel.h>
 #include <asm/cacheflush.h>
 #include <asm/dis.h>
 #include <asm/facility.h>
@@ -39,25 +40,22 @@
 	int size;		/* Size of program and literal pool */
 	int size_prg;		/* Size of program */
 	int prg;		/* Current position in program */
-	int lit_start;		/* Start of literal pool */
-	int lit;		/* Current position in literal pool */
+	int lit32_start;	/* Start of 32-bit literal pool */
+	int lit32;		/* Current position in 32-bit literal pool */
+	int lit64_start;	/* Start of 64-bit literal pool */
+	int lit64;		/* Current position in 64-bit literal pool */
 	int base_ip;		/* Base address for literal pool */
-	int ret0_ip;		/* Address of return 0 */
 	int exit_ip;		/* Address of exit */
 	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
 	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
 	int tail_call_start;	/* Tail call start offset */
-	int labels[1];		/* Labels for local jumps */
+	int excnt;		/* Number of exception table entries */
 };
 
-#define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */
-
-#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
-#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
-#define SEEN_FUNC	(1 << 3)	/* calls C functions */
-#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
-#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
+#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
+#define SEEN_LITERAL	BIT(1)		/* code uses literals */
+#define SEEN_FUNC	BIT(2)		/* calls C functions */
+#define SEEN_TAIL_CALL	BIT(3)		/* code uses tail calls */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
 
 /*
@@ -132,13 +130,13 @@
 #define _EMIT2(op)						\
 ({								\
 	if (jit->prg_buf)					\
-		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
+		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
 	jit->prg += 2;						\
 })
 
 #define EMIT2(op, b1, b2)					\
 ({								\
-	_EMIT2(op | reg(b1, b2));				\
+	_EMIT2((op) | reg(b1, b2));				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
@@ -146,20 +144,20 @@
 #define _EMIT4(op)						\
 ({								\
 	if (jit->prg_buf)					\
-		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
 	jit->prg += 4;						\
 })
 
 #define EMIT4(op, b1, b2)					\
 ({								\
-	_EMIT4(op | reg(b1, b2));				\
+	_EMIT4((op) | reg(b1, b2));				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
 #define EMIT4_RRF(op, b1, b2, b3)				\
 ({								\
-	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
+	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 	REG_SET_SEEN(b3);					\
@@ -168,13 +166,13 @@
 #define _EMIT4_DISP(op, disp)					\
 ({								\
 	unsigned int __disp = (disp) & 0xfff;			\
-	_EMIT4(op | __disp);					\
+	_EMIT4((op) | __disp);					\
 })
 
 #define EMIT4_DISP(op, b1, b2, disp)				\
 ({								\
-	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
-		    reg_high(b2) << 8, disp);			\
+	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
+		    reg_high(b2) << 8, (disp));			\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
@@ -182,21 +180,27 @@
 #define EMIT4_IMM(op, b1, imm)					\
 ({								\
 	unsigned int __imm = (imm) & 0xffff;			\
-	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
+	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
 	REG_SET_SEEN(b1);					\
 })
 
 #define EMIT4_PCREL(op, pcrel)					\
 ({								\
 	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
-	_EMIT4(op | __pcrel);					\
+	_EMIT4((op) | __pcrel);					\
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target)			\
+({								\
+	int __rel = ((target) - jit->prg) / 2;			\
+	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
 })
 
 #define _EMIT6(op1, op2)					\
 ({								\
 	if (jit->prg_buf) {					\
-		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
-		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
+		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
 	}							\
 	jit->prg += 6;						\
 })
@@ -204,98 +208,112 @@
 #define _EMIT6_DISP(op1, op2, disp)				\
 ({								\
 	unsigned int __disp = (disp) & 0xfff;			\
-	_EMIT6(op1 | __disp, op2);				\
+	_EMIT6((op1) | __disp, op2);				\
 })
 
 #define _EMIT6_DISP_LH(op1, op2, disp)				\
 ({								\
-	u32 _disp = (u32) disp;					\
+	u32 _disp = (u32) (disp);				\
 	unsigned int __disp_h = _disp & 0xff000;		\
 	unsigned int __disp_l = _disp & 0x00fff;		\
-	_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4);		\
+	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
 })
 
 #define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
 ({								\
-	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
+	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
 		       reg_high(b3) << 8, op2, disp);		\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 	REG_SET_SEEN(b3);					\
 })
 
-#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask)	\
+#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
 ({								\
-	int rel = (jit->labels[label] - jit->prg) >> 1;		\
-	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff),	\
-	       op2 | mask << 12);				\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
+	       (op2) | (mask) << 12);				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
-#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask)	\
+#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
 ({								\
-	int rel = (jit->labels[label] - jit->prg) >> 1;		\
-	_EMIT6(op1 | (reg_high(b1) | mask) << 16 |		\
-		(rel & 0xffff), op2 | (imm & 0xff) << 8);	\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
+		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
 	REG_SET_SEEN(b1);					\
-	BUILD_BUG_ON(((unsigned long) imm) > 0xff);		\
+	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
 })
 
 #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
 ({								\
-	/* Branch instruction needs 6 bytes */			\
-	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
-	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);	\
+	int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2;	\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
 #define EMIT6_PCREL_RILB(op, b, target)				\
 ({								\
-	int rel = (target - jit->prg) / 2;			\
-	_EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff);	\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
 	REG_SET_SEEN(b);					\
 })
 
 #define EMIT6_PCREL_RIL(op, target)				\
 ({								\
-	int rel = (target - jit->prg) / 2;			\
-	_EMIT6(op | rel >> 16, rel & 0xffff);			\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target)			\
+({								\
+	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
 })
 
 #define _EMIT6_IMM(op, imm)					\
 ({								\
 	unsigned int __imm = (imm);				\
-	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
+	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
 })
 
 #define EMIT6_IMM(op, b1, imm)					\
 ({								\
-	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
+	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
 	REG_SET_SEEN(b1);					\
 })
 
+#define _EMIT_CONST_U32(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit32;					\
+	if (jit->prg_buf)					\
+		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+	jit->lit32 += 4;					\
+	ret;							\
+})
+
 #define EMIT_CONST_U32(val)					\
 ({								\
-	unsigned int ret;					\
-	ret = jit->lit - jit->base_ip;				\
 	jit->seen |= SEEN_LITERAL;				\
+	_EMIT_CONST_U32(val) - jit->base_ip;			\
+})
+
+#define _EMIT_CONST_U64(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit64;					\
 	if (jit->prg_buf)					\
-		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
-	jit->lit += 4;						\
+		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+	jit->lit64 += 8;					\
 	ret;							\
 })
 
 #define EMIT_CONST_U64(val)					\
 ({								\
-	unsigned int ret;					\
-	ret = jit->lit - jit->base_ip;				\
 	jit->seen |= SEEN_LITERAL;				\
-	if (jit->prg_buf)					\
-		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
-	jit->lit += 8;						\
-	ret;							\
+	_EMIT_CONST_U64(val) - jit->base_ip;			\
 })
 
 #define EMIT_ZERO(b1)						\
@@ -308,6 +326,67 @@
 })
 
 /*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+	return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+	return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+	return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+	return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+	return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
  * Fill whole space with illegal instructions
  */
 static void jit_fill_hole(void *area, unsigned int size)
@@ -384,9 +463,18 @@
  */
 static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 {
-
+	const int last = 15, save_restore_size = 6;
 	int re = 6, rs;
 
+	if (is_first_pass(jit)) {
+		/*
+		 * We don't know yet which registers are used. Reserve space
+		 * conservatively.
+		 */
+		jit->prg += (last - re + 1) * save_restore_size;
+		return;
+	}
+
 	do {
 		rs = get_start(jit, re);
 		if (!rs)
@@ -397,7 +485,25 @@
 		else
 			restore_regs(jit, rs, re, stack_depth);
 		re++;
-	} while (re <= 15);
+	} while (re <= last);
+}
+
+static void bpf_skip(struct bpf_jit *jit, int size)
+{
+	if (size >= 6 && !is_valid_rel(size)) {
+		/* brcl 0xf,size */
+		EMIT6_PCREL_RIL(0xc0f4000000, size);
+		size -= 6;
+	} else if (size >= 4 && is_valid_rel(size)) {
+		/* brc 0xf,size */
+		EMIT4_PCREL(0xa7f40000, size);
+		size -= 4;
+	}
+	while (size >= 2) {
+		/* bcr 0,%0 */
+		_EMIT2(0x0700);
+		size -= 2;
+	}
 }
 
 /*
@@ -412,30 +518,39 @@
 		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
 		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
 	} else {
-		/* j tail_call_start: NOP if no tail calls are used */
-		EMIT4_PCREL(0xa7f40000, 6);
-		_EMIT2(0);
+		/*
+		 * There are no tail calls. Insert nops in order to have
+		 * tail_call_start at a predictable offset.
+		 */
+		bpf_skip(jit, 6);
 	}
 	/* Tail calls have to skip above initialization */
 	jit->tail_call_start = jit->prg;
 	/* Save registers */
 	save_restore_regs(jit, REGS_SAVE, stack_depth);
 	/* Setup literal pool */
-	if (jit->seen & SEEN_LITERAL) {
-		/* basr %r13,0 */
-		EMIT2(0x0d00, REG_L, REG_0);
-		jit->base_ip = jit->prg;
+	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+		if (!is_first_pass(jit) &&
+		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
+			/* basr %l,0 */
+			EMIT2(0x0d00, REG_L, REG_0);
+			jit->base_ip = jit->prg;
+		} else {
+			/* larl %l,lit32_start */
+			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+			jit->base_ip = jit->lit32_start;
+		}
 	}
 	/* Setup stack and backchain */
-	if (jit->seen & SEEN_STACK) {
-		if (jit->seen & SEEN_FUNC)
+	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
 			/* lgr %w1,%r15 (backchain) */
 			EMIT4(0xb9040000, REG_W1, REG_15);
 		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
 		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
 		/* aghi %r15,-STK_OFF */
 		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
-		if (jit->seen & SEEN_FUNC)
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
 			/* stg %w1,152(%r15) (backchain) */
 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
@@ -447,12 +562,6 @@
  */
 static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
 {
-	/* Return 0 */
-	if (jit->seen & SEEN_RET0) {
-		jit->ret0_ip = jit->prg;
-		/* lghi %b0,0 */
-		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
-	}
 	jit->exit_ip = jit->prg;
 	/* Load exit code: lgr %r2,%b0 */
 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
@@ -477,7 +586,7 @@
 	_EMIT2(0x07fe);
 
 	if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
-	    (jit->seen & SEEN_FUNC)) {
+	    (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
 		jit->r1_thunk_ip = jit->prg;
 		/* Generate __s390_indirect_jump_r1 thunk */
 		if (test_facility(35)) {
@@ -497,6 +606,84 @@
 	}
 }
 
+static int get_probe_mem_regno(const u8 *insn)
+{
+	/*
+	 * insn must point to llgc, llgh, llgf or lg, which have destination
+	 * register at the same position.
+	 */
+	if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
+		return -1;
+	if (insn[5] != 0x90 && /* llgc */
+	    insn[5] != 0x91 && /* llgh */
+	    insn[5] != 0x16 && /* llgf */
+	    insn[5] != 0x04) /* lg */
+		return -1;
+	return insn[1] >> 4;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+			   struct pt_regs *regs)
+{
+	int regno;
+	u8 *insn;
+
+	regs->psw.addr = extable_fixup(x);
+	insn = (u8 *)__rewind_psw(regs->psw, regs->int_code >> 16);
+	regno = get_probe_mem_regno(insn);
+	if (WARN_ON_ONCE(regno < 0))
+		/* JIT bug - unexpected instruction. */
+		return false;
+	regs->gprs[regno] = 0;
+	return true;
+}
+
+static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
+			     int probe_prg, int nop_prg)
+{
+	struct exception_table_entry *ex;
+	s64 delta;
+	u8 *insn;
+	int prg;
+	int i;
+
+	if (!fp->aux->extable)
+		/* Do nothing during early JIT passes. */
+		return 0;
+	insn = jit->prg_buf + probe_prg;
+	if (WARN_ON_ONCE(get_probe_mem_regno(insn) < 0))
+		/* JIT bug - unexpected probe instruction. */
+		return -1;
+	if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
+		/* JIT bug - gap between probe and nop instructions. */
+		return -1;
+	for (i = 0; i < 2; i++) {
+		if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
+			/* Verifier bug - not enough entries. */
+			return -1;
+		ex = &fp->aux->extable[jit->excnt];
+		/* Add extable entries for probe and nop instructions. */
+		prg = i == 0 ? probe_prg : nop_prg;
+		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
+		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+			/* JIT bug - code and extable must be close. */
+			return -1;
+		ex->insn = delta;
+		/*
+		 * Always land on the nop. Note that extable infrastructure
+		 * ignores fixup field, it is handled by ex_handler_bpf().
+		 */
+		delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
+		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
+			/* JIT bug - landing pad and extable must be close. */
+			return -1;
+		ex->fixup = delta;
+		ex->handler = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+		jit->excnt++;
+	}
+	return 0;
+}
+
 /*
  * Compile one eBPF instruction into s390x code
  *
@@ -504,19 +691,24 @@
  * stack space for the large switch statement.
  */
 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
-				 int i, bool extra_pass)
+				 int i, bool extra_pass, u32 stack_depth)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
-	int jmp_off, last, insn_count = 1;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
+	int last, insn_count = 1;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
+	int probe_prg = -1;
 	unsigned int mask;
+	int nop_prg;
+	int err;
 
-	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
-		jit->seen |= SEEN_REG_AX;
+	if (BPF_CLASS(insn->code) == BPF_LDX &&
+	    BPF_MODE(insn->code) == BPF_PROBE_MEM)
+		probe_prg = jit->prg;
+
 	switch (insn->code) {
 	/*
 	 * BPF_MOV
@@ -550,9 +742,8 @@
 		u64 imm64;
 
 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
-		/* lg %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm64));
+		/* lgrl %dst,imm */
+		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
 		insn_count = 2;
 		break;
 	}
@@ -688,9 +879,18 @@
 		EMIT4_IMM(0xa7080000, REG_W0, 0);
 		/* lr %w1,%dst */
 		EMIT2(0x1800, REG_W1, dst_reg);
-		/* dl %w0,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U32(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+			/* dl %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U32(imm));
+		} else {
+			/* lgfrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+					 _EMIT_CONST_U32(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlr %w0,%dst */
+			EMIT4(0xb9970000, REG_W0, dst_reg);
+		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		if (insn_is_zext(&insn[1]))
@@ -712,9 +912,18 @@
 		EMIT4_IMM(0xa7090000, REG_W0, 0);
 		/* lgr %w1,%dst */
 		EMIT4(0xb9040000, REG_W1, dst_reg);
-		/* dlg %w0,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* dlg %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlgr %w0,%dst */
+			EMIT4(0xb9870000, REG_W0, dst_reg);
+		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;
@@ -737,9 +946,19 @@
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
-		/* ng %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* ng %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0080,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ngr %dst,%w0 */
+			EMIT4(0xb9800000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_OR
@@ -759,9 +978,19 @@
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
-		/* og %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* og %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0081,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ogr %dst,%w0 */
+			EMIT4(0xb9810000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_XOR
@@ -783,9 +1012,19 @@
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
-		/* xg %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* xg %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0082,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* xgr %dst,%w0 */
+			EMIT4(0xb9820000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_LSH
@@ -995,6 +1234,7 @@
 	 * BPF_LDX
 	 */
 	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 		/* llgc %dst,0(off,%src) */
 		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
 		jit->seen |= SEEN_MEM;
@@ -1002,6 +1242,7 @@
 			insn_count = 2;
 		break;
 	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
 		/* llgh %dst,0(off,%src) */
 		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
 		jit->seen |= SEEN_MEM;
@@ -1009,6 +1250,7 @@
 			insn_count = 2;
 		break;
 	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
 		/* llgf %dst,off(%src) */
 		jit->seen |= SEEN_MEM;
 		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
@@ -1016,6 +1258,7 @@
 			insn_count = 2;
 		break;
 	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
 		/* lg %dst,0(off,%src) */
 		jit->seen |= SEEN_MEM;
 		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
@@ -1036,9 +1279,8 @@
 
 		REG_SET_SEEN(BPF_REG_5);
 		jit->seen |= SEEN_FUNC;
-		/* lg %w1,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
-			      EMIT_CONST_U64(func));
+		/* lgrl %w1,func */
+		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
 		if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
 			/* brasl %r14,__s390_indirect_jump_r1 */
 			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
@@ -1050,7 +1292,9 @@
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
 		break;
 	}
-	case BPF_JMP | BPF_TAIL_CALL:
+	case BPF_JMP | BPF_TAIL_CALL: {
+		int patch_1_clrj, patch_2_clij, patch_3_brc;
+
 		/*
 		 * Implicit input:
 		 *  B1: pointer to ctx
@@ -1067,9 +1311,11 @@
 		/* llgf %w1,map.max_entries(%b2) */
 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
 			      offsetof(struct bpf_array, map.max_entries));
-		/* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
-		EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
-				  REG_W1, 0, 0xa);
+		/* if ((u32)%b3 >= (u32)%w1) goto out; */
+		/* clrj %b3,%w1,0xa,out */
+		patch_1_clrj = jit->prg;
+		EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
+				 jit->prg);
 
 		/*
 		 * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1077,16 +1323,17 @@
 		 */
 
 		if (jit->seen & SEEN_STACK)
-			off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
+			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
 		else
 			off = STK_OFF_TCCNT;
 		/* lhi %w0,1 */
 		EMIT4_IMM(0xa7080000, REG_W0, 1);
 		/* laal %w1,%w0,off(%r15) */
 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
-		/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
-		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
-				      MAX_TAIL_CALL_CNT, 0, 0x2);
+		/* clij %w1,MAX_TAIL_CALL_CNT,0x2,out */
+		patch_2_clij = jit->prg;
+		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT,
+				 2, jit->prg);
 
 		/*
 		 * prog = array->ptrs[index];
@@ -1098,16 +1345,17 @@
 		EMIT4(0xb9160000, REG_1, BPF_REG_3);
 		/* sllg %r1,%r1,3: %r1 *= 8 */
 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
-		/* lg %r1,prog(%b2,%r1) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+		/* ltg %r1,prog(%b2,%r1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
 			      REG_1, offsetof(struct bpf_array, ptrs));
-		/* clgij %r1,0,0x8,label0 */
-		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+		/* brc 0x8,out */
+		patch_3_brc = jit->prg;
+		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);
 
 		/*
 		 * Restore registers before calling function
 		 */
-		save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
+		save_restore_regs(jit, REGS_RESTORE, stack_depth);
 
 		/*
 		 * goto *(prog->bpf_func + tail_call_start);
@@ -1119,14 +1367,26 @@
 		/* bc 0xf,tail_call_start(%r1) */
 		_EMIT4(0x47f01000 + jit->tail_call_start);
 		/* out: */
-		jit->labels[0] = jit->prg;
+		if (jit->prg_buf) {
+			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
+				(jit->prg - patch_1_clrj) >> 1;
+			*(u16 *)(jit->prg_buf + patch_2_clij + 2) =
+				(jit->prg - patch_2_clij) >> 1;
+			*(u16 *)(jit->prg_buf + patch_3_brc + 2) =
+				(jit->prg - patch_3_brc) >> 1;
+		}
 		break;
+	}
 	case BPF_JMP | BPF_EXIT: /* return b0 */
 		last = (i == fp->len - 1) ? 1 : 0;
-		if (last && !(jit->seen & SEEN_RET0))
+		if (last)
 			break;
-		/* j <exit> */
-		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
+			/* brc 0xf, <exit> */
+			EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
+		else
+			/* brcl 0xf, <exit> */
+			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
 		break;
 	/*
 	 * Branch relative (number of skipped instructions) to offset on
@@ -1259,69 +1519,175 @@
 		goto branch_oc;
 branch_ks:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* crj or cgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
-			    dst_reg, REG_W1, i, off, mask);
+		/* cfi or cgfi %dst,imm */
+		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+			  dst_reg, imm);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_ku:
-		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
 		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* clrj or clgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
-			    dst_reg, REG_W1, i, off, mask);
-		break;
+		src_reg = REG_1;
+		EMIT6_IMM(0xc0010000, src_reg, imm);
+		goto branch_xu;
 branch_xs:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* crj or cgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
-			    dst_reg, src_reg, i, off, mask);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* crj or cgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* cr or cgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1900, dst_reg, src_reg);
+			else
+				EMIT4(0xb9200000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_xu:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* clrj or clgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
-			    dst_reg, src_reg, i, off, mask);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* clrj or clgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* clr or clgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1500, dst_reg, src_reg);
+			else
+				EMIT4(0xb9210000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_oc:
-		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
-		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
-		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 	}
 	default: /* too complex, give up */
 		pr_err("Unknown opcode %02x\n", insn->code);
 		return -1;
 	}
+
+	if (probe_prg != -1) {
+		/*
+		 * Handlers of certain exceptions leave psw.addr pointing to
+		 * the instruction directly after the failing one. Therefore,
+		 * create two exception table entries and also add a nop in
+		 * case two probing instructions come directly after each
+		 * other.
+		 */
+		nop_prg = jit->prg;
+		/* bcr 0,%0 */
+		_EMIT2(0x0700);
+		err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
+		if (err < 0)
+			return err;
+	}
+
 	return insn_count;
 }
 
 /*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+	/* On the first pass anything goes */
+	if (is_first_pass(jit))
+		return true;
+
+	/* The codegen pass must not change anything */
+	if (is_codegen_pass(jit))
+		return jit->addrs[i] == jit->prg;
+
+	/* Passes in between must not increase code size */
+	return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+	int delta;
+
+	if (is_codegen_pass(jit)) {
+		delta = jit->prg - jit->addrs[i];
+		if (delta < 0)
+			bpf_skip(jit, -delta);
+	}
+	if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
+		return -1;
+	jit->addrs[i] = jit->prg;
+	return 0;
+}
+
+/*
  * Compile eBPF program into s390x code
  */
 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
-			bool extra_pass)
+			bool extra_pass, u32 stack_depth)
 {
-	int i, insn_count;
+	int i, insn_count, lit32_size, lit64_size;
 
-	jit->lit = jit->lit_start;
+	jit->lit32 = jit->lit32_start;
+	jit->lit64 = jit->lit64_start;
 	jit->prg = 0;
+	jit->excnt = 0;
 
-	bpf_jit_prologue(jit, fp->aux->stack_depth);
+	bpf_jit_prologue(jit, stack_depth);
+	if (bpf_set_addr(jit, 0) < 0)
+		return -1;
 	for (i = 0; i < fp->len; i += insn_count) {
-		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
 		if (insn_count < 0)
 			return -1;
 		/* Next instruction address */
-		jit->addrs[i + insn_count] = jit->prg;
+		if (bpf_set_addr(jit, i + insn_count) < 0)
+			return -1;
 	}
-	bpf_jit_epilogue(jit, fp->aux->stack_depth);
+	bpf_jit_epilogue(jit, stack_depth);
 
-	jit->lit_start = jit->prg;
-	jit->size = jit->lit;
+	lit32_size = jit->lit32 - jit->lit32_start;
+	lit64_size = jit->lit64 - jit->lit64_start;
+	jit->lit32_start = jit->prg;
+	if (lit32_size)
+		jit->lit32_start = ALIGN(jit->lit32_start, 4);
+	jit->lit64_start = jit->lit32_start + lit32_size;
+	if (lit64_size)
+		jit->lit64_start = ALIGN(jit->lit64_start, 8);
+	jit->size = jit->lit64_start + lit64_size;
 	jit->size_prg = jit->prg;
+
+	if (WARN_ON_ONCE(fp->aux->extable &&
+			 jit->excnt != fp->aux->num_exentries))
+		/* Verifier bug - too many entries. */
+		return -1;
+
 	return 0;
 }
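bpf_jit_prog() above places the 32-bit literal pool directly behind the generated instructions and the 64-bit pool behind that, aligning each pool only when it is non-empty. A small user-space sketch of the same layout arithmetic follows; the sizes are invented and ALIGN merely mirrors the kernel macro of the same name.

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int prg = 0x3e2;	/* bytes of emitted instructions (invented) */
	unsigned int lit32_size = 12;	/* bytes of 32-bit literals (invented) */
	unsigned int lit64_size = 40;	/* bytes of 64-bit literals (invented) */
	unsigned int lit32_start, lit64_start, size;

	/* 32-bit literals start right behind the code, 4-byte aligned if present */
	lit32_start = prg;
	if (lit32_size)
		lit32_start = ALIGN(lit32_start, 4);
	/* 64-bit literals follow, 8-byte aligned if present */
	lit64_start = lit32_start + lit32_size;
	if (lit64_size)
		lit64_start = ALIGN(lit64_start, 8);
	size = lit64_start + lit64_size;

	printf("code 0x%x, lit32 @ 0x%x, lit64 @ 0x%x, image size 0x%x\n",
	       prg, lit32_start, lit64_start, size);
	return 0;
}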
 
@@ -1336,11 +1702,35 @@
 	int pass;
 };
 
+static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
+					       struct bpf_prog *fp)
+{
+	struct bpf_binary_header *header;
+	u32 extable_size;
+	u32 code_size;
+
+	/* We need two entries per insn. */
+	fp->aux->num_exentries *= 2;
+
+	code_size = roundup(jit->size,
+			    __alignof__(struct exception_table_entry));
+	extable_size = fp->aux->num_exentries *
+		sizeof(struct exception_table_entry);
+	header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
+				      8, jit_fill_hole);
+	if (!header)
+		return NULL;
+	fp->aux->extable = (struct exception_table_entry *)
+		(jit->prg_buf + code_size);
+	return header;
+}
+
 /*
  * Compile eBPF program "fp"
  */
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
+	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
 	struct bpf_prog *tmp, *orig_fp = fp;
 	struct bpf_binary_header *header;
 	struct s390_jit_data *jit_data;
@@ -1385,7 +1775,7 @@
 	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
 	if (jit.addrs == NULL) {
 		fp = orig_fp;
-		goto out;
+		goto free_addrs;
 	}
 	/*
 	 * Three initial passes:
@@ -1393,7 +1783,7 @@
 	 *   - 3:   Calculate program size and addrs array
 	 */
 	for (pass = 1; pass <= 3; pass++) {
-		if (bpf_jit_prog(&jit, fp, extra_pass)) {
+		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
 			fp = orig_fp;
 			goto free_addrs;
 		}
@@ -1401,18 +1791,13 @@
 	/*
 	 * Final pass: Allocate and generate program
 	 */
-	if (jit.size >= BPF_SIZE_MAX) {
-		fp = orig_fp;
-		goto free_addrs;
-	}
-
-	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+	header = bpf_jit_alloc(&jit, fp);
 	if (!header) {
 		fp = orig_fp;
 		goto free_addrs;
 	}
 skip_init_ctx:
-	if (bpf_jit_prog(&jit, fp, extra_pass)) {
+	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
 		bpf_jit_binary_free(header);
 		fp = orig_fp;
 		goto free_addrs;
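bpf_jit_alloc() above reserves a single buffer for both the JITed image and its exception table: the verifier's entry count is doubled (two entries per probing instruction), the image size is rounded up to the table's alignment, and fp->aux->extable is pointed right behind the code. The user-space sketch below only models that size computation; the structure is a stand-in for the s390 exception_table_entry and every value is invented.

#include <stdio.h>

/* stand-in for the s390 exception_table_entry (two 32-bit relative offsets) */
struct exception_table_entry {
	int insn;
	int fixup;
};

#define ROUNDUP(x, a) ((((x) + (a) - 1) / (a)) * (a))

int main(void)
{
	unsigned int jit_size = 0x41a;		/* code + literal pools (invented) */
	unsigned int num_probes = 3;		/* probing loads in the program (invented) */
	unsigned int num_exentries = num_probes * 2;	/* two entries per probe */
	unsigned int code_size, extable_size;

	code_size = ROUNDUP(jit_size, __alignof__(struct exception_table_entry));
	extable_size = num_exentries * (unsigned int)sizeof(struct exception_table_entry);

	printf("image %u bytes, padded to %u; extable at %u, %u bytes; allocation %u\n",
	       jit_size, code_size, code_size, extable_size,
	       code_size + extable_size);
	return 0;
}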
diff --git a/arch/s390/numa/Makefile b/arch/s390/numa/Makefile
deleted file mode 100644
index 66c2dff..0000000
--- a/arch/s390/numa/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-y			+= numa.o
-obj-y			+= toptree.o
-obj-$(CONFIG_NUMA_EMU)	+= mode_emu.o
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
deleted file mode 100644
index 72d742b..0000000
--- a/arch/s390/numa/mode_emu.c
+++ /dev/null
@@ -1,577 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * NUMA emulation (aka fake NUMA) distributes the available memory to nodes
- * without using real topology information about the physical memory of the
- * machine.
- *
- * It distributes the available CPUs to nodes while respecting the original
- * machine topology information. This is done by trying to avoid to separate
- * CPUs which reside on the same book or even on the same MC.
- *
- * Because the current Linux scheduler code requires a stable cpu to node
- * mapping, cores are pinned to nodes when the first CPU thread is set online.
- *
- * Copyright IBM Corp. 2015
- */
-
-#define KMSG_COMPONENT "numa_emu"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/memblock.h>
-#include <linux/node.h>
-#include <linux/memory.h>
-#include <linux/slab.h>
-#include <asm/smp.h>
-#include <asm/topology.h>
-#include "numa_mode.h"
-#include "toptree.h"
-
-/* Distances between the different system components */
-#define DIST_EMPTY	0
-#define DIST_CORE	1
-#define DIST_MC		2
-#define DIST_BOOK	3
-#define DIST_DRAWER	4
-#define DIST_MAX	5
-
-/* Node distance reported to common code */
-#define EMU_NODE_DIST	10
-
-/* Node ID for free (not yet pinned) cores */
-#define NODE_ID_FREE	-1
-
-/* Different levels of toptree */
-enum toptree_level {CORE, MC, BOOK, DRAWER, NODE, TOPOLOGY};
-
-/* The two toptree IDs */
-enum {TOPTREE_ID_PHYS, TOPTREE_ID_NUMA};
-
-/* Number of NUMA nodes */
-static int emu_nodes = 1;
-/* NUMA stripe size */
-static unsigned long emu_size;
-
-/*
- * Node to core pinning information updates are protected by
- * "sched_domains_mutex".
- */
-static struct {
-	s32 to_node_id[CONFIG_NR_CPUS];	/* Pinned core to node mapping */
-	int total;			/* Total number of pinned cores */
-	int per_node_target;		/* Cores per node without extra cores */
-	int per_node[MAX_NUMNODES];	/* Number of cores pinned to node */
-} *emu_cores;
-
-/*
- * Pin a core to a node
- */
-static void pin_core_to_node(int core_id, int node_id)
-{
-	if (emu_cores->to_node_id[core_id] == NODE_ID_FREE) {
-		emu_cores->per_node[node_id]++;
-		emu_cores->to_node_id[core_id] = node_id;
-		emu_cores->total++;
-	} else {
-		WARN_ON(emu_cores->to_node_id[core_id] != node_id);
-	}
-}
-
-/*
- * Number of pinned cores of a node
- */
-static int cores_pinned(struct toptree *node)
-{
-	return emu_cores->per_node[node->id];
-}
-
-/*
- * ID of the node where the core is pinned (or NODE_ID_FREE)
- */
-static int core_pinned_to_node_id(struct toptree *core)
-{
-	return emu_cores->to_node_id[core->id];
-}
-
-/*
- * Number of cores in the tree that are not yet pinned
- */
-static int cores_free(struct toptree *tree)
-{
-	struct toptree *core;
-	int count = 0;
-
-	toptree_for_each(core, tree, CORE) {
-		if (core_pinned_to_node_id(core) == NODE_ID_FREE)
-			count++;
-	}
-	return count;
-}
-
-/*
- * Return node of core
- */
-static struct toptree *core_node(struct toptree *core)
-{
-	return core->parent->parent->parent->parent;
-}
-
-/*
- * Return drawer of core
- */
-static struct toptree *core_drawer(struct toptree *core)
-{
-	return core->parent->parent->parent;
-}
-
-/*
- * Return book of core
- */
-static struct toptree *core_book(struct toptree *core)
-{
-	return core->parent->parent;
-}
-
-/*
- * Return mc of core
- */
-static struct toptree *core_mc(struct toptree *core)
-{
-	return core->parent;
-}
-
-/*
- * Distance between two cores
- */
-static int dist_core_to_core(struct toptree *core1, struct toptree *core2)
-{
-	if (core_drawer(core1)->id != core_drawer(core2)->id)
-		return DIST_DRAWER;
-	if (core_book(core1)->id != core_book(core2)->id)
-		return DIST_BOOK;
-	if (core_mc(core1)->id != core_mc(core2)->id)
-		return DIST_MC;
-	/* Same core or sibling on same MC */
-	return DIST_CORE;
-}
-
-/*
- * Distance of a node to a core
- */
-static int dist_node_to_core(struct toptree *node, struct toptree *core)
-{
-	struct toptree *core_node;
-	int dist_min = DIST_MAX;
-
-	toptree_for_each(core_node, node, CORE)
-		dist_min = min(dist_min, dist_core_to_core(core_node, core));
-	return dist_min == DIST_MAX ? DIST_EMPTY : dist_min;
-}
-
-/*
- * Unify will delete empty nodes, therefore recreate nodes.
- */
-static void toptree_unify_tree(struct toptree *tree)
-{
-	int nid;
-
-	toptree_unify(tree);
-	for (nid = 0; nid < emu_nodes; nid++)
-		toptree_get_child(tree, nid);
-}
-
-/*
- * Find the best/nearest node for a given core and ensure that no node
- * gets more than "emu_cores->per_node_target + extra" cores.
- */
-static struct toptree *node_for_core(struct toptree *numa, struct toptree *core,
-				     int extra)
-{
-	struct toptree *node, *node_best = NULL;
-	int dist_cur, dist_best, cores_target;
-
-	cores_target = emu_cores->per_node_target + extra;
-	dist_best = DIST_MAX;
-	node_best = NULL;
-	toptree_for_each(node, numa, NODE) {
-		/* Already pinned cores must use their nodes */
-		if (core_pinned_to_node_id(core) == node->id) {
-			node_best = node;
-			break;
-		}
-		/* Skip nodes that already have enough cores */
-		if (cores_pinned(node) >= cores_target)
-			continue;
-		dist_cur = dist_node_to_core(node, core);
-		if (dist_cur < dist_best) {
-			dist_best = dist_cur;
-			node_best = node;
-		}
-	}
-	return node_best;
-}
-
-/*
- * Find the best node for each core with respect to "extra" core count
- */
-static void toptree_to_numa_single(struct toptree *numa, struct toptree *phys,
-				   int extra)
-{
-	struct toptree *node, *core, *tmp;
-
-	toptree_for_each_safe(core, tmp, phys, CORE) {
-		node = node_for_core(numa, core, extra);
-		if (!node)
-			return;
-		toptree_move(core, node);
-		pin_core_to_node(core->id, node->id);
-	}
-}
-
-/*
- * Move structures of given level to specified NUMA node
- */
-static void move_level_to_numa_node(struct toptree *node, struct toptree *phys,
-				    enum toptree_level level, bool perfect)
-{
-	int cores_free, cores_target = emu_cores->per_node_target;
-	struct toptree *cur, *tmp;
-
-	toptree_for_each_safe(cur, tmp, phys, level) {
-		cores_free = cores_target - toptree_count(node, CORE);
-		if (perfect) {
-			if (cores_free == toptree_count(cur, CORE))
-				toptree_move(cur, node);
-		} else {
-			if (cores_free >= toptree_count(cur, CORE))
-				toptree_move(cur, node);
-		}
-	}
-}
-
-/*
- * Move structures of a given level to NUMA nodes. If "perfect" is specified
- * move only perfectly fitting structures. Otherwise move also smaller
- * than needed structures.
- */
-static void move_level_to_numa(struct toptree *numa, struct toptree *phys,
-			       enum toptree_level level, bool perfect)
-{
-	struct toptree *node;
-
-	toptree_for_each(node, numa, NODE)
-		move_level_to_numa_node(node, phys, level, perfect);
-}
-
-/*
- * For the first run try to move the big structures
- */
-static void toptree_to_numa_first(struct toptree *numa, struct toptree *phys)
-{
-	struct toptree *core;
-
-	/* Always try to move perfectly fitting structures first */
-	move_level_to_numa(numa, phys, DRAWER, true);
-	move_level_to_numa(numa, phys, DRAWER, false);
-	move_level_to_numa(numa, phys, BOOK, true);
-	move_level_to_numa(numa, phys, BOOK, false);
-	move_level_to_numa(numa, phys, MC, true);
-	move_level_to_numa(numa, phys, MC, false);
-	/* Now pin all the moved cores */
-	toptree_for_each(core, numa, CORE)
-		pin_core_to_node(core->id, core_node(core)->id);
-}
-
-/*
- * Allocate new topology and create required nodes
- */
-static struct toptree *toptree_new(int id, int nodes)
-{
-	struct toptree *tree;
-	int nid;
-
-	tree = toptree_alloc(TOPOLOGY, id);
-	if (!tree)
-		goto fail;
-	for (nid = 0; nid < nodes; nid++) {
-		if (!toptree_get_child(tree, nid))
-			goto fail;
-	}
-	return tree;
-fail:
-	panic("NUMA emulation could not allocate topology");
-}
-
-/*
- * Allocate and initialize core to node mapping
- */
-static void __ref create_core_to_node_map(void)
-{
-	int i;
-
-	emu_cores = memblock_alloc(sizeof(*emu_cores), 8);
-	if (!emu_cores)
-		panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-		      __func__, sizeof(*emu_cores), 8);
-	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
-		emu_cores->to_node_id[i] = NODE_ID_FREE;
-}
-
-/*
- * Move cores from physical topology into NUMA target topology
- * and try to keep as much of the physical topology as possible.
- */
-static struct toptree *toptree_to_numa(struct toptree *phys)
-{
-	static int first = 1;
-	struct toptree *numa;
-	int cores_total;
-
-	cores_total = emu_cores->total + cores_free(phys);
-	emu_cores->per_node_target = cores_total / emu_nodes;
-	numa = toptree_new(TOPTREE_ID_NUMA, emu_nodes);
-	if (first) {
-		toptree_to_numa_first(numa, phys);
-		first = 0;
-	}
-	toptree_to_numa_single(numa, phys, 0);
-	toptree_to_numa_single(numa, phys, 1);
-	toptree_unify_tree(numa);
-
-	WARN_ON(cpumask_weight(&phys->mask));
-	return numa;
-}
-
-/*
- * Create a toptree out of the physical topology that we got from the hypervisor
- */
-static struct toptree *toptree_from_topology(void)
-{
-	struct toptree *phys, *node, *drawer, *book, *mc, *core;
-	struct cpu_topology_s390 *top;
-	int cpu;
-
-	phys = toptree_new(TOPTREE_ID_PHYS, 1);
-
-	for_each_cpu(cpu, &cpus_with_topology) {
-		top = &cpu_topology[cpu];
-		node = toptree_get_child(phys, 0);
-		drawer = toptree_get_child(node, top->drawer_id);
-		book = toptree_get_child(drawer, top->book_id);
-		mc = toptree_get_child(book, top->socket_id);
-		core = toptree_get_child(mc, smp_get_base_cpu(cpu));
-		if (!drawer || !book || !mc || !core)
-			panic("NUMA emulation could not allocate memory");
-		cpumask_set_cpu(cpu, &core->mask);
-		toptree_update_mask(mc);
-	}
-	return phys;
-}
-
-/*
- * Add toptree core to topology and create correct CPU masks
- */
-static void topology_add_core(struct toptree *core)
-{
-	struct cpu_topology_s390 *top;
-	int cpu;
-
-	for_each_cpu(cpu, &core->mask) {
-		top = &cpu_topology[cpu];
-		cpumask_copy(&top->thread_mask, &core->mask);
-		cpumask_copy(&top->core_mask, &core_mc(core)->mask);
-		cpumask_copy(&top->book_mask, &core_book(core)->mask);
-		cpumask_copy(&top->drawer_mask, &core_drawer(core)->mask);
-		cpumask_set_cpu(cpu, &node_to_cpumask_map[core_node(core)->id]);
-		top->node_id = core_node(core)->id;
-	}
-}
-
-/*
- * Apply toptree to topology and create CPU masks
- */
-static void toptree_to_topology(struct toptree *numa)
-{
-	struct toptree *core;
-	int i;
-
-	/* Clear all node masks */
-	for (i = 0; i < MAX_NUMNODES; i++)
-		cpumask_clear(&node_to_cpumask_map[i]);
-
-	/* Rebuild all masks */
-	toptree_for_each(core, numa, CORE)
-		topology_add_core(core);
-}
-
-/*
- * Show the node to core mapping
- */
-static void print_node_to_core_map(void)
-{
-	int nid, cid;
-
-	if (!numa_debug_enabled)
-		return;
-	printk(KERN_DEBUG "NUMA node to core mapping\n");
-	for (nid = 0; nid < emu_nodes; nid++) {
-		printk(KERN_DEBUG "  node %3d: ", nid);
-		for (cid = 0; cid < ARRAY_SIZE(emu_cores->to_node_id); cid++) {
-			if (emu_cores->to_node_id[cid] == nid)
-				printk(KERN_CONT "%d ", cid);
-		}
-		printk(KERN_CONT "\n");
-	}
-}
-
-static void pin_all_possible_cpus(void)
-{
-	int core_id, node_id, cpu;
-	static int initialized;
-
-	if (initialized)
-		return;
-	print_node_to_core_map();
-	node_id = 0;
-	for_each_possible_cpu(cpu) {
-		core_id = smp_get_base_cpu(cpu);
-		if (emu_cores->to_node_id[core_id] != NODE_ID_FREE)
-			continue;
-		pin_core_to_node(core_id, node_id);
-		cpu_topology[cpu].node_id = node_id;
-		node_id = (node_id + 1) % emu_nodes;
-	}
-	print_node_to_core_map();
-	initialized = 1;
-}
-
-/*
- * Transfer physical topology into a NUMA topology and modify CPU masks
- * according to the NUMA topology.
- *
- * Must be called with "sched_domains_mutex" lock held.
- */
-static void emu_update_cpu_topology(void)
-{
-	struct toptree *phys, *numa;
-
-	if (emu_cores == NULL)
-		create_core_to_node_map();
-	phys = toptree_from_topology();
-	numa = toptree_to_numa(phys);
-	toptree_free(phys);
-	toptree_to_topology(numa);
-	toptree_free(numa);
-	pin_all_possible_cpus();
-}
-
-/*
- * If emu_size is not set, use CONFIG_EMU_SIZE. Then round to minimum
- * alignment (needed for memory hotplug).
- */
-static unsigned long emu_setup_size_adjust(unsigned long size)
-{
-	unsigned long size_new;
-
-	size = size ? : CONFIG_EMU_SIZE;
-	size_new = roundup(size, memory_block_size_bytes());
-	if (size_new == size)
-		return size;
-	pr_warn("Increasing memory stripe size from %ld MB to %ld MB\n",
-		size >> 20, size_new >> 20);
-	return size_new;
-}
-
-/*
- * If we have not enough memory for the specified nodes, reduce the node count.
- */
-static int emu_setup_nodes_adjust(int nodes)
-{
-	int nodes_max;
-
-	nodes_max = memblock.memory.total_size / emu_size;
-	nodes_max = max(nodes_max, 1);
-	if (nodes_max >= nodes)
-		return nodes;
-	pr_warn("Not enough memory for %d nodes, reducing node count\n", nodes);
-	return nodes_max;
-}
-
-/*
- * Early emu setup
- */
-static void emu_setup(void)
-{
-	int nid;
-
-	emu_size = emu_setup_size_adjust(emu_size);
-	emu_nodes = emu_setup_nodes_adjust(emu_nodes);
-	for (nid = 0; nid < emu_nodes; nid++)
-		node_set(nid, node_possible_map);
-	pr_info("Creating %d nodes with memory stripe size %ld MB\n",
-		emu_nodes, emu_size >> 20);
-}
-
-/*
- * Return node id for given page number
- */
-static int emu_pfn_to_nid(unsigned long pfn)
-{
-	return (pfn / (emu_size >> PAGE_SHIFT)) % emu_nodes;
-}
-
-/*
- * Return stripe size
- */
-static unsigned long emu_align(void)
-{
-	return emu_size;
-}
-
-/*
- * Return distance between two nodes
- */
-static int emu_distance(int node1, int node2)
-{
-	return (node1 != node2) * EMU_NODE_DIST;
-}
-
-/*
- * Define callbacks for generic s390 NUMA infrastructure
- */
-const struct numa_mode numa_mode_emu = {
-	.name = "emu",
-	.setup = emu_setup,
-	.update_cpu_topology = emu_update_cpu_topology,
-	.__pfn_to_nid = emu_pfn_to_nid,
-	.align = emu_align,
-	.distance = emu_distance,
-};
-
-/*
- * Kernel parameter: emu_nodes=<n>
- */
-static int __init early_parse_emu_nodes(char *p)
-{
-	int count;
-
-	if (!p || kstrtoint(p, 0, &count) != 0 || count <= 0)
-		return 0;
-	emu_nodes = min(count, MAX_NUMNODES);
-	return 0;
-}
-early_param("emu_nodes", early_parse_emu_nodes);
-
-/*
- * Kernel parameter: emu_size=[<n>[k|M|G|T]]
- */
-static int __init early_parse_emu_size(char *p)
-{
-	if (p)
-		emu_size = memparse(p, NULL);
-	return 0;
-}
-early_param("emu_size", early_parse_emu_size);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
deleted file mode 100644
index 8386c58..0000000
--- a/arch/s390/numa/numa.c
+++ /dev/null
@@ -1,165 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * Implement NUMA core code.
- *
- * Copyright IBM Corp. 2015
- */
-
-#define KMSG_COMPONENT "numa"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/mmzone.h>
-#include <linux/cpumask.h>
-#include <linux/memblock.h>
-#include <linux/slab.h>
-#include <linux/node.h>
-
-#include <asm/numa.h>
-#include "numa_mode.h"
-
-pg_data_t *node_data[MAX_NUMNODES];
-EXPORT_SYMBOL(node_data);
-
-cpumask_t node_to_cpumask_map[MAX_NUMNODES];
-EXPORT_SYMBOL(node_to_cpumask_map);
-
-static void plain_setup(void)
-{
-	node_set(0, node_possible_map);
-}
-
-const struct numa_mode numa_mode_plain = {
-	.name = "plain",
-	.setup = plain_setup,
-};
-
-static const struct numa_mode *mode = &numa_mode_plain;
-
-int numa_pfn_to_nid(unsigned long pfn)
-{
-	return mode->__pfn_to_nid ? mode->__pfn_to_nid(pfn) : 0;
-}
-
-void numa_update_cpu_topology(void)
-{
-	if (mode->update_cpu_topology)
-		mode->update_cpu_topology();
-}
-
-int numa_debug_enabled;
-
-/*
- * numa_setup_memory() - Assign bootmem to nodes
- *
- * The memory is first added to memblock without any respect to nodes.
- * This is fixed before remaining memblock memory is handed over to the
- * buddy allocator.
- * An important side effect is that large bootmem allocations might easily
- * cross node boundaries, which can be needed for large allocations with
- * smaller memory stripes in each node (i.e. when using NUMA emulation).
- *
- * Memory defines nodes:
- * Therefore this routine also sets the nodes online with memory.
- */
-static void __init numa_setup_memory(void)
-{
-	unsigned long cur_base, align, end_of_dram;
-	int nid = 0;
-
-	end_of_dram = memblock_end_of_DRAM();
-	align = mode->align ? mode->align() : ULONG_MAX;
-
-	/*
-	 * Step through all available memory and assign it to the nodes
-	 * indicated by the mode implementation.
-	 * All nodes which are seen here will be set online.
-	 */
-	cur_base = 0;
-	do {
-		nid = numa_pfn_to_nid(PFN_DOWN(cur_base));
-		node_set_online(nid);
-		memblock_set_node(cur_base, align, &memblock.memory, nid);
-		cur_base += align;
-	} while (cur_base < end_of_dram);
-
-	/* Allocate and fill out node_data */
-	for (nid = 0; nid < MAX_NUMNODES; nid++) {
-		NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8);
-		if (!NODE_DATA(nid))
-			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
-			      __func__, sizeof(pg_data_t), 8);
-	}
-
-	for_each_online_node(nid) {
-		unsigned long start_pfn, end_pfn;
-		unsigned long t_start, t_end;
-		int i;
-
-		start_pfn = ULONG_MAX;
-		end_pfn = 0;
-		for_each_mem_pfn_range(i, nid, &t_start, &t_end, NULL) {
-			if (t_start < start_pfn)
-				start_pfn = t_start;
-			if (t_end > end_pfn)
-				end_pfn = t_end;
-		}
-		NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-		NODE_DATA(nid)->node_id = nid;
-	}
-}
-
-/*
- * numa_setup() - Earliest initialization
- *
- * Assign the mode and call the mode's setup routine.
- */
-void __init numa_setup(void)
-{
-	pr_info("NUMA mode: %s\n", mode->name);
-	nodes_clear(node_possible_map);
-	/* Initially attach all possible CPUs to node 0. */
-	cpumask_copy(&node_to_cpumask_map[0], cpu_possible_mask);
-	if (mode->setup)
-		mode->setup();
-	numa_setup_memory();
-	memblock_dump_all();
-}
-
-/*
- * numa_init_late() - Initialization initcall
- *
- * Register NUMA nodes.
- */
-static int __init numa_init_late(void)
-{
-	int nid;
-
-	for_each_online_node(nid)
-		register_one_node(nid);
-	return 0;
-}
-arch_initcall(numa_init_late);
-
-static int __init parse_debug(char *parm)
-{
-	numa_debug_enabled = 1;
-	return 0;
-}
-early_param("numa_debug", parse_debug);
-
-static int __init parse_numa(char *parm)
-{
-	if (!parm)
-		return 1;
-	if (strcmp(parm, numa_mode_plain.name) == 0)
-		mode = &numa_mode_plain;
-#ifdef CONFIG_NUMA_EMU
-	if (strcmp(parm, numa_mode_emu.name) == 0)
-		mode = &numa_mode_emu;
-#endif
-	return 0;
-}
-early_param("numa", parse_numa);
diff --git a/arch/s390/numa/numa_mode.h b/arch/s390/numa/numa_mode.h
deleted file mode 100644
index dfd3e27..0000000
--- a/arch/s390/numa/numa_mode.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * Define declarations used for communication between NUMA mode
- * implementations and NUMA core functionality.
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef __S390_NUMA_MODE_H
-#define __S390_NUMA_MODE_H
-
-struct numa_mode {
-	char *name;				/* Name of mode */
-	void (*setup)(void);			/* Initizalize mode */
-	void (*update_cpu_topology)(void);	/* Called by topology code */
-	int (*__pfn_to_nid)(unsigned long pfn);	/* PFN to node ID */
-	unsigned long (*align)(void);		/* Minimum node alignment */
-	int (*distance)(int a, int b);		/* Distance between two nodes */
-};
-
-extern const struct numa_mode numa_mode_plain;
-extern const struct numa_mode numa_mode_emu;
-
-#endif /* __S390_NUMA_MODE_H */
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
deleted file mode 100644
index 71a608c..0000000
--- a/arch/s390/numa/toptree.c
+++ /dev/null
@@ -1,351 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-
-#include <linux/kernel.h>
-#include <linux/memblock.h>
-#include <linux/cpumask.h>
-#include <linux/list.h>
-#include <linux/list_sort.h>
-#include <linux/slab.h>
-#include <asm/numa.h>
-
-#include "toptree.h"
-
-/**
- * toptree_alloc - Allocate and initialize a new tree node.
- * @level: The node's vertical level; level 0 contains the leaves.
- * @id: ID number, explicitly not unique beyond scope of node's siblings
- *
- * Allocate a new tree node and initialize it.
- *
- * RETURNS:
- * Pointer to the new tree node or NULL on error
- */
-struct toptree __ref *toptree_alloc(int level, int id)
-{
-	struct toptree *res;
-
-	if (slab_is_available())
-		res = kzalloc(sizeof(*res), GFP_KERNEL);
-	else
-		res = memblock_alloc(sizeof(*res), 8);
-	if (!res)
-		return res;
-
-	INIT_LIST_HEAD(&res->children);
-	INIT_LIST_HEAD(&res->sibling);
-	cpumask_clear(&res->mask);
-	res->level = level;
-	res->id = id;
-	return res;
-}
-
-/**
- * toptree_remove - Remove a tree node from a tree
- * @cand: Pointer to the node to remove
- *
- * The node is detached from its parent node. The parent node's
- * masks will be updated to reflect the loss of the child.
- */
-static void toptree_remove(struct toptree *cand)
-{
-	struct toptree *oldparent;
-
-	list_del_init(&cand->sibling);
-	oldparent = cand->parent;
-	cand->parent = NULL;
-	toptree_update_mask(oldparent);
-}
-
-/**
- * toptree_free - discard a tree node
- * @cand: Pointer to the tree node to discard
- *
- * Checks if @cand is attached to a parent node. Detaches it
- * cleanly using toptree_remove. Possible children are freed
- * recursively. In the end @cand itself is freed.
- */
-void __ref toptree_free(struct toptree *cand)
-{
-	struct toptree *child, *tmp;
-
-	if (cand->parent)
-		toptree_remove(cand);
-	toptree_for_each_child_safe(child, tmp, cand)
-		toptree_free(child);
-	if (slab_is_available())
-		kfree(cand);
-	else
-		memblock_free_early((unsigned long)cand, sizeof(*cand));
-}
-
-/**
- * toptree_update_mask - Update node bitmasks
- * @cand: Pointer to a tree node
- *
- * The node's cpumask will be updated by combining all children's
- * masks. Then toptree_update_mask is called recursively for the
- * parent if applicable.
- *
- * NOTE:
- * This must not be called on leaves. If called on a leaf, its
- * CPU mask is cleared and lost.
- */
-void toptree_update_mask(struct toptree *cand)
-{
-	struct toptree *child;
-
-	cpumask_clear(&cand->mask);
-	list_for_each_entry(child, &cand->children, sibling)
-		cpumask_or(&cand->mask, &cand->mask, &child->mask);
-	if (cand->parent)
-		toptree_update_mask(cand->parent);
-}
-
-/**
- * toptree_insert - Insert a tree node into tree
- * @cand: Pointer to the node to insert
- * @target: Pointer to the node to which @cand will added as a child
- *
- * Insert a tree node into a tree. Masks will be updated automatically.
- *
- * RETURNS:
- * 0 on success, -1 if NULL is passed as argument or the node levels
- * don't fit.
- */
-static int toptree_insert(struct toptree *cand, struct toptree *target)
-{
-	if (!cand || !target)
-		return -1;
-	if (target->level != (cand->level + 1))
-		return -1;
-	list_add_tail(&cand->sibling, &target->children);
-	cand->parent = target;
-	toptree_update_mask(target);
-	return 0;
-}
-
-/**
- * toptree_move_children - Move all child nodes of a node to a new place
- * @cand: Pointer to the node whose children are to be moved
- * @target: Pointer to the node to which @cand's children will be attached
- *
- * Take all child nodes of @cand and move them using toptree_move.
- */
-static void toptree_move_children(struct toptree *cand, struct toptree *target)
-{
-	struct toptree *child, *tmp;
-
-	toptree_for_each_child_safe(child, tmp, cand)
-		toptree_move(child, target);
-}
-
-/**
- * toptree_unify - Merge children with same ID
- * @cand: Pointer to node whose direct children should be made unique
- *
- * When mangling the tree it is possible that a node has two or more children
- * which have the same ID. This routine merges these children into one and
- * moves all children of the merged nodes into the unified node.
- */
-void toptree_unify(struct toptree *cand)
-{
-	struct toptree *child, *tmp, *cand_copy;
-
-	/* Threads cannot be split, cores are not split */
-	if (cand->level < 2)
-		return;
-
-	cand_copy = toptree_alloc(cand->level, 0);
-	toptree_for_each_child_safe(child, tmp, cand) {
-		struct toptree *tmpchild;
-
-		if (!cpumask_empty(&child->mask)) {
-			tmpchild = toptree_get_child(cand_copy, child->id);
-			toptree_move_children(child, tmpchild);
-		}
-		toptree_free(child);
-	}
-	toptree_move_children(cand_copy, cand);
-	toptree_free(cand_copy);
-
-	toptree_for_each_child(child, cand)
-		toptree_unify(child);
-}
-
-/**
- * toptree_move - Move a node to another context
- * @cand: Pointer to the node to move
- * @target: Pointer to the node where @cand should go
- *
- * In the easiest case @cand is exactly on the level below @target
- * and will be immediately moved to the target.
- *
- * If @target's level is not the direct parent level of @cand,
- * nodes for the missing levels are created and put between
- * @cand and @target. The "stacking" nodes' IDs are taken from
- * @cand's parents.
- *
- * After this it is likely to have redundant nodes in the tree
- * which are addressed by means of toptree_unify.
- */
-void toptree_move(struct toptree *cand, struct toptree *target)
-{
-	struct toptree *stack_target, *real_insert_point, *ptr, *tmp;
-
-	if (cand->level + 1 == target->level) {
-		toptree_remove(cand);
-		toptree_insert(cand, target);
-		return;
-	}
-
-	real_insert_point = NULL;
-	ptr = cand;
-	stack_target = NULL;
-
-	do {
-		tmp = stack_target;
-		stack_target = toptree_alloc(ptr->level + 1,
-					     ptr->parent->id);
-		toptree_insert(tmp, stack_target);
-		if (!real_insert_point)
-			real_insert_point = stack_target;
-		ptr = ptr->parent;
-	} while (stack_target->level < (target->level - 1));
-
-	toptree_remove(cand);
-	toptree_insert(cand, real_insert_point);
-	toptree_insert(stack_target, target);
-}
-
-/**
- * toptree_get_child - Access a tree node's child by its ID
- * @cand: Pointer to tree node whose child is to access
- * @id: The desired child's ID
- *
- * @cand's children are searched for a child with matching ID.
- * If no match can be found, a new child with the desired ID
- * is created and returned.
- */
-struct toptree *toptree_get_child(struct toptree *cand, int id)
-{
-	struct toptree *child;
-
-	toptree_for_each_child(child, cand)
-		if (child->id == id)
-			return child;
-	child = toptree_alloc(cand->level-1, id);
-	toptree_insert(child, cand);
-	return child;
-}
-
-/**
- * toptree_first - Find the first descendant on specified level
- * @context: Pointer to tree node whose descendants are to be used
- * @level: The level of interest
- *
- * RETURNS:
- * @context's first descendant on the specified level, or NULL
- * if there is no matching descendant
- */
-struct toptree *toptree_first(struct toptree *context, int level)
-{
-	struct toptree *child, *tmp;
-
-	if (context->level == level)
-		return context;
-
-	if (!list_empty(&context->children)) {
-		list_for_each_entry(child, &context->children, sibling) {
-			tmp = toptree_first(child, level);
-			if (tmp)
-				return tmp;
-		}
-	}
-	return NULL;
-}
-
-/**
- * toptree_next_sibling - Return next sibling
- * @cur: Pointer to a tree node
- *
- * RETURNS:
- * If @cur has a parent and is not the last in the parent's children list,
- * the next sibling is returned. Or NULL when there are no siblings left.
- */
-static struct toptree *toptree_next_sibling(struct toptree *cur)
-{
-	if (cur->parent == NULL)
-		return NULL;
-
-	if (cur == list_last_entry(&cur->parent->children,
-				   struct toptree, sibling))
-		return NULL;
-	return (struct toptree *) list_next_entry(cur, sibling);
-}
-
-/**
- * toptree_next - Tree traversal function
- * @cur: Pointer to current element
- * @context: Pointer to the root node of the tree or subtree to
- * be traversed.
- * @level: The level of interest.
- *
- * RETURNS:
- * Pointer to the next node on level @level
- * or NULL when there is no next node.
- */
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
-			     int level)
-{
-	struct toptree *cur_context, *tmp;
-
-	if (!cur)
-		return NULL;
-
-	if (context->level == level)
-		return NULL;
-
-	tmp = toptree_next_sibling(cur);
-	if (tmp != NULL)
-		return tmp;
-
-	cur_context = cur;
-	while (cur_context->level < context->level - 1) {
-		/* Step up */
-		cur_context = cur_context->parent;
-		/* Step aside */
-		tmp = toptree_next_sibling(cur_context);
-		if (tmp != NULL) {
-			/* Step down */
-			tmp = toptree_first(tmp, level);
-			if (tmp != NULL)
-				return tmp;
-		}
-	}
-	return NULL;
-}
-
-/**
- * toptree_count - Count descendants on specified level
- * @context: Pointer to node whose descendants are to be considered
- * @level: Only descendants on the specified level will be counted
- *
- * RETURNS:
- * Number of descendants on the specified level
- */
-int toptree_count(struct toptree *context, int level)
-{
-	struct toptree *cur;
-	int cnt = 0;
-
-	toptree_for_each(cur, context, level)
-		cnt++;
-	return cnt;
-}
diff --git a/arch/s390/numa/toptree.h b/arch/s390/numa/toptree.h
deleted file mode 100644
index 5246371..0000000
--- a/arch/s390/numa/toptree.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * NUMA support for s390
- *
- * A tree structure used for machine topology mangling
- *
- * Copyright IBM Corp. 2015
- */
-#ifndef S390_TOPTREE_H
-#define S390_TOPTREE_H
-
-#include <linux/cpumask.h>
-#include <linux/list.h>
-
-struct toptree {
-	int level;
-	int id;
-	cpumask_t mask;
-	struct toptree *parent;
-	struct list_head sibling;
-	struct list_head children;
-};
-
-struct toptree *toptree_alloc(int level, int id);
-void toptree_free(struct toptree *cand);
-void toptree_update_mask(struct toptree *cand);
-void toptree_unify(struct toptree *cand);
-struct toptree *toptree_get_child(struct toptree *cand, int id);
-void toptree_move(struct toptree *cand, struct toptree *target);
-int toptree_count(struct toptree *context, int level);
-
-struct toptree *toptree_first(struct toptree *context, int level);
-struct toptree *toptree_next(struct toptree *cur, struct toptree *context,
-			     int level);
-
-#define toptree_for_each_child(child, ptree)				\
-	list_for_each_entry(child,  &ptree->children, sibling)
-
-#define toptree_for_each_child_safe(child, ptmp, ptree)			\
-	list_for_each_entry_safe(child, ptmp, &ptree->children, sibling)
-
-#define toptree_is_last(ptree)					\
-	((ptree->parent == NULL) ||				\
-	 (ptree->parent->children.prev == &ptree->sibling))
-
-#define toptree_for_each(ptree, cont, ttype)		\
-	for (ptree = toptree_first(cont, ttype);	\
-	     ptree != NULL;				\
-	     ptree = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_safe(ptree, tmp, cont, ttype)		\
-	for (ptree = toptree_first(cont, ttype),		\
-		     tmp = toptree_next(ptree, cont, ttype);	\
-	     ptree != NULL;					\
-	     ptree = tmp,					\
-		     tmp = toptree_next(ptree, cont, ttype))
-
-#define toptree_for_each_sibling(ptree, start)			\
-	toptree_for_each(ptree, start->parent, start->level)
-
-#endif /* S390_TOPTREE_H */
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 748626a..bf557a1 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -4,4 +4,6 @@
 #
 
 obj-$(CONFIG_PCI)	+= pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
-			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o
+			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
+			   pci_bus.o
+obj-$(CONFIG_PCI_IOV)	+= pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index b8ddacf..e14e4a3 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/jump_label.h>
 #include <linux/pci.h>
+#include <linux/printk.h>
 
 #include <asm/isc.h>
 #include <asm/airq.h>
@@ -35,17 +36,22 @@
 #include <asm/pci_clp.h>
 #include <asm/pci_dma.h>
 
+#include "pci_bus.h"
+#include "pci_iov.h"
+
 /* list of all detected zpci devices */
 static LIST_HEAD(zpci_list);
 static DEFINE_SPINLOCK(zpci_list_lock);
 
-static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
+static DECLARE_BITMAP(zpci_domain, ZPCI_DOMAIN_BITMAP_SIZE);
 static DEFINE_SPINLOCK(zpci_domain_lock);
 
 #define ZPCI_IOMAP_ENTRIES						\
-	min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2),	\
+	min(((unsigned long) ZPCI_NR_DEVICES * PCI_STD_NUM_BARS / 2),	\
 	    ZPCI_IOMAP_MAX_ENTRIES)
 
+unsigned int s390_pci_no_rid;
+
 static DEFINE_SPINLOCK(zpci_iomap_lock);
 static unsigned long *zpci_iomap_bitmap;
 struct zpci_iomap_entry *zpci_iomap_start;
@@ -86,17 +92,12 @@
 	spin_unlock(&zpci_list_lock);
 
 	list_for_each_entry_safe(zdev, tmp, &remove, entry)
-		zpci_remove_device(zdev);
-}
-
-static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus)
-{
-	return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL;
+		zpci_device_reserved(zdev);
 }
 
 int pci_domain_nr(struct pci_bus *bus)
 {
-	return ((struct zpci_dev *) bus->sysdata)->domain;
+	return ((struct zpci_bus *) bus->sysdata)->domain_nr;
 }
 EXPORT_SYMBOL_GPL(pci_domain_nr);
 
@@ -226,34 +227,58 @@
        zpci_memcpy_toio(to, from, count);
 }
 
-void __iomem *ioremap(unsigned long ioaddr, unsigned long size)
+static void __iomem *__ioremap(phys_addr_t addr, size_t size, pgprot_t prot)
 {
+	unsigned long offset, vaddr;
 	struct vm_struct *area;
-	unsigned long offset;
+	phys_addr_t last_addr;
 
-	if (!size)
+	last_addr = addr + size - 1;
+	if (!size || last_addr < addr)
 		return NULL;
 
 	if (!static_branch_unlikely(&have_mio))
-		return (void __iomem *) ioaddr;
+		return (void __iomem *) addr;
 
-	offset = ioaddr & ~PAGE_MASK;
-	ioaddr &= PAGE_MASK;
+	offset = addr & ~PAGE_MASK;
+	addr &= PAGE_MASK;
 	size = PAGE_ALIGN(size + offset);
 	area = get_vm_area(size, VM_IOREMAP);
 	if (!area)
 		return NULL;
 
-	if (ioremap_page_range((unsigned long) area->addr,
-			       (unsigned long) area->addr + size,
-			       ioaddr, PAGE_KERNEL)) {
-		vunmap(area->addr);
+	vaddr = (unsigned long) area->addr;
+	if (ioremap_page_range(vaddr, vaddr + size, addr, prot)) {
+		free_vm_area(area);
 		return NULL;
 	}
 	return (void __iomem *) ((unsigned long) area->addr + offset);
 }
+
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
+{
+	return __ioremap(addr, size, __pgprot(prot));
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+void __iomem *ioremap(phys_addr_t addr, size_t size)
+{
+	return __ioremap(addr, size, PAGE_KERNEL);
+}
 EXPORT_SYMBOL(ioremap);
 
+void __iomem *ioremap_wc(phys_addr_t addr, size_t size)
+{
+	return __ioremap(addr, size, pgprot_writecombine(PAGE_KERNEL));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_wt(phys_addr_t addr, size_t size)
+{
+	return __ioremap(addr, size, pgprot_writethrough(PAGE_KERNEL));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
 void iounmap(volatile void __iomem *addr)
 {
 	if (static_branch_likely(&have_mio))
@@ -294,7 +319,7 @@
 void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
 			      unsigned long offset, unsigned long max)
 {
-	if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
 		return NULL;
 
 	if (static_branch_likely(&have_mio))
@@ -324,7 +349,7 @@
 void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
 				 unsigned long offset, unsigned long max)
 {
-	if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+	if (bar >= PCI_STD_NUM_BARS || !pci_resource_len(pdev, bar))
 		return NULL;
 
 	if (static_branch_likely(&have_mio))
@@ -371,29 +396,17 @@
 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
 		    int size, u32 *val)
 {
-	struct zpci_dev *zdev = get_zdev_by_bus(bus);
-	int ret;
+	struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
 
-	if (!zdev || devfn != ZPCI_DEVFN)
-		ret = -ENODEV;
-	else
-		ret = zpci_cfg_load(zdev, where, val, size);
-
-	return ret;
+	return (zdev) ? zpci_cfg_load(zdev, where, val, size) : -ENODEV;
 }
 
 static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
 		     int size, u32 val)
 {
-	struct zpci_dev *zdev = get_zdev_by_bus(bus);
-	int ret;
+	struct zpci_dev *zdev = get_zdev_by_bus(bus, devfn);
 
-	if (!zdev || devfn != ZPCI_DEVFN)
-		ret = -ENODEV;
-	else
-		ret = zpci_cfg_store(zdev, where, val, size);
-
-	return ret;
+	return (zdev) ? zpci_cfg_store(zdev, where, val, size) : -ENODEV;
 }
 
 static struct pci_ops pci_root_ops = {
@@ -401,22 +414,13 @@
 	.write = pci_write,
 };
 
-#ifdef CONFIG_PCI_IOV
-static struct resource iov_res = {
-	.name	= "PCI IOV res",
-	.start	= 0,
-	.end	= -1,
-	.flags	= IORESOURCE_MEM,
-};
-#endif
-
 static void zpci_map_resources(struct pci_dev *pdev)
 {
 	struct zpci_dev *zdev = to_zpci(pdev);
 	resource_size_t len;
 	int i;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		len = pci_resource_len(pdev, i);
 		if (!len)
 			continue;
@@ -430,16 +434,7 @@
 		pdev->resource[i].end = pdev->resource[i].start + len - 1;
 	}
 
-#ifdef CONFIG_PCI_IOV
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		int bar = i + PCI_IOV_RESOURCES;
-
-		len = pci_resource_len(pdev, bar);
-		if (!len)
-			continue;
-		pdev->resource[bar].parent = &iov_res;
-	}
-#endif
+	zpci_iov_map_resources(pdev);
 }
 
 static void zpci_unmap_resources(struct pci_dev *pdev)
@@ -451,7 +446,7 @@
 	if (zpci_use_mio(zdev))
 		return;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		len = pci_resource_len(pdev, i);
 		if (!len)
 			continue;
@@ -504,17 +499,17 @@
 	return r;
 }
 
-static int zpci_setup_bus_resources(struct zpci_dev *zdev,
-				    struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev,
+			     struct list_head *resources)
 {
 	unsigned long addr, size, flags;
 	struct resource *res;
 	int i, entry;
 
 	snprintf(zdev->res_name, sizeof(zdev->res_name),
-		 "PCI Bus %04x:%02x", zdev->domain, ZPCI_BUS_NR);
+		 "PCI Bus %04x:%02x", zdev->uid, ZPCI_BUS_NR);
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		if (!zdev->bars[i].size)
 			continue;
 		entry = zpci_alloc_iomap(zdev);
@@ -551,7 +546,7 @@
 {
 	int i;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		if (!zdev->bars[i].size || !zdev->bars[i].res)
 			continue;
 
@@ -563,9 +558,12 @@
 
 int pcibios_add_device(struct pci_dev *pdev)
 {
+	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
 	int i;
 
+	/* The pdev has a reference to the zdev via its bus */
+	zpci_zdev_get(zdev);
 	if (pdev->is_physfn)
 		pdev->no_vf_scan = 1;
 
@@ -573,7 +571,7 @@
 	pdev->dev.dma_ops = &s390_pci_dma_ops;
 	zpci_map_resources(pdev);
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		res = &pdev->resource[i];
 		if (res->parent || !res->flags)
 			continue;
@@ -585,7 +583,10 @@
 
 void pcibios_release_device(struct pci_dev *pdev)
 {
+	struct zpci_dev *zdev = to_zpci(pdev);
+
 	zpci_unmap_resources(pdev);
+	zpci_zdev_put(zdev);
 }
 
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
@@ -606,136 +607,62 @@
 	zpci_debug_exit_device(zdev);
 }
 
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-static int zpci_restore(struct device *dev)
+static int __zpci_register_domain(int domain)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = to_zpci(pdev);
-	int ret = 0;
-
-	if (zdev->state != ZPCI_FN_STATE_ONLINE)
-		goto out;
-
-	ret = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
-	if (ret)
-		goto out;
-
-	zpci_map_resources(pdev);
-	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-			   (u64) zdev->dma_table);
-
-out:
-	return ret;
+	spin_lock(&zpci_domain_lock);
+	if (test_bit(domain, zpci_domain)) {
+		spin_unlock(&zpci_domain_lock);
+		pr_err("Domain %04x is already assigned\n", domain);
+		return -EEXIST;
+	}
+	set_bit(domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+	return domain;
 }
 
-static int zpci_freeze(struct device *dev)
+static int __zpci_alloc_domain(void)
 {
-	struct pci_dev *pdev = to_pci_dev(dev);
-	struct zpci_dev *zdev = to_zpci(pdev);
+	int domain;
 
-	if (zdev->state != ZPCI_FN_STATE_ONLINE)
-		return 0;
-
-	zpci_unregister_ioat(zdev, 0);
-	zpci_unmap_resources(pdev);
-	return clp_disable_fh(zdev);
+	spin_lock(&zpci_domain_lock);
+	/*
+	 * We can always auto allocate domains below ZPCI_NR_DEVICES.
+	 * There is either a free domain or we have reached the maximum in
+	 * which case we would have bailed earlier.
+	 */
+	domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
+	set_bit(domain, zpci_domain);
+	spin_unlock(&zpci_domain_lock);
+	return domain;
 }
 
-struct dev_pm_ops pcibios_pm_ops = {
-	.thaw_noirq = zpci_restore,
-	.freeze_noirq = zpci_freeze,
-	.restore_noirq = zpci_restore,
-	.poweroff_noirq = zpci_freeze,
-};
-#endif /* CONFIG_HIBERNATE_CALLBACKS */
-
-static int zpci_alloc_domain(struct zpci_dev *zdev)
+int zpci_alloc_domain(int domain)
 {
 	if (zpci_unique_uid) {
-		zdev->domain = (u16) zdev->uid;
-		if (zdev->domain >= ZPCI_NR_DEVICES)
-			return 0;
-
-		spin_lock(&zpci_domain_lock);
-		if (test_bit(zdev->domain, zpci_domain)) {
-			spin_unlock(&zpci_domain_lock);
-			return -EEXIST;
-		}
-		set_bit(zdev->domain, zpci_domain);
-		spin_unlock(&zpci_domain_lock);
-		return 0;
+		if (domain)
+			return __zpci_register_domain(domain);
+		pr_warn("UID checking was active but no UID is provided: switching to automatic domain allocation\n");
+		update_uid_checking(false);
 	}
-
-	spin_lock(&zpci_domain_lock);
-	zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
-	if (zdev->domain == ZPCI_NR_DEVICES) {
-		spin_unlock(&zpci_domain_lock);
-		return -ENOSPC;
-	}
-	set_bit(zdev->domain, zpci_domain);
-	spin_unlock(&zpci_domain_lock);
-	return 0;
+	return __zpci_alloc_domain();
 }
 
-static void zpci_free_domain(struct zpci_dev *zdev)
+void zpci_free_domain(int domain)
 {
-	if (zdev->domain >= ZPCI_NR_DEVICES)
-		return;
-
 	spin_lock(&zpci_domain_lock);
-	clear_bit(zdev->domain, zpci_domain);
+	clear_bit(domain, zpci_domain);
 	spin_unlock(&zpci_domain_lock);
 }
 
-void pcibios_remove_bus(struct pci_bus *bus)
-{
-	struct zpci_dev *zdev = get_zdev_by_bus(bus);
-
-	zpci_exit_slot(zdev);
-	zpci_cleanup_bus_resources(zdev);
-	zpci_destroy_iommu(zdev);
-	zpci_free_domain(zdev);
-
-	spin_lock(&zpci_list_lock);
-	list_del(&zdev->entry);
-	spin_unlock(&zpci_list_lock);
-
-	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
-	kfree(zdev);
-}
-
-static int zpci_scan_bus(struct zpci_dev *zdev)
-{
-	LIST_HEAD(resources);
-	int ret;
-
-	ret = zpci_setup_bus_resources(zdev, &resources);
-	if (ret)
-		goto error;
-
-	zdev->bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, &pci_root_ops,
-				      zdev, &resources);
-	if (!zdev->bus) {
-		ret = -EIO;
-		goto error;
-	}
-	zdev->bus->max_bus_speed = zdev->max_bus_speed;
-	pci_bus_add_devices(zdev->bus);
-	return 0;
-
-error:
-	zpci_cleanup_bus_resources(zdev);
-	pci_free_resource_list(&resources);
-	return ret;
-}
 
 int zpci_enable_device(struct zpci_dev *zdev)
 {
 	int rc;
 
-	rc = clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES);
-	if (rc)
+	if (clp_enable_fh(zdev, ZPCI_NR_DMA_SPACES)) {
+		rc = -EIO;
 		goto out;
+	}
 
 	rc = zpci_dma_init_device(zdev);
 	if (rc)
@@ -754,58 +681,175 @@
 int zpci_disable_device(struct zpci_dev *zdev)
 {
 	zpci_dma_exit_device(zdev);
-	return clp_disable_fh(zdev);
+	/*
+	 * The zPCI function may already be disabled by the platform; this is
+	 * detected in clp_disable_fh() which becomes a no-op.
+	 */
+	return clp_disable_fh(zdev) ? -EIO : 0;
 }
 EXPORT_SYMBOL_GPL(zpci_disable_device);
 
-int zpci_create_device(struct zpci_dev *zdev)
+/* zpci_remove_device - Removes the given zdev from the PCI core
+ * @zdev: the zdev to be removed from the PCI core
+ * @set_error: if true the device's error state is set to permanent failure
+ *
+ * Sets a zPCI device to a configured but offline state; the zPCI
+ * device is still accessible through its hotplug slot and the zPCI
+ * API but is removed from the common code PCI bus, making it
+ * no longer available to drivers.
+ */
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error)
 {
+	struct zpci_bus *zbus = zdev->zbus;
+	struct pci_dev *pdev;
+
+	if (!zdev->zbus->bus)
+		return;
+
+	pdev = pci_get_slot(zbus->bus, zdev->devfn);
+	if (pdev) {
+		if (set_error)
+			pdev->error_state = pci_channel_io_perm_failure;
+		if (pdev->is_virtfn) {
+			zpci_iov_remove_virtfn(pdev, zdev->vfn);
+			/* balance pci_get_slot */
+			pci_dev_put(pdev);
+			return;
+		}
+		pci_stop_and_remove_bus_device_locked(pdev);
+		/* balance pci_get_slot */
+		pci_dev_put(pdev);
+	}
+}
+
+/**
+ * zpci_create_device() - Create a new zpci_dev and add it to the zbus
+ * @fid: Function ID of the device to be created
+ * @fh: Current Function Handle of the device to be created
+ * @state: Initial state after creation either Standby or Configured
+ *
+ * Creates a new zpci device and adds it to its, possibly newly created, zbus
+ * as well as zpci_list.
+ *
+ * Returns: 0 on success, an error value otherwise
+ */
+int zpci_create_device(u32 fid, u32 fh, enum zpci_state state)
+{
+	struct zpci_dev *zdev;
 	int rc;
 
-	rc = zpci_alloc_domain(zdev);
+	zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state);
+	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
+	if (!zdev)
+		return -ENOMEM;
+
+	/* FID and Function Handle are the static/dynamic identifiers */
+	zdev->fid = fid;
+	zdev->fh = fh;
+
+	/* Query function properties and update zdev */
+	rc = clp_query_pci_fn(zdev);
 	if (rc)
-		goto out;
+		goto error;
+	zdev->state =  state;
+
+	kref_init(&zdev->kref);
+	mutex_init(&zdev->lock);
 
 	rc = zpci_init_iommu(zdev);
 	if (rc)
-		goto out_free;
+		goto error;
 
-	mutex_init(&zdev->lock);
 	if (zdev->state == ZPCI_FN_STATE_CONFIGURED) {
 		rc = zpci_enable_device(zdev);
 		if (rc)
-			goto out_destroy_iommu;
+			goto error_destroy_iommu;
 	}
-	rc = zpci_scan_bus(zdev);
+
+	rc = zpci_bus_device_register(zdev, &pci_root_ops);
 	if (rc)
-		goto out_disable;
+		goto error_disable;
 
 	spin_lock(&zpci_list_lock);
 	list_add_tail(&zdev->entry, &zpci_list);
 	spin_unlock(&zpci_list_lock);
 
-	zpci_init_slot(zdev);
-
 	return 0;
 
-out_disable:
+error_disable:
 	if (zdev->state == ZPCI_FN_STATE_ONLINE)
 		zpci_disable_device(zdev);
-out_destroy_iommu:
+error_destroy_iommu:
 	zpci_destroy_iommu(zdev);
-out_free:
-	zpci_free_domain(zdev);
-out:
+error:
+	zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
+	kfree(zdev);
 	return rc;
 }
 
-void zpci_remove_device(struct zpci_dev *zdev)
+bool zpci_is_device_configured(struct zpci_dev *zdev)
 {
-	if (!zdev->bus)
-		return;
+	enum zpci_state state = zdev->state;
 
-	pci_stop_root_bus(zdev->bus);
-	pci_remove_root_bus(zdev->bus);
+	return state != ZPCI_FN_STATE_RESERVED &&
+		state != ZPCI_FN_STATE_STANDBY;
+}
+
+/**
+ * zpci_device_reserved() - Mark device as reserved
+ * @zdev: the zpci_dev that was reserved
+ *
+ * Handle the case that a given zPCI function was reserved by another system.
+ * After a call to this function the zpci_dev can no longer be found via
+ * get_zdev_by_fid(). It may still be accessible through existing
+ * references, but it is no longer functional.
+ */
+void zpci_device_reserved(struct zpci_dev *zdev)
+{
+	if (zdev->has_hp_slot)
+		zpci_exit_slot(zdev);
+	/*
+	 * Remove device from zpci_list as it is going away. This also
+	 * makes sure we ignore subsequent zPCI events for this device.
+	 */
+	spin_lock(&zpci_list_lock);
+	list_del(&zdev->entry);
+	spin_unlock(&zpci_list_lock);
+	zdev->state = ZPCI_FN_STATE_RESERVED;
+	zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+	zpci_zdev_put(zdev);
+}
+
+void zpci_release_device(struct kref *kref)
+{
+	struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
+
+	if (zdev->zbus->bus)
+		zpci_remove_device(zdev, false);
+
+	switch (zdev->state) {
+	case ZPCI_FN_STATE_ONLINE:
+	case ZPCI_FN_STATE_CONFIGURED:
+		zpci_disable_device(zdev);
+		fallthrough;
+	case ZPCI_FN_STATE_STANDBY:
+		if (zdev->has_hp_slot)
+			zpci_exit_slot(zdev);
+		spin_lock(&zpci_list_lock);
+		list_del(&zdev->entry);
+		spin_unlock(&zpci_list_lock);
+		zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
+		fallthrough;
+	case ZPCI_FN_STATE_RESERVED:
+		zpci_cleanup_bus_resources(zdev);
+		zpci_bus_device_unregister(zdev);
+		zpci_destroy_iommu(zdev);
+		fallthrough;
+	default:
+		break;
+	}
+	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
+	kfree(zdev);
 }
 
 int zpci_report_error(struct pci_dev *pdev,
@@ -837,6 +881,9 @@
 	if (!zpci_iomap_bitmap)
 		goto error_iomap_bitmap;
 
+	if (static_branch_likely(&have_mio))
+		clp_setup_writeback_mio();
+
 	return 0;
 error_iomap_bitmap:
 	kfree(zpci_iomap_start);
@@ -871,6 +918,10 @@
 		s390_pci_force_floating = 1;
 		return NULL;
 	}
+	if (!strcmp(str, "norid")) {
+		s390_pci_no_rid = 1;
+		return NULL;
+	}
 	return str;
 }
 
@@ -929,9 +980,3 @@
 	return rc;
 }
 subsys_initcall_sync(pci_base_init);
-
-void zpci_rescan(void)
-{
-	if (zpci_is_enabled())
-		clp_rescan_pci_devices_simple(NULL);
-}
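__zpci_register_domain() and __zpci_alloc_domain() above replace the old per-zdev domain handling with a shared bitmap: an explicitly requested domain (the UID) is claimed if still free, and when no UID-based domain is requested the first unused number below ZPCI_NR_DEVICES is handed out. The user-space model below captures only that bookkeeping; the array, the constant and the lock-free helpers are simplified stand-ins, not the kernel code.

#include <stdio.h>
#include <stdbool.h>

#define NR_DOMAINS 64			/* stand-in for ZPCI_NR_DEVICES */

static bool domain_used[NR_DOMAINS];

/* models __zpci_register_domain(): claim an explicitly requested domain */
static int register_domain(int domain)
{
	if (domain_used[domain])
		return -1;		/* already assigned */
	domain_used[domain] = true;
	return domain;
}

/* models __zpci_alloc_domain(): hand out the first free domain number */
static int alloc_domain(void)
{
	for (int i = 0; i < NR_DOMAINS; i++) {
		if (!domain_used[i]) {
			domain_used[i] = true;
			return i;
		}
	}
	return -1;
}

int main(void)
{
	printf("uid 5 -> %d\n", register_domain(5));	/* explicit: 5 */
	printf("uid 5 -> %d\n", register_domain(5));	/* duplicate: -1 */
	printf("auto  -> %d\n", alloc_domain());	/* first free number: 0 */
	return 0;
}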
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
new file mode 100644
index 0000000..755b46f
--- /dev/null
+++ b/arch/s390/pci/pci_bus.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/seq_file.h>
+#include <linux/jump_label.h>
+#include <linux/pci.h>
+#include <linux/printk.h>
+
+#include <asm/pci_clp.h>
+#include <asm/pci_dma.h>
+
+#include "pci_bus.h"
+#include "pci_iov.h"
+
+static LIST_HEAD(zbus_list);
+static DEFINE_SPINLOCK(zbus_list_lock);
+static int zpci_nb_devices;
+
+/* zpci_bus_scan
+ * @zbus: the zbus holding the zdevices
+ * @ops: the pci operations
+ *
+ * The domain number must be set before pci_scan_root_bus is called.
+ * This function can be called once the domain is known, hence
+ * when function 0 is discovered.
+ */
+static int zpci_bus_scan(struct zpci_bus *zbus, int domain, struct pci_ops *ops)
+{
+	struct pci_bus *bus;
+	int rc;
+
+	rc = zpci_alloc_domain(domain);
+	if (rc < 0)
+		return rc;
+	zbus->domain_nr = rc;
+
+	bus = pci_scan_root_bus(NULL, ZPCI_BUS_NR, ops, zbus, &zbus->resources);
+	if (!bus) {
+		zpci_free_domain(zbus->domain_nr);
+		return -EFAULT;
+	}
+
+	zbus->bus = bus;
+	pci_bus_add_devices(bus);
+	return 0;
+}
+
+static void zpci_bus_release(struct kref *kref)
+{
+	struct zpci_bus *zbus = container_of(kref, struct zpci_bus, kref);
+
+	if (zbus->bus) {
+		pci_lock_rescan_remove();
+		pci_stop_root_bus(zbus->bus);
+
+		zpci_free_domain(zbus->domain_nr);
+		pci_free_resource_list(&zbus->resources);
+
+		pci_remove_root_bus(zbus->bus);
+		pci_unlock_rescan_remove();
+	}
+
+	spin_lock(&zbus_list_lock);
+	list_del(&zbus->bus_next);
+	spin_unlock(&zbus_list_lock);
+	kfree(zbus);
+}
+
+static void zpci_bus_put(struct zpci_bus *zbus)
+{
+	kref_put(&zbus->kref, zpci_bus_release);
+}
+
+static struct zpci_bus *zpci_bus_get(int pchid)
+{
+	struct zpci_bus *zbus;
+
+	spin_lock(&zbus_list_lock);
+	list_for_each_entry(zbus, &zbus_list, bus_next) {
+		if (pchid == zbus->pchid) {
+			kref_get(&zbus->kref);
+			goto out_unlock;
+		}
+	}
+	zbus = NULL;
+out_unlock:
+	spin_unlock(&zbus_list_lock);
+	return zbus;
+}
+
+static struct zpci_bus *zpci_bus_alloc(int pchid)
+{
+	struct zpci_bus *zbus;
+
+	zbus = kzalloc(sizeof(*zbus), GFP_KERNEL);
+	if (!zbus)
+		return NULL;
+
+	zbus->pchid = pchid;
+	INIT_LIST_HEAD(&zbus->bus_next);
+	spin_lock(&zbus_list_lock);
+	list_add_tail(&zbus->bus_next, &zbus_list);
+	spin_unlock(&zbus_list_lock);
+
+	kref_init(&zbus->kref);
+	INIT_LIST_HEAD(&zbus->resources);
+
+	zbus->bus_resource.start = 0;
+	zbus->bus_resource.end = ZPCI_BUS_NR;
+	zbus->bus_resource.flags = IORESOURCE_BUS;
+	pci_add_resource(&zbus->resources, &zbus->bus_resource);
+
+	return zbus;
+}
+
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	/*
+	 * With pdev->no_vf_scan the common PCI probing code does not
+	 * perform PF/VF linking.
+	 */
+	if (zdev->vfn) {
+		zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
+		pdev->no_command_memory = 1;
+	}
+}
+
+static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
+{
+	struct pci_bus *bus;
+	struct resource_entry *window, *n;
+	struct resource *res;
+	struct pci_dev *pdev;
+	int rc;
+
+	bus = zbus->bus;
+	if (!bus)
+		return -EINVAL;
+
+	pdev = pci_get_slot(bus, zdev->devfn);
+	if (pdev) {
+		/* Device is already known. */
+		pci_dev_put(pdev);
+		return 0;
+	}
+
+	rc = zpci_init_slot(zdev);
+	if (rc)
+		return rc;
+	zdev->has_hp_slot = 1;
+
+	resource_list_for_each_entry_safe(window, n, &zbus->resources) {
+		res = window->res;
+		pci_bus_add_resource(bus, res, 0);
+	}
+
+	pdev = pci_scan_single_device(bus, zdev->devfn);
+	if (pdev)
+		pci_bus_add_device(pdev);
+
+	return 0;
+}
+
+static void zpci_bus_add_devices(struct zpci_bus *zbus)
+{
+	int i;
+
+	for (i = 1; i < ZPCI_FUNCTIONS_PER_BUS; i++)
+		if (zbus->function[i])
+			zpci_bus_add_device(zbus, zbus->function[i]);
+
+	pci_lock_rescan_remove();
+	pci_bus_add_devices(zbus->bus);
+	pci_unlock_rescan_remove();
+}
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops)
+{
+	struct zpci_bus *zbus = NULL;
+	int rc = -EBADF;
+
+	if (zpci_nb_devices == ZPCI_NR_DEVICES) {
+		pr_warn("Adding PCI function %08x failed because the configured limit of %d is reached\n",
+			zdev->fid, ZPCI_NR_DEVICES);
+		return -ENOSPC;
+	}
+	zpci_nb_devices++;
+
+	if (zdev->devfn >= ZPCI_FUNCTIONS_PER_BUS)
+		return -EINVAL;
+
+	if (!s390_pci_no_rid && zdev->rid_available)
+		zbus = zpci_bus_get(zdev->pchid);
+
+	if (!zbus) {
+		zbus = zpci_bus_alloc(zdev->pchid);
+		if (!zbus)
+			return -ENOMEM;
+	}
+
+	zdev->zbus = zbus;
+	if (zbus->function[zdev->devfn]) {
+		pr_err("devfn %04x is already assigned\n", zdev->devfn);
+		goto error; /* rc already set */
+	}
+	zbus->function[zdev->devfn] = zdev;
+
+	zpci_setup_bus_resources(zdev, &zbus->resources);
+
+	if (zbus->bus) {
+		if (!zbus->multifunction) {
+			WARN_ONCE(1, "zbus is not multifunction\n");
+			goto error_bus;
+		}
+		if (!zdev->rid_available) {
+			WARN_ONCE(1, "rid_available not set for multifunction\n");
+			goto error_bus;
+		}
+		rc = zpci_bus_add_device(zbus, zdev);
+		if (rc)
+			goto error_bus;
+	} else if (zdev->devfn == 0) {
+		if (zbus->multifunction && !zdev->rid_available) {
+			WARN_ONCE(1, "rid_available not set on function 0 for multifunction\n");
+			goto error_bus;
+		}
+		rc = zpci_bus_scan(zbus, (u16)zdev->uid, ops);
+		if (rc)
+			goto error_bus;
+		zpci_bus_add_devices(zbus);
+		rc = zpci_init_slot(zdev);
+		if (rc)
+			goto error_bus;
+		zdev->has_hp_slot = 1;
+		zbus->multifunction = zdev->rid_available;
+		zbus->max_bus_speed = zdev->max_bus_speed;
+	} else {
+		zbus->multifunction = 1;
+	}
+
+	return 0;
+
+error_bus:
+	zpci_nb_devices--;
+	zbus->function[zdev->devfn] = NULL;
+error:
+	pr_err("Adding PCI function %08x failed\n", zdev->fid);
+	zpci_bus_put(zbus);
+	return rc;
+}
+
+void zpci_bus_device_unregister(struct zpci_dev *zdev)
+{
+	struct zpci_bus *zbus = zdev->zbus;
+
+	zpci_nb_devices--;
+	zbus->function[zdev->devfn] = NULL;
+	zpci_bus_put(zbus);
+}
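
Note on the bus handling above: zpci_bus_get()/zpci_bus_alloc() together implement a get-or-allocate pattern, looking up an existing zbus by pchid under zbus_list_lock and taking a kref, or allocating and queueing a new one. Below is a minimal userspace sketch of that pattern, assuming nothing beyond standard C and pthreads; every name in it (struct bus, bus_get_or_alloc, ...) is invented for illustration, with the mutex standing in for the spinlock and a plain counter standing in for the kref.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct bus {
	int pchid;
	int refcount;
	struct bus *next;
};

static struct bus *bus_list;
static pthread_mutex_t bus_lock = PTHREAD_MUTEX_INITIALIZER;

static struct bus *bus_get_or_alloc(int pchid)
{
	struct bus *b;

	pthread_mutex_lock(&bus_lock);
	for (b = bus_list; b; b = b->next) {
		if (b->pchid == pchid) {
			b->refcount++;			/* like kref_get() */
			goto out;
		}
	}
	b = calloc(1, sizeof(*b));
	if (b) {
		b->pchid = pchid;
		b->refcount = 1;			/* like kref_init() */
		b->next = bus_list;
		bus_list = b;
	}
out:
	pthread_mutex_unlock(&bus_lock);
	return b;
}

int main(void)
{
	struct bus *a = bus_get_or_alloc(0x10);
	struct bus *b = bus_get_or_alloc(0x10);

	printf("same bus: %d, refcount: %d\n", a == b, a ? a->refcount : 0);
	return 0;
}

Build with "cc -pthread"; the second lookup returns the same object with the count raised to 2, which mirrors a second function on the same pchid reusing the existing zbus.
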
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
new file mode 100644
index 0000000..55c9488
--- /dev/null
+++ b/arch/s390/pci/pci_bus.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+
+int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops);
+void zpci_bus_device_unregister(struct zpci_dev *zdev);
+
+void zpci_release_device(struct kref *kref);
+static inline void zpci_zdev_put(struct zpci_dev *zdev)
+{
+	kref_put(&zdev->kref, zpci_release_device);
+}
+
+static inline void zpci_zdev_get(struct zpci_dev *zdev)
+{
+	kref_get(&zdev->kref);
+}
+
+int zpci_alloc_domain(int domain);
+void zpci_free_domain(int domain);
+int zpci_setup_bus_resources(struct zpci_dev *zdev,
+			     struct list_head *resources);
+
+static inline struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus,
+					       unsigned int devfn)
+{
+	struct zpci_bus *zbus = bus->sysdata;
+
+	return (devfn >= ZPCI_FUNCTIONS_PER_BUS) ? NULL : zbus->function[devfn];
+}
+
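
The per-bus function table that get_zdev_by_bus() indexes is keyed by devfn, which the CLP query code later in this patch derives from the low bits of the function's RID (response->rid & ZPCI_RID_MASK_DEVFN). As a quick illustration, a PCI routing ID packs bus number and devfn into 16 bits; the sketch below splits one apart and applies the same bounds check as get_zdev_by_bus(). The 256-slot figure matches the usual devfn range and is only presumed to be what ZPCI_FUNCTIONS_PER_BUS expands to.

#include <stdio.h>
#include <stdint.h>

#define FUNCTIONS_PER_BUS 256	/* presumed value of ZPCI_FUNCTIONS_PER_BUS */

int main(void)
{
	uint16_t rid = 0x2a03;			/* example routing ID */
	unsigned int bus = rid >> 8;		/* upper byte: bus number */
	unsigned int devfn = rid & 0xff;	/* lower byte: devfn */

	printf("rid 0x%04x -> bus 0x%02x devfn 0x%02x\n", rid, bus, devfn);
	if (devfn >= FUNCTIONS_PER_BUS)
		printf("devfn out of range, no lookup\n");
	return 0;
}
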
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 20e093f..0a0e8b8 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -24,7 +24,7 @@
 
 bool zpci_unique_uid;
 
-static void update_uid_checking(bool new)
+void update_uid_checking(bool new)
 {
 	if (zpci_unique_uid != new)
 		zpci_dbg(1, "uid checking:%d\n", new);
@@ -102,6 +102,7 @@
 	zdev->msi_addr = response->msia;
 	zdev->max_msi = response->noi;
 	zdev->fmb_update = response->mui;
+	zdev->version = response->version;
 
 	switch (response->version) {
 	case 1:
@@ -145,7 +146,7 @@
 {
 	int i;
 
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		zdev->bars[i].val = le32_to_cpu(response->bar[i]);
 		zdev->bars[i].size = response->bar_size[i];
 	}
@@ -155,17 +156,23 @@
 	zdev->pfgid = response->pfgid;
 	zdev->pft = response->pft;
 	zdev->vfn = response->vfn;
+	zdev->port = response->port;
 	zdev->uid = response->uid;
 	zdev->fmb_length = sizeof(u32) * response->fmb_len;
+	zdev->rid_available = response->rid_avail;
+	zdev->is_physfn = response->is_physfn;
+	if (!s390_pci_no_rid && zdev->rid_available)
+		zdev->devfn = response->rid & ZPCI_RID_MASK_DEVFN;
 
 	memcpy(zdev->pfip, response->pfip, sizeof(zdev->pfip));
 	if (response->util_str_avail) {
 		memcpy(zdev->util_str, response->util_str,
 		       sizeof(zdev->util_str));
+		zdev->util_str_avail = 1;
 	}
 	zdev->mio_capable = response->mio_addr_avail;
-	for (i = 0; i < PCI_BAR_COUNT; i++) {
-		if (!(response->mio.valid & (1 << (PCI_BAR_COUNT - i - 1))))
+	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+		if (!(response->mio.valid & (1 << (PCI_STD_NUM_BARS - i - 1))))
 			continue;
 
 		zdev->bars[i].mio_wb = (void __iomem *) response->mio.addr[i].wb;
@@ -174,7 +181,7 @@
 	return 0;
 }
 
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int clp_query_pci_fn(struct zpci_dev *zdev)
 {
 	struct clp_req_rsp_query_pci *rrb;
 	int rc;
@@ -187,7 +194,7 @@
 	rrb->request.hdr.len = sizeof(rrb->request);
 	rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
 	rrb->response.hdr.len = sizeof(rrb->response);
-	rrb->request.fh = fh;
+	rrb->request.fh = zdev->fh;
 
 	rc = clp_req(rrb, CLP_LPS_PCI);
 	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
@@ -205,49 +212,20 @@
 	return rc;
 }
 
-int clp_add_pci_device(u32 fid, u32 fh, int configured)
-{
-	struct zpci_dev *zdev;
-	int rc = -ENOMEM;
-
-	zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured);
-	zdev = kzalloc(sizeof(*zdev), GFP_KERNEL);
-	if (!zdev)
-		goto error;
-
-	zdev->fh = fh;
-	zdev->fid = fid;
-
-	/* Query function properties and update zdev */
-	rc = clp_query_pci_fn(zdev, fh);
-	if (rc)
-		goto error;
-
-	if (configured)
-		zdev->state = ZPCI_FN_STATE_CONFIGURED;
-	else
-		zdev->state = ZPCI_FN_STATE_STANDBY;
-
-	rc = zpci_create_device(zdev);
-	if (rc)
-		goto error;
-	return 0;
-
-error:
-	zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc);
-	kfree(zdev);
-	return rc;
-}
-
-/*
- * Enable/Disable a given PCI function and update its function handle if
- * necessary
+static int clp_refresh_fh(u32 fid);
+/**
+ * clp_set_pci_fn() - Execute a command on a PCI function
+ * @zdev: Function that will be affected
+ * @nr_dma_as: DMA address space number
+ * @command: The command code to execute
+ *
+ * Returns: 0 on success, < 0 for Linux errors (e.g. -ENOMEM), and
+ * > 0 for non-success platform responses
  */
 static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
 {
 	struct clp_req_rsp_set_pci *rrb;
 	int rc, retries = 100;
-	u32 fid = zdev->fid;
 
 	rrb = clp_alloc_block(GFP_KERNEL);
 	if (!rrb)
@@ -271,17 +249,50 @@
 		}
 	} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
 
-	if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
-		zpci_err("Set PCI FN:\n");
-		zpci_err_clp(rrb->response.hdr.rsp, rc);
-	}
-
 	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
 		zdev->fh = rrb->response.fh;
-	} else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY &&
-			rrb->response.fh == 0) {
+	} else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY) {
 		/* Function is already in desired state - update handle */
-		rc = clp_rescan_pci_devices_simple(&fid);
+		rc = clp_refresh_fh(zdev->fid);
+	} else {
+		zpci_err("Set PCI FN:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		if (!rc)
+			rc = rrb->response.hdr.rsp;
+	}
+	clp_free_block(rrb);
+	return rc;
+}
+
+int clp_setup_writeback_mio(void)
+{
+	struct clp_req_rsp_slpc_pci *rrb;
+	u8  wb_bit_pos;
+	int rc;
+
+	rrb = clp_alloc_block(GFP_KERNEL);
+	if (!rrb)
+		return -ENOMEM;
+
+	memset(rrb, 0, sizeof(*rrb));
+	rrb->request.hdr.len = sizeof(rrb->request);
+	rrb->request.hdr.cmd = CLP_SLPC;
+	rrb->response.hdr.len = sizeof(rrb->response);
+
+	rc = clp_req(rrb, CLP_LPS_PCI);
+	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+		if (rrb->response.vwb) {
+			wb_bit_pos = rrb->response.mio_wb;
+			set_bit_inv(wb_bit_pos, &mio_wb_bit_mask);
+			zpci_dbg(3, "wb bit: %d\n", wb_bit_pos);
+		} else {
+			zpci_dbg(3, "wb bit: n.a.\n");
+		}
+
+	} else {
+		zpci_err("SLPC PCI:\n");
+		zpci_err_clp(rrb->response.hdr.rsp, rc);
+		rc = -EIO;
 	}
 	clp_free_block(rrb);
 	return rc;
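
The kernel-doc added for clp_set_pci_fn() spells out a three-way return convention: 0 on success, a negative errno for Linux-side failures, and a positive platform response code otherwise. A caller can therefore branch on the sign of the result; the toy program below does just that, with clp_set_stub() and its response value invented purely to exercise the three cases.

#include <errno.h>
#include <stdio.h>

static int clp_set_stub(int simulate)
{
	if (simulate < 0)
		return -ENOMEM;		/* Linux error, e.g. allocation failure */
	if (simulate > 0)
		return 0x0102;		/* made-up non-success platform response */
	return 0;			/* success */
}

int main(void)
{
	for (int sim = -1; sim <= 1; sim++) {
		int rc = clp_set_stub(sim);

		if (rc == 0)
			printf("ok\n");
		else if (rc < 0)
			printf("kernel-side error %d\n", rc);
		else
			printf("platform response 0x%x\n", rc);
	}
	return 0;
}
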
@@ -293,17 +304,13 @@
 
 	rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
 	zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
-	if (rc)
-		goto out;
-
-	if (zpci_use_mio(zdev)) {
+	if (!rc && zpci_use_mio(zdev)) {
 		rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO);
 		zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
 				zdev->fid, zdev->fh, rc);
 		if (rc)
 			clp_disable_fh(zdev);
 	}
-out:
 	return rc;
 }
 
@@ -366,25 +373,7 @@
 
 	zdev = get_zdev_by_fid(entry->fid);
 	if (!zdev)
-		clp_add_pci_device(entry->fid, entry->fh, entry->config_state);
-}
-
-static void __clp_update(struct clp_fh_list_entry *entry, void *data)
-{
-	struct zpci_dev *zdev;
-	u32 *fid = data;
-
-	if (!entry->vendor_id)
-		return;
-
-	if (fid && *fid != entry->fid)
-		return;
-
-	zdev = get_zdev_by_fid(entry->fid);
-	if (!zdev)
-		return;
-
-	zdev->fh = entry->fh;
+		zpci_create_device(entry->fid, entry->fh, entry->config_state);
 }
 
 int clp_scan_pci_devices(void)
@@ -402,27 +391,25 @@
 	return rc;
 }
 
-int clp_rescan_pci_devices(void)
+static void __clp_refresh_fh(struct clp_fh_list_entry *entry, void *data)
 {
-	struct clp_req_rsp_list_pci *rrb;
-	int rc;
+	struct zpci_dev *zdev;
+	u32 fid = *((u32 *)data);
 
-	zpci_remove_reserved_devices();
+	if (!entry->vendor_id || fid != entry->fid)
+		return;
 
-	rrb = clp_alloc_block(GFP_KERNEL);
-	if (!rrb)
-		return -ENOMEM;
+	zdev = get_zdev_by_fid(fid);
+	if (!zdev)
+		return;
 
-	rc = clp_list_pci(rrb, NULL, __clp_add);
-
-	clp_free_block(rrb);
-	return rc;
+	zdev->fh = entry->fh;
 }
 
-/* Rescan PCI functions and refresh function handles. If fid is non-NULL only
- * refresh the handle of the function matching @fid
+/*
+ * Refresh the function handle of the function matching @fid
  */
-int clp_rescan_pci_devices_simple(u32 *fid)
+static int clp_refresh_fh(u32 fid)
 {
 	struct clp_req_rsp_list_pci *rrb;
 	int rc;
@@ -431,7 +418,7 @@
 	if (!rrb)
 		return -ENOMEM;
 
-	rc = clp_list_pci(rrb, fid, __clp_update);
+	rc = clp_list_pci(rrb, &fid, __clp_refresh_fh);
 
 	clp_free_block(rrb);
 	return rc;
@@ -490,7 +477,7 @@
 	}
 }
 
-static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb)
+static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc_pci *lpcb)
 {
 	unsigned long limit = PAGE_SIZE - sizeof(lpcb->request);
 
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 64b1399..ebc9a49 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -10,7 +10,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/iommu-helper.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
 #include <linux/vmalloc.h>
 #include <linux/pci.h>
 #include <asm/pci_dma.h>
@@ -261,13 +261,11 @@
 				       unsigned long start, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long boundary_size;
 
-	boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-			      PAGE_SIZE) >> PAGE_SHIFT;
 	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
 				start, size, zdev->start_dma >> PAGE_SHIFT,
-				boundary_size, 0);
+				dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
+				0);
 }
 
 static dma_addr_t dma_alloc_address(struct device *dev, int size)
@@ -670,6 +668,8 @@
 	.unmap_page	= s390_dma_unmap_pages,
 	.mmap		= dma_common_mmap,
 	.get_sgtable	= dma_common_get_sgtable,
+	.alloc_pages	= dma_common_alloc_pages,
+	.free_pages	= dma_common_free_pages,
 	/* dma_supported is unconditionally true without a callback */
 };
 EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
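
The pci_dma.c change folds the open-coded boundary computation into dma_get_seg_boundary_nr_pages(), which is assumed here to yield the same value for the usual power-of-two-minus-one boundary masks. A quick arithmetic check in plain C, using a 4 GiB - 1 mask and 4 KiB pages as example inputs (64-bit host assumed so the +1 does not wrap):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long boundary_mask = 0xffffffffUL;	/* example dma_get_seg_boundary() value */

	/* the open-coded form removed above */
	unsigned long a = ALIGN(boundary_mask + 1, PAGE_SIZE) >> PAGE_SHIFT;
	/* what the helper is presumed to compute */
	unsigned long b = (boundary_mask >> PAGE_SHIFT) + 1;

	printf("open-coded: %lu, helper form: %lu\n", a, b);
	return 0;
}

Both print 1048576 pages for these inputs, i.e. the iommu allocator keeps receiving the segment boundary expressed in pages.
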
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 8d6ee4a..b7cfde7 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -14,6 +14,8 @@
 #include <asm/pci_debug.h>
 #include <asm/sclp.h>
 
+#include "pci_bus.h"
+
 /* Content Code Description for PCI Function Error */
 struct zpci_ccdf_err {
 	u32 reserved1;
@@ -53,7 +55,7 @@
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
 	if (zdev)
-		pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
+		pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
 
 	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
 	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -74,46 +76,52 @@
 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
 	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
-	struct pci_dev *pdev = NULL;
 	enum zpci_state state;
+	struct pci_dev *pdev;
 	int ret;
 
-	if (zdev)
-		pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN);
-
-	pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n",
-		pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
 	zpci_err("avail CCDF:\n");
 	zpci_err_hex(ccdf, sizeof(*ccdf));
 
 	switch (ccdf->pec) {
 	case 0x0301: /* Reserved|Standby -> Configured */
 		if (!zdev) {
-			ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
-			if (ret)
-				break;
-			zdev = get_zdev_by_fid(ccdf->fid);
-		}
-		if (!zdev || zdev->state != ZPCI_FN_STATE_STANDBY)
+			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
 			break;
-		zdev->state = ZPCI_FN_STATE_CONFIGURED;
+		}
+		/* the configuration request may be stale */
+		if (zdev->state != ZPCI_FN_STATE_STANDBY)
+			break;
 		zdev->fh = ccdf->fh;
+		zdev->state = ZPCI_FN_STATE_CONFIGURED;
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			break;
+
+		/* the PCI function will be scanned once function 0 appears */
+		if (!zdev->zbus->bus)
+			break;
+
+		pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
+		if (!pdev)
+			break;
+
+		pci_bus_add_device(pdev);
 		pci_lock_rescan_remove();
-		pci_rescan_bus(zdev->bus);
+		pci_bus_add_devices(zdev->zbus->bus);
 		pci_unlock_rescan_remove();
 		break;
 	case 0x0302: /* Reserved -> Standby */
-		if (!zdev)
-			clp_add_pci_device(ccdf->fid, ccdf->fh, 0);
+		if (!zdev) {
+			zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
+			break;
+		}
+		zdev->fh = ccdf->fh;
 		break;
 	case 0x0303: /* Deconfiguration requested */
 		if (!zdev)
 			break;
-		if (pdev)
-			pci_stop_and_remove_bus_device_locked(pdev);
+		zpci_remove_device(zdev, false);
 
 		ret = zpci_disable_device(zdev);
 		if (ret)
@@ -128,33 +136,31 @@
 	case 0x0304: /* Configured -> Standby|Reserved */
 		if (!zdev)
 			break;
-		if (pdev) {
-			/* Give the driver a hint that the function is
-			 * already unusable. */
-			pdev->error_state = pci_channel_io_perm_failure;
-			pci_stop_and_remove_bus_device_locked(pdev);
-		}
+		/* Give the driver a hint that the function is
+		 * already unusable.
+		 */
+		zpci_remove_device(zdev, true);
 
 		zdev->fh = ccdf->fh;
 		zpci_disable_device(zdev);
 		zdev->state = ZPCI_FN_STATE_STANDBY;
 		if (!clp_get_state(ccdf->fid, &state) &&
 		    state == ZPCI_FN_STATE_RESERVED) {
-			zpci_remove_device(zdev);
+			zpci_device_reserved(zdev);
 		}
 		break;
 	case 0x0306: /* 0x308 or 0x302 for multiple devices */
-		clp_rescan_pci_devices();
+		zpci_remove_reserved_devices();
+		clp_scan_pci_devices();
 		break;
 	case 0x0308: /* Standby -> Reserved */
 		if (!zdev)
 			break;
-		zpci_remove_device(zdev);
+		zpci_device_reserved(zdev);
 		break;
 	default:
 		break;
 	}
-	pci_dev_put(pdev);
 }
 
 void zpci_event_availability(void *data)
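
For quick reference, a small self-contained C table of the availability event codes handled by the reworked switch above, taken verbatim from its case labels:

#include <stdio.h>

struct pec_desc {
	unsigned int pec;
	const char *transition;
};

static const struct pec_desc pec_table[] = {
	{ 0x0301, "Reserved|Standby -> Configured" },
	{ 0x0302, "Reserved -> Standby" },
	{ 0x0303, "Deconfiguration requested" },
	{ 0x0304, "Configured -> Standby|Reserved" },
	{ 0x0306, "0x308 or 0x302 for multiple devices" },
	{ 0x0308, "Standby -> Reserved" },
};

int main(void)
{
	for (unsigned int i = 0; i < sizeof(pec_table) / sizeof(pec_table[0]); i++)
		printf("0x%04x: %s\n", pec_table[i].pec, pec_table[i].transition);
	return 0;
}
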
diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c
new file mode 100644
index 0000000..ead062b
--- /dev/null
+++ b/arch/s390/pci/pci_iov.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+#include "pci_iov.h"
+
+static struct resource iov_res = {
+	.name	= "PCI IOV res",
+	.start	= 0,
+	.end	= -1,
+	.flags	= IORESOURCE_MEM,
+};
+
+void zpci_iov_map_resources(struct pci_dev *pdev)
+{
+	resource_size_t len;
+	int i;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		int bar = i + PCI_IOV_RESOURCES;
+
+		len = pci_resource_len(pdev, bar);
+		if (!len)
+			continue;
+		pdev->resource[bar].parent = &iov_res;
+	}
+}
+
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn)
+{
+	pci_lock_rescan_remove();
+	/* Linux' vfid's start at 0 vfn at 1 */
+	pci_iov_remove_virtfn(pdev->physfn, vfn - 1);
+	pci_unlock_rescan_remove();
+}
+
+static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, int vfid)
+{
+	int rc;
+
+	rc = pci_iov_sysfs_link(pdev, virtfn, vfid);
+	if (rc)
+		return rc;
+
+	virtfn->is_virtfn = 1;
+	virtfn->multifunction = 0;
+	virtfn->physfn = pci_dev_get(pdev);
+
+	return 0;
+}
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+	int i, cand_devfn;
+	struct zpci_dev *zdev;
+	struct pci_dev *pdev;
+	int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/
+	int rc = 0;
+
+	if (!zbus->multifunction)
+		return 0;
+
+	/* If the parent PF for the given VF is also configured in the
+	 * instance, it must be on the same zbus.
+	 * We can then identify the parent PF by checking what
+	 * devfn the VF would have if it belonged to that PF using the PF's
+	 * stride and offset. Only if this candidate devfn matches the
+	 * actual devfn will we link both functions.
+	 */
+	for (i = 0; i < ZPCI_FUNCTIONS_PER_BUS; i++) {
+		zdev = zbus->function[i];
+		if (zdev && zdev->is_physfn) {
+			pdev = pci_get_slot(zbus->bus, zdev->devfn);
+			if (!pdev)
+				continue;
+			cand_devfn = pci_iov_virtfn_devfn(pdev, vfid);
+			if (cand_devfn == virtfn->devfn) {
+				rc = zpci_iov_link_virtfn(pdev, virtfn, vfid);
+				/* balance pci_get_slot() */
+				pci_dev_put(pdev);
+				break;
+			}
+			/* balance pci_get_slot() */
+			pci_dev_put(pdev);
+		}
+	}
+	return rc;
+}
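
The candidate-devfn comparison in zpci_iov_setup_virtfn() leans on SR-IOV routing-ID arithmetic: a VF's devfn follows from the PF's devfn plus the First VF Offset plus VF Stride times the VF index, truncated to 8 bits, which is what pci_iov_virtfn_devfn() is assumed to compute. A userspace sketch with invented offset/stride values:

#include <stdio.h>

static int virtfn_devfn(int pf_devfn, int offset, int stride, int vfid)
{
	return (pf_devfn + offset + stride * vfid) & 0xff;
}

int main(void)
{
	int pf_devfn = 0x00, offset = 1, stride = 1;	/* made-up SR-IOV capability values */
	int vf_devfn = 0x03;				/* devfn reported for the VF */

	for (int vfid = 0; vfid < 4; vfid++) {
		int cand = virtfn_devfn(pf_devfn, offset, stride, vfid);

		printf("vfid %d -> candidate devfn 0x%02x%s\n", vfid, cand,
		       cand == vf_devfn ? "  (match, would link)" : "");
	}
	return 0;
}

Only vfid 2 matches here, so only that pairing would be linked; the kernel loop walks PF candidates for one VF instead, but the comparison is the same.
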
diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h
new file mode 100644
index 0000000..b2c8280
--- /dev/null
+++ b/arch/s390/pci/pci_iov.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s):
+ *   Niklas Schnelle <schnelle@linux.ibm.com>
+ *
+ */
+
+#ifndef __S390_PCI_IOV_H
+#define __S390_PCI_IOV_H
+
+#ifdef CONFIG_PCI_IOV
+void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn);
+
+void zpci_iov_map_resources(struct pci_dev *pdev);
+
+int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn);
+
+#else /* CONFIG_PCI_IOV */
+static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {}
+
+static inline void zpci_iov_map_resources(struct pci_dev *pdev) {}
+
+static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn)
+{
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+#endif /* __S390_PCI_IOV_H */
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index 743f257..75217fb 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -103,9 +103,10 @@
 {
 	struct msi_desc *entry = irq_get_msi_desc(data->irq);
 	struct msi_msg msg = entry->msg;
+	int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest));
 
 	msg.address_lo &= 0xff0000ff;
-	msg.address_lo |= (cpumask_first(dest) << 8);
+	msg.address_lo |= (cpu_addr << 8);
 	pci_write_msi_msg(data->irq, &msg);
 
 	return IRQ_SET_MASK_OK;
@@ -238,6 +239,7 @@
 	unsigned long bit;
 	struct msi_desc *msi;
 	struct msi_msg msg;
+	int cpu_addr;
 	int rc, irq;
 
 	zdev->aisb = -1UL;
@@ -287,9 +289,15 @@
 					 handle_percpu_irq);
 		msg.data = hwirq - bit;
 		if (irq_delivery == DIRECTED) {
+			if (msi->affinity)
+				cpu = cpumask_first(&msi->affinity->mask);
+			else
+				cpu = 0;
+			cpu_addr = smp_cpu_get_cpu_address(cpu);
+
 			msg.address_lo = zdev->msi_addr & 0xff0000ff;
-			msg.address_lo |= msi->affinity ?
-				(cpumask_first(&msi->affinity->mask) << 8) : 0;
+			msg.address_lo |= (cpu_addr << 8);
+
 			for_each_possible_cpu(cpu) {
 				airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
 			}
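
Both hunks in pci_irq.c make the same fix: for directed interrupts the field at bits 8-23 of the MSI address must carry the CPU address returned by smp_cpu_get_cpu_address(), not the logical CPU number. A plain C bit-twiddling demo with made-up values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t address_lo = 0xa1b2c3d4;	/* example zdev->msi_addr low word */
	uint32_t cpu_addr = 0x9876;		/* example smp_cpu_get_cpu_address() result */

	address_lo &= 0xff0000ff;		/* clear the CPU field, as in the patch */
	address_lo |= cpu_addr << 8;		/* insert the CPU address at bits 8-23 */
	printf("address_lo: 0x%08x\n", address_lo);
	return 0;
}
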
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 020a2c5..37b1bbd 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -125,17 +125,17 @@
 	struct vm_area_struct *vma;
 	long ret;
 
-	down_read(&current->mm->mmap_sem);
+	mmap_read_lock(current->mm);
 	ret = -EINVAL;
 	vma = find_vma(current->mm, user_addr);
-	if (!vma)
+	if (!vma || user_addr < vma->vm_start)
 		goto out;
 	ret = -EACCES;
 	if (!(vma->vm_flags & access))
 		goto out;
 	ret = follow_pfn(vma, user_addr, pfn);
 out:
-	up_read(&current->mm->mmap_sem);
+	mmap_read_unlock(current->mm);
 	return ret;
 }
 
@@ -155,10 +155,12 @@
 		return -EINVAL;
 
 	/*
-	 * Only support read access to MIO capable devices on a MIO enabled
-	 * system. Otherwise we would have to check for every address if it is
-	 * a special ZPCI_ADDR and we would have to do a get_pfn() which we
-	 * don't need for MIO capable devices.
+	 * We only support write access to MIO capable devices if we are on
+	 * a MIO enabled system. Otherwise we would have to check for every
+	 * address if it is a special ZPCI_ADDR and would have to do
+	 * a get_pfn() which we don't need for MIO capable devices.  Currently
+	 * ISM devices are the only devices without MIO support and there is no
+	 * known need for accessing these from userspace.
 	 */
 	if (static_branch_likely(&have_mio)) {
 		ret = __memcpy_toio_inuser((void  __iomem *) mmio_addr,
@@ -282,10 +284,12 @@
 		return -EINVAL;
 
 	/*
-	 * Only support write access to MIO capable devices on a MIO enabled
-	 * system. Otherwise we would have to check for every address if it is
-	 * a special ZPCI_ADDR and we would have to do a get_pfn() which we
-	 * don't need for MIO capable devices.
+	 * We only support read access to MIO capable devices if we are on
+	 * a MIO enabled system. Otherwise we would have to check for every
+	 * address if it is a special ZPCI_ADDR and would have to do
+	 * a get_pfn() which we don't need for MIO capable devices.  Currently
+	 * ISM devices are the only devices without MIO support and there is no
+	 * known need for accessing these from userspace.
 	 */
 	if (static_branch_likely(&have_mio)) {
 		ret = __memcpy_fromio_inuser(
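
The new "user_addr < vma->vm_start" test in get_pfn() guards against addresses that fall into a hole: a find_vma()-style lookup returns the first mapping that ends above the address, which may well start above it too. A userspace model of that situation (ranges and address are invented):

#include <stdio.h>

struct range { unsigned long start, end; };

static const struct range maps[] = {
	{ 0x1000, 0x2000 },
	{ 0x8000, 0x9000 },
};

static const struct range *find_range(unsigned long addr)
{
	for (unsigned int i = 0; i < sizeof(maps) / sizeof(maps[0]); i++)
		if (addr < maps[i].end)		/* first range ending above addr */
			return &maps[i];
	return NULL;
}

int main(void)
{
	unsigned long addr = 0x5000;		/* sits in the hole before 0x8000 */
	const struct range *r = find_range(addr);

	if (!r || addr < r->start)
		printf("0x%lx is not mapped, reject\n", addr);
	else
		printf("0x%lx lies inside [0x%lx, 0x%lx)\n", addr, r->start, r->end);
	return 0;
}
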
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 215f174..5c028be 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -33,6 +33,7 @@
 zpci_attr(pfgid, "0x%02x\n", pfgid);
 zpci_attr(vfn, "0x%04x\n", vfn);
 zpci_attr(pft, "0x%02x\n", pft);
+zpci_attr(port, "%d\n", port);
 zpci_attr(uid, "0x%x\n", uid);
 zpci_attr(segment0, "0x%02x\n", pfip[0]);
 zpci_attr(segment1, "0x%02x\n", pfip[1]);
@@ -88,7 +89,7 @@
 		ret = zpci_enable_device(zdev);
 		if (ret)
 			goto out;
-		pci_rescan_bus(zdev->bus);
+		pci_rescan_bus(zdev->zbus->bus);
 	}
 out:
 	pci_unlock_rescan_remove();
@@ -142,6 +143,7 @@
 	&dev_attr_pchid.attr,
 	&dev_attr_pfgid.attr,
 	&dev_attr_pft.attr,
+	&dev_attr_port.attr,
 	&dev_attr_vfn.attr,
 	&dev_attr_uid.attr,
 	&dev_attr_recover.attr,
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index 04a0343..97ca527 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
 purgatory
+purgatory.chk
 purgatory.lds
 purgatory.ro
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 9de5606..21c4ebe 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -4,7 +4,7 @@
 
 purgatory-y := head.o purgatory.o string.o sha256.o mem.o
 
-targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
 $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
@@ -24,19 +24,27 @@
 KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
 KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
 KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += -fno-stack-protector
 KBUILD_CFLAGS += $(CLANG_FLAGS)
 KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
 
-LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T
+# Since we link purgatory with -r unresolved symbols are not checked, so we
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib
+LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T
+LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS)
 $(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
 		$(call if_changed,ld)
 
+$(obj)/purgatory.chk: $(obj)/purgatory FORCE
+		$(call if_changed,ld)
+
 OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
 OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
 OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
 OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
-$(obj)/purgatory.ro: $(obj)/purgatory FORCE
+$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
 		$(call if_changed,objcopy)
 
 $(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
deleted file mode 100644
index f4f4c2c..0000000
--- a/arch/s390/scripts/Makefile.chkbss
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-chkbss-target ?= built-in.a
-$(obj)/$(chkbss-target): chkbss
-
-chkbss-files := $(addsuffix .chkbss, $(chkbss))
-clean-files += $(chkbss-files)
-
-PHONY += chkbss
-chkbss: $(addprefix $(obj)/, $(chkbss-files))
-
-quiet_cmd_chkbss = CHKBSS  $<
-      cmd_chkbss = \
-	if ! $(OBJSIZE) --common $< | $(AWK) 'END { if ($$3) exit 1 }'; then \
-		echo "error: $< .bss section is not empty" >&2; exit 1; \
-	fi; \
-	touch $@;
-
-$(obj)/%.o.chkbss: $(obj)/%.o
-	$(call cmd,chkbss)
diff --git a/arch/s390/tools/.gitignore b/arch/s390/tools/.gitignore
index 71bd6f8..ea62f37 100644
--- a/arch/s390/tools/.gitignore
+++ b/arch/s390/tools/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
 gen_facilities
 gen_opcode_table
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index b5e35e8..f9dd47f 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -10,8 +10,8 @@
 
 kapi:	$(kapi-hdrs-y)
 
-hostprogs-y		    += gen_facilities
-hostprogs-y		    += gen_opcode_table
+hostprogs		    += gen_facilities
+hostprogs		    += gen_opcode_table
 
 HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)