Linux v4.19.13 snapshot: arch/sparc build system, boot tooling, defconfigs, and crypto assembly.
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild
new file mode 100644
index 0000000..b4d0f57
--- /dev/null
+++ b/arch/sparc/Kbuild
@@ -0,0 +1,10 @@
+#
+# core part of the sparc kernel
+#
+
+obj-y += kernel/
+obj-y += mm/
+obj-y += math-emu/
+obj-y += net/
+obj-y += crypto/
+obj-$(CONFIG_SPARC64) += vdso/
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
new file mode 100644
index 0000000..e6f2a38
--- /dev/null
+++ b/arch/sparc/Kconfig
@@ -0,0 +1,567 @@
+config 64BIT
+	bool "64-bit kernel" if "$(ARCH)" = "sparc"
+	default "$(ARCH)" = "sparc64"
+	help
+	  SPARC is a family of RISC microprocessors designed and marketed by
+	  Sun Microsystems, Incorporated.  They are very widely found in Sun
+	  workstations and clones.
+
+	  Say yes to build a 64-bit kernel - formerly known as sparc64.
+	  Say no to build a 32-bit kernel - formerly known as sparc.
+	  (The prompt is only offered when building with ARCH=sparc;
+	  ARCH=sparc64 implies a 64-bit kernel.)
+
+config SPARC
+	bool
+	default y
+	select ARCH_MIGHT_HAVE_PC_PARPORT if SPARC64 && PCI
+	select ARCH_MIGHT_HAVE_PC_SERIO
+	select OF
+	select OF_PROMTREE
+	select HAVE_IDE
+	select HAVE_OPROFILE
+	select HAVE_ARCH_KGDB if !SMP || SPARC64
+	select HAVE_ARCH_TRACEHOOK
+	select HAVE_EXIT_THREAD
+	select SYSCTL_EXCEPTION_TRACE
+	select RTC_CLASS
+	select RTC_DRV_M48T59
+	select RTC_SYSTOHC
+	select HAVE_ARCH_JUMP_LABEL if SPARC64
+	select GENERIC_IRQ_SHOW
+	select ARCH_WANT_IPC_PARSE_VERSION
+	select GENERIC_PCI_IOMAP
+	select HAVE_NMI_WATCHDOG if SPARC64
+	select HAVE_CBPF_JIT if SPARC32
+	select HAVE_EBPF_JIT if SPARC64
+	select HAVE_DEBUG_BUGVERBOSE
+	select GENERIC_SMP_IDLE_THREAD
+	select GENERIC_CLOCKEVENTS
+	select GENERIC_STRNCPY_FROM_USER
+	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_RELA
+	select ODD_RT_SIGACTION
+	select OLD_SIGSUSPEND
+	select ARCH_HAS_SG_CHAIN
+	select CPU_NO_EFFICIENT_FFS
+	select LOCKDEP_SMALL if LOCKDEP
+	select NEED_DMA_MAP_STATE
+	select NEED_SG_DMA_LENGTH
+	select HAVE_MEMBLOCK
+	select NO_BOOTMEM
+
+config SPARC32
+	def_bool !64BIT
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select DMA_NONCOHERENT_OPS
+	select GENERIC_ATOMIC64
+	select CLZ_TAB
+	select HAVE_UID16
+	select OLD_SIGACTION
+
+config SPARC64
+	def_bool 64BIT
+	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_KRETPROBES
+	select HAVE_KPROBES
+	select HAVE_RCU_TABLE_FREE if SMP
+	select HAVE_MEMBLOCK_NODE_MAP
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_CONTEXT_TRACKING
+	select HAVE_DEBUG_KMEMLEAK
+	select IOMMU_HELPER
+	select SPARSE_IRQ
+	select RTC_DRV_CMOS
+	select RTC_DRV_BQ4802
+	select RTC_DRV_SUN4V
+	select RTC_DRV_STARFIRE
+	select HAVE_PERF_EVENTS
+	select PERF_USE_VMALLOC
+	select IRQ_PREFLOW_FASTEOI
+	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select HAVE_C_RECORDMCOUNT
+	select HAVE_ARCH_AUDITSYSCALL
+	select ARCH_SUPPORTS_ATOMIC_RMW
+	select HAVE_NMI
+	select HAVE_REGS_AND_STACK_ACCESS_API
+	select ARCH_USE_QUEUED_RWLOCKS
+	select ARCH_USE_QUEUED_SPINLOCKS
+	select GENERIC_TIME_VSYSCALL
+	select ARCH_CLOCKSOURCE_DATA
+	select ARCH_HAS_PTE_SPECIAL
+
+config ARCH_DEFCONFIG
+	string
+	default "arch/sparc/configs/sparc32_defconfig" if SPARC32
+	default "arch/sparc/configs/sparc64_defconfig" if SPARC64
+
+config ARCH_PROC_KCORE_TEXT
+	def_bool y
+
+config CPU_BIG_ENDIAN
+	def_bool y
+
+config ARCH_ATU
+	bool
+	default y if SPARC64
+
+config STACKTRACE_SUPPORT
+	bool
+	default y if SPARC64
+
+config LOCKDEP_SUPPORT
+	bool
+	default y if SPARC64
+
+config ARCH_HIBERNATION_POSSIBLE
+	def_bool y if SPARC64
+
+config AUDIT_ARCH
+	bool
+	default y
+
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y if SPARC64
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y if SPARC64
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+	def_bool y if SPARC64
+
+config MMU
+	bool
+	default y
+
+config HIGHMEM
+	bool
+	default y if SPARC32
+
+config ZONE_DMA
+	bool
+	default y if SPARC32
+
+config GENERIC_ISA_DMA
+	bool
+	default y if SPARC32
+
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y if SPARC64
+
+config PGTABLE_LEVELS
+	default 4 if 64BIT
+	default 3
+
+config ARCH_SUPPORTS_UPROBES
+	def_bool y if SPARC64
+
+menu "Processor type and features"
+
+config SMP
+	bool "Symmetric multi-processing support"
+	---help---
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N. If you have a system with more
+	  than one CPU, say Y.
+
+	  If you say N here, the kernel will run on uni- and multiprocessor
+	  machines, but will use only one CPU of a multiprocessor machine. If
+	  you say Y here, the kernel will run on many, but not all,
+	  uniprocessor machines. On a uniprocessor machine, the kernel
+	  will run faster if you say N here.
+
+	  People using multiprocessor machines who say Y here should also say
+	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+	  Management" code will be disabled if you say Y here.
+
+	  See also <file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO
+	  available at <http://www.tldp.org/docs.html#howto>.
+
+	  If you don't know what to do here, say N.
+
+config NR_CPUS
+	int "Maximum number of CPUs"
+	depends on SMP
+	range 2 32 if SPARC32
+	range 2 4096 if SPARC64
+	default 32 if SPARC32
+	default 4096 if SPARC64
+
+source "kernel/Kconfig.hz"
+
+config RWSEM_GENERIC_SPINLOCK
+	bool
+	default y if SPARC32
+
+config RWSEM_XCHGADD_ALGORITHM
+	bool
+	default y if SPARC64
+
+config GENERIC_HWEIGHT
+	bool
+	default y
+
+config GENERIC_CALIBRATE_DELAY
+	bool
+	default y
+
+config ARCH_MAY_HAVE_PC_FDC
+	bool
+	default y
+
+config EMULATED_CMPXCHG
+	bool
+	default y if SPARC32
+	help
+	  Sparc32 does not have a CAS instruction like sparc64. cmpxchg()
+	  is emulated, and therefore it is not completely atomic.
+
+# Makefile helpers
+config SPARC32_SMP
+	bool
+	default y
+	depends on SPARC32 && SMP
+
+config SPARC64_SMP
+	bool
+	default y
+	depends on SPARC64 && SMP
+
+config EARLYFB
+	bool "Support for early boot text console"
+	default y
+	depends on SPARC64
+	help
+	  Say Y here to enable a faster early framebuffer boot console.
+
+config SECCOMP
+	bool "Enable seccomp to safely compute untrusted bytecode"
+	depends on SPARC64 && PROC_FS
+	default y
+	help
+	  This kernel feature is useful for number crunching applications
+	  that may need to compute untrusted bytecode during their
+	  execution. By using pipes or other transports made available to
+	  the process as file descriptors supporting the read/write
+	  syscalls, it's possible to isolate those applications in
+	  their own address space using seccomp. Once seccomp is
+	  enabled via /proc/<pid>/seccomp, it cannot be disabled
+	  and the task is only allowed to execute a few safe syscalls
+	  defined by each seccomp mode.
+
+	  If unsure, say Y. Only embedded should say N here.
+
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SPARC64 && SMP
+	help
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu/cpu#.
+	  Say N if you want to disable CPU hotplug.
+
+if SPARC64
+source "drivers/cpufreq/Kconfig"
+endif
+
+config US3_MC
+	tristate "UltraSPARC-III Memory Controller driver"
+	depends on SPARC64
+	default y
+	help
+	  This adds a driver for the UltraSPARC-III memory controller.
+	  Loading this driver allows exact mnemonic strings to be
+	  printed in the event of a memory error, so that the faulty DIMM
+	  on the motherboard can be matched to the error.
+
+	  If in doubt, say Y, as this information can be very useful.
+
+# Global things across all Sun machines.
+config GENERIC_LOCKBREAK
+	bool
+	default y
+	depends on SPARC64 && SMP && PREEMPT
+
+config NUMA
+	bool "NUMA support"
+	depends on SPARC64 && SMP
+
+config NODES_SHIFT
+	int "Maximum NUMA Nodes (as a power of 2)"
+	range 4 5 if SPARC64
+	default "5"
+	depends on NEED_MULTIPLE_NODES
+	help
+	  Specify the maximum number of NUMA Nodes available on the target
+	  system.  Increases memory reserved to accommodate various tables.
+
+# Some NUMA nodes have memory ranges that span
+# other nodes.  Even though a pfn is valid and
+# between a node's start and end pfns, it may not
+# reside on that node.  See memmap_init_zone()
+# for details.
+config NODES_SPAN_OTHER_NODES
+	def_bool y
+	depends on NEED_MULTIPLE_NODES
+
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y if SPARC64
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y if SPARC64
+	select SPARSEMEM_VMEMMAP_ENABLE
+
+config ARCH_SPARSEMEM_DEFAULT
+	def_bool y if SPARC64
+
+config FORCE_MAX_ZONEORDER
+	int "Maximum zone order"
+	default "13"
+	help
+	  The kernel memory allocator divides physically contiguous memory
+	  blocks into "zones", where each zone is a power of two number of
+	  pages.  This option selects the largest power of two that the kernel
+	  keeps in the memory allocator.  If you need to allocate very large
+	  blocks of physically contiguous memory, then you may need to
+	  increase this value.
+
+	  This config option is actually maximum order plus one. For example,
+	  a value of 13 means that the largest free memory block is 2^12 pages.
+
+if SPARC64
+source "kernel/power/Kconfig"
+endif
+
+config SCHED_SMT
+	bool "SMT (Hyperthreading) scheduler support"
+	depends on SPARC64 && SMP
+	default y
+	help
+	  SMT scheduler support improves the CPU scheduler's decision making
+	  when dealing with SPARC cpus at a cost of slightly increased overhead
+	  in some places. If unsure say N here.
+
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SPARC64 && SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places. If unsure say N here.
+
+config CMDLINE_BOOL
+	bool "Default bootloader kernel arguments"
+	depends on SPARC64
+
+config CMDLINE
+	string "Initial kernel command string"
+	depends on CMDLINE_BOOL
+	default "console=ttyS0,9600 root=/dev/sda1"
+	help
+	  Say Y here if you want to be able to pass default arguments to
+	  the kernel. This will be overridden by the bootloader, if you
+	  use one (such as SILO). This is most useful if you want to boot
+	  a kernel from TFTP, and want default options to be available
+	  without having them passed on the command line.
+
+	  NOTE: This option WILL override the PROM bootargs setting!
+
+config SUN_PM
+	bool
+	default y if SPARC32
+	help
+	  Enable power management and CPU standby features on supported
+	  SPARC platforms.
+
+config SPARC_LED
+	tristate "Sun4m LED driver"
+	depends on SPARC32
+	help
+	  This driver toggles the front-panel LED on sun4m systems
+	  in a user-specifiable manner.  Its state can be probed
+	  by reading /proc/led and its blinking mode can be changed
+	  via writes to /proc/led.
+
+config SERIAL_CONSOLE
+	bool
+	depends on SPARC32
+	default y
+	---help---
+	  If you say Y here, it will be possible to use a serial port as the
+	  system console (the system console is the device which receives all
+	  kernel messages and warnings and which allows logins in single user
+	  mode). This could be useful if some terminal or printer is connected
+	  to that serial port.
+
+	  Even if you say Y here, the currently visible virtual console
+	  (/dev/tty0) will still be used as the system console by default, but
+	  you can alter that using a kernel command line option such as
+	  "console=ttyS1". (Try "man bootparam" or see the documentation of
+	  your boot loader (silo) about how to pass options to the kernel at
+	  boot time.)
+
+	  If you don't have a graphics card installed and you say Y here, the
+	  kernel will automatically use the first serial line, /dev/ttyS0, as
+	  system console.
+
+	  If unsure, say N.
+
+config SPARC_LEON
+	bool "Sparc Leon processor family"
+	depends on SPARC32
+	select USB_EHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_DESC
+	select USB_UHCI_BIG_ENDIAN_MMIO
+	select USB_UHCI_BIG_ENDIAN_DESC
+	---help---
+	  Say Y here if you are running on a SPARC-LEON processor.
+	  The LEON processor is a synthesizable VHDL model of the
+	  SPARC-v8 standard. LEON is part of the GRLIB collection of
+	  IP cores that are distributed under GPL. GRLIB, as well as a
+	  sparc-linux cross-compilation toolchain, can be downloaded
+	  from www.gaisler.com.
+
+if SPARC_LEON
+menu "U-Boot options"
+
+config UBOOT_LOAD_ADDR
+	hex "uImage Load Address"
+	default 0x40004000
+	---help---
+	 U-Boot kernel load address, the address in physical address space
+	 where U-Boot will place the Linux kernel before booting it.
+	 This address is normally the base address of main memory + 0x4000.
+
+config UBOOT_FLASH_ADDR
+	hex "uImage.o Load Address"
+	default 0x00080000
+	---help---
+	 Optional setting only affecting the uImage.o ELF-image used to
+	 download the uImage file to the target using an ELF-loader other than
+	 U-Boot. It may for example be used to download a uImage to FLASH with
+	 the GRMON utility before even starting U-Boot.
+
+config UBOOT_ENTRY_ADDR
+	hex "uImage Entry Address"
+	default 0xf0004000
+	---help---
+	 Do not change this unless you know what you're doing. This is
+	 hardcoded by the SPARC32 and LEON port.
+
+	 This is the virtual address U-Boot jumps to when booting the Linux
+	 Kernel.
+
+endmenu
+endif
+
+endmenu
+
+menu "Bus options (PCI etc.)"
+config SBUS
+	bool
+	default y
+
+config SBUSCHAR
+	bool
+	default y
+
+config SUN_LDOMS
+	bool "Sun Logical Domains support"
+	depends on SPARC64
+	help
+	  Say Y here if you want to support virtual devices via
+	  Logical Domains.
+
+config PCI
+	bool "Support for PCI and PS/2 keyboard/mouse"
+	help
+	  Find out whether your system includes a PCI bus. PCI is the name of
+	  a bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box.  If you say Y here, the kernel will include drivers and
+	  infrastructure code to support PCI bus devices.
+
+	  CONFIG_PCI is needed for all JavaStations (including MrCoffee),
+	  CP-1200, JavaEngine-1, Corona, Red October, and Serengeti SGSC.
+	  All of these platforms are extremely obscure, so say N if unsure.
+
+config PCI_DOMAINS
+	def_bool PCI if SPARC64
+
+config PCI_SYSCALL
+	def_bool PCI
+
+config PCIC_PCI
+	bool
+	depends on PCI && SPARC32 && !SPARC_LEON
+	default y
+
+config LEON_PCI
+	bool
+	depends on PCI && SPARC_LEON
+	default y
+
+config SPARC_GRPCI1
+	bool "GRPCI Host Bridge Support"
+	depends on LEON_PCI
+	default y
+	help
+	  Say Y here to include the GRPCI Host Bridge Driver. The GRPCI
+	  PCI host controller is typically found in GRLIB SPARC32/LEON
+	  systems. The driver has one property (all_pci_errors) controlled
+	  from the bootloader that makes the GRPCI generate interrupts
+	  on detected PCI Parity and System errors.
+
+config SPARC_GRPCI2
+	bool "GRPCI2 Host Bridge Support"
+	depends on LEON_PCI
+	default y
+	help
+	  Say Y here to include the GRPCI2 Host Bridge Driver.
+
+source "drivers/pci/Kconfig"
+
+source "drivers/pcmcia/Kconfig"
+
+config SUN_OPENPROMFS
+	tristate "Openprom tree appears in /proc/openprom"
+	help
+	  If you say Y, the OpenPROM device tree will be available as a
+	  virtual file system, which you can mount to /proc/openprom by "mount
+	  -t openpromfs none /proc/openprom".
+
+	  To compile the /proc/openprom support as a module, choose M here: the
+	  module will be called openpromfs.
+
+	  Only choose N if you know in advance that you will not need to modify
+	  OpenPROM settings on the running system.
+
+# Makefile helpers
+config SPARC64_PCI
+	bool
+	default y
+	depends on SPARC64 && PCI
+
+config SPARC64_PCI_MSI
+	bool
+	default y
+	depends on SPARC64_PCI && PCI_MSI
+
+endmenu
+
+config COMPAT
+	bool
+	depends on SPARC64
+	default y
+	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
+	select ARCH_WANT_OLD_COMPAT_IPC
+	select COMPAT_OLD_SIGACTION
+
+config SYSVIPC_COMPAT
+	bool
+	depends on COMPAT && SYSVIPC
+	default y
+
+source "drivers/sbus/char/Kconfig"
diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug
new file mode 100644
index 0000000..50a918d
--- /dev/null
+++ b/arch/sparc/Kconfig.debug
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
+config DEBUG_DCFLUSH
+	bool "D-cache flush debugging"
+	depends on SPARC64 && DEBUG_KERNEL
+
+config MCOUNT
+	bool
+	depends on SPARC64
+	depends on FUNCTION_TRACER
+	default y
+
+config FRAME_POINTER
+	bool
+	depends on MCOUNT
+	default y
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
new file mode 100644
index 0000000..048a033
--- /dev/null
+++ b/arch/sparc/Makefile
@@ -0,0 +1,106 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# sparc/Makefile
+#
+# Makefile for the architecture dependent flags and dependencies on the
+# Sparc and sparc64.
+#
+# Copyright (C) 1994,1996,1998 David S. Miller (davem@caip.rutgers.edu)
+# Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+
+# We are not yet configured - so test on arch
+ifeq ($(ARCH),sparc64)
+        KBUILD_DEFCONFIG := sparc64_defconfig
+else
+        KBUILD_DEFCONFIG := sparc32_defconfig
+endif
+
+ifeq ($(CONFIG_SPARC32),y)
+#####
+# sparc32
+#
+
+CHECKFLAGS     += -D__sparc__
+KBUILD_LDFLAGS := -m elf32_sparc
+export BITS    := 32
+UTS_MACHINE    := sparc
+
+# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+# versions of gcc.  Some gcc versions won't pass -Av8 to binutils when you
+# give -mcpu=v8.  This silently worked with older binutils versions but
+# does not any more.
+KBUILD_CFLAGS  += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+KBUILD_CFLAGS  += -Wa,-Av8
+
+KBUILD_AFLAGS  += -m32 -Wa,-Av8
+
+else
+#####
+# sparc64
+#
+
+CHECKFLAGS    += -D__sparc__ -D__sparc_v9__ -D__arch64__
+KBUILD_LDFLAGS := -m elf64_sparc
+export BITS   := 64
+UTS_MACHINE   := sparc64
+
+KBUILD_CFLAGS += -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow
+KBUILD_CFLAGS += -ffixed-g4 -ffixed-g5 -fcall-used-g7 -Wno-sign-compare
+KBUILD_CFLAGS += -Wa,--undeclared-regs
+KBUILD_CFLAGS += $(call cc-option,-mtune=ultrasparc3)
+KBUILD_AFLAGS += -m64 -mcpu=ultrasparc -Wa,--undeclared-regs
+
+ifeq ($(CONFIG_MCOUNT),y)
+  KBUILD_CFLAGS += -pg
+endif
+
+endif
+
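+# $(BITS), exported above as 32 or 64, selects head_32.o vs head_64.o here
+# and is also passed to the piggyback host tool by arch/sparc/boot/Makefile.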
+head-y                 := arch/sparc/kernel/head_$(BITS).o
+
+# See arch/sparc/Kbuild for the core part of the kernel
+core-y                 += arch/sparc/
+
+libs-y                 += arch/sparc/prom/
+libs-y                 += arch/sparc/lib/
+
+drivers-$(CONFIG_PM) += arch/sparc/power/
+drivers-$(CONFIG_OPROFILE)	+= arch/sparc/oprofile/
+
+boot := arch/sparc/boot
+
+# Default target
+all: zImage
+
+image zImage uImage tftpboot.img vmlinux.aout: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+install:
+	$(Q)$(MAKE) $(build)=$(boot) $@
+
+archclean:
+	$(Q)$(MAKE) $(clean)=$(boot)
+
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/sparc/vdso $@
+
+# This is the image used for packaging
+KBUILD_IMAGE := $(boot)/zImage
+
+# Don't use tabs in echo arguments.
+ifeq ($(ARCH),sparc)
+define archhelp
+  echo  '* image        - kernel image ($(boot)/image)'
+  echo  '* zImage       - stripped kernel image ($(boot)/zImage)'
+  echo  '  uImage       - U-Boot SPARC32 Image (only for LEON)'
+  echo  '  tftpboot.img - image prepared for tftp'
+endef
+else
+define archhelp
+  echo  '* vmlinux      - standard sparc64 kernel'
+  echo  '* zImage       - stripped and compressed sparc64 kernel ($(boot)/zImage)'
+  echo  '  vmlinux.aout - a.out kernel for sparc64'
+  echo  '  tftpboot.img - image prepared for tftp'
+endef
+endif
diff --git a/arch/sparc/boot/.gitignore b/arch/sparc/boot/.gitignore
new file mode 100644
index 0000000..fc6f398
--- /dev/null
+++ b/arch/sparc/boot/.gitignore
@@ -0,0 +1,8 @@
+btfix.S
+btfixupprep
+image
+zImage
+tftpboot.img
+vmlinux.aout
+piggyback
+
diff --git a/arch/sparc/boot/Makefile b/arch/sparc/boot/Makefile
new file mode 100644
index 0000000..ec8cd70
--- /dev/null
+++ b/arch/sparc/boot/Makefile
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the Sparc boot stuff.
+#
+# Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+# Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
+
+ROOT_IMG	:= /usr/src/root.img
+ELFTOAOUT	:= elftoaout
+
+hostprogs-y	:= piggyback
+targets		:= tftpboot.img image zImage vmlinux.aout
+clean-files	:= System.map
+
+quiet_cmd_elftoaout	= ELFTOAOUT $@
+      cmd_elftoaout	= $(ELFTOAOUT) $(obj)/image -o $@
+quiet_cmd_piggy		= PIGGY   $@
+      cmd_piggy		= $(obj)/piggyback $(BITS) $@ System.map $(ROOT_IMG)
+quiet_cmd_strip		= STRIP   $@
+      cmd_strip		= $(STRIP) -R .comment -R .note -K sun4u_init -K _end -K _start $< -o $@
+
+ifeq ($(CONFIG_SPARC64),y)
+
+# Actual linking
+
+$(obj)/zImage: $(obj)/image
+	$(call if_changed,gzip)
+	@echo '  kernel: $@ is ready'
+
+$(obj)/vmlinux.aout: vmlinux FORCE
+	$(call if_changed,elftoaout)
+	@echo '  kernel: $@ is ready'
+else
+
+$(obj)/zImage: $(obj)/image
+	$(call if_changed,strip)
+	@echo '  kernel: $@ is ready'
+
+# The following lines make an image readable by U-Boot.
+#  uImage   - Binary file read by U-Boot
+#  uImage.o - object file of uImage for loading with a
+#             flash programmer understanding ELF.
+
+OBJCOPYFLAGS_image.bin := -S -O binary -R .note -R .comment
+$(obj)/image.bin: $(obj)/image FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/image.gz: $(obj)/image.bin
+	$(call if_changed,gzip)
+
+UIMAGE_LOADADDR = $(CONFIG_UBOOT_LOAD_ADDR)
+UIMAGE_ENTRYADDR = $(CONFIG_UBOOT_ENTRY_ADDR)
+UIMAGE_COMPRESSION = gzip
+
+quiet_cmd_uimage.o = UIMAGE.O $@
+      cmd_uimage.o = $(LD) -Tdata $(CONFIG_UBOOT_FLASH_ADDR) \
+                     -r -b binary $@ -o $@.o
+
+targets += uImage
+$(obj)/uImage: $(obj)/image.gz
+	$(call if_changed,uimage)
+	$(call if_changed,uimage.o)
+	@echo '  Image $@ is ready'
+
+endif
+
+$(obj)/image: vmlinux FORCE
+	$(call if_changed,strip)
+	@echo '  kernel: $@ is ready'
+
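+# tftpboot.img: elftoaout first converts the stripped image to the a.out
+# format the PROM expects, then piggyback patches the ramdisk location and
+# size into its header and appends $(ROOT_IMG) (see piggyback.c).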
+$(obj)/tftpboot.img: $(obj)/image $(obj)/piggyback System.map $(ROOT_IMG) FORCE
+	$(call if_changed,elftoaout)
+	$(call if_changed,piggy)
+
+install:
+	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/zImage \
+		System.map "$(INSTALL_PATH)"
diff --git a/arch/sparc/boot/install.sh b/arch/sparc/boot/install.sh
new file mode 100644
index 0000000..b32851e
--- /dev/null
+++ b/arch/sparc/boot/install.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for SPARC architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+verify () {
+	if [ ! -f "$1" ]; then
+		echo ""                                                   1>&2
+		echo " *** Missing file: $1"                              1>&2
+		echo ' *** You need to run "make" before "make install".' 1>&2
+		echo ""                                                   1>&2
+		exit 1
+	fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
+# User may have a custom install script
+
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+# Default install - same as make zlilo
+
+if [ -f "$4"/vmlinuz ]; then
+	mv "$4"/vmlinuz "$4"/vmlinuz.old
+fi
+
+if [ -f "$4"/System.map ]; then
+	mv "$4"/System.map "$4"/System.old
+fi
+
+cat "$2" > "$4"/vmlinuz
+cp "$3" "$4"/System.map
diff --git a/arch/sparc/boot/piggyback.c b/arch/sparc/boot/piggyback.c
new file mode 100644
index 0000000..bb7c951
--- /dev/null
+++ b/arch/sparc/boot/piggyback.c
@@ -0,0 +1,272 @@
+/*
+   Simple utility to make a single-image install kernel with initial ramdisk
+   for Sparc tftpbooting without need to set up nfs.
+
+   Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+   Pete Zaitcev <zaitcev@yahoo.com> endian fixes for cross-compiles, 2000.
+   Copyright (C) 2011 Sam Ravnborg <sam@ravnborg.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+/*
+ * Note: run this on an a.out kernel (use elftoaout for it),
+ * as PROM looks for a.out image only.
+ */
+
+#define AOUT_TEXT_OFFSET   32
+
+static int is64bit = 0;
+
+/* round up to the page size of the target kernel: 8k for 64-bit, 4k for 32-bit */
+static int align(int n)
+{
+	if (is64bit)
+		return (n + 0x1fff) & ~0x1fff;
+	else
+		return (n + 0xfff) & ~0xfff;
+}
+
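+/* The image header is big-endian regardless of the build host, so
+ * assemble/extract bytes by hand; this keeps the tool correct when
+ * cross-compiled on a little-endian machine. */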
+/* read two bytes as big endian */
+static unsigned short ld2(char *p)
+{
+	return (p[0] << 8) | p[1];
+}
+
+/* save 4 bytes as big endian */
+static void st4(char *p, unsigned int x)
+{
+	p[0] = x >> 24;
+	p[1] = x >> 16;
+	p[2] = x >> 8;
+	p[3] = x;
+}
+
+static void die(const char *str)
+{
+	perror(str);
+	exit(1);
+}
+
+static void usage(void)
+{
+	/* fs_img.gz is an image of initial ramdisk. */
+	fprintf(stderr, "Usage: piggyback bits vmlinux.aout System.map fs_img.gz\n");
+	fprintf(stderr, "\tKernel image will be modified in place.\n");
+	exit(1);
+}
+
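+/* " _start" / " _end" begin at offset 10 on 32-bit System.map lines
+ * (8 hex digits + space + type char) and at offset 18 on 64-bit lines
+ * (16 hex digits + space + type char). */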
+static int start_line(const char *line)
+{
+	if (strcmp(line + 10, " _start\n") == 0)
+		return 1;
+	else if (strcmp(line + 18, " _start\n") == 0)
+		return 1;
+	return 0;
+}
+
+static int end_line(const char *line)
+{
+	if (strcmp(line + 10, " _end\n") == 0)
+		return 1;
+	else if (strcmp(line + 18, " _end\n") == 0)
+		return 1;
+	return 0;
+}
+
+/*
+ * Find address for start and end in System.map.
+ * The file looks like this:
+ * f0004000 ... _start
+ * f0379f79 ... _end
+ * 1234567890123456
+ * ^column 1
+ * There is support for 64 bit addresses too.
+ *
+ * Return 0 if either start or end is not found
+ */
+static int get_start_end(const char *filename, unsigned int *start,
+                                               unsigned int *end)
+{
+	FILE *map;
+	char buffer[1024];
+
+	*start = 0;
+	*end = 0;
+	map = fopen(filename, "r");
+	if (!map)
+		die(filename);
+	while (fgets(buffer, 1024, map)) {
+		if (start_line(buffer))
+			*start = strtoul(buffer, NULL, 16);
+		else if (end_line(buffer))
+			*end = strtoul(buffer, NULL, 16);
+	}
+	fclose(map);
+
+	if (*start == 0 || *end == 0)
+		return 0;
+
+	return 1;
+}
+
+#define LOOKBACK (128 * 4)
+#define BUFSIZE 1024
+/*
+ * Find the HdrS entry from head_32/head_64.
+ * We check if it is at the beginning of the file (sparc64 case)
+ * and if not we search for it.
+ * When we search, do so in steps of 4 as HdrS is on a 4-byte aligned
+ * address (the same alignment as sparc instructions).
+ * Return the offset to the HdrS entry (as off_t)
+ */
+static off_t get_hdrs_offset(int kernelfd, const char *filename)
+{
+	char buffer[BUFSIZE];
+	off_t offset;
+	int i;
+
+	if (lseek(kernelfd, 0, SEEK_SET) < 0)
+		die("lseek");
+	if (read(kernelfd, buffer, BUFSIZE) != BUFSIZE)
+		die(filename);
+
+	if (buffer[40] == 'H' && buffer[41] == 'd' &&
+	    buffer[42] == 'r' && buffer[43] == 'S') {
+		return 40;
+	} else {
+		/* Find the gokernel label: the first instruction is a
+		 * branch to it, and bytes 2-3 hold the low 16 bits of its
+		 * 22-bit word displacement, hence the shift left by 2. */
+		offset = ld2(buffer + AOUT_TEXT_OFFSET + 2) << 2;
+		/* Go back LOOKBACK (512) bytes so we do not miss HdrS */
+		offset -= LOOKBACK;
+		/* skip a.out header */
+		offset += AOUT_TEXT_OFFSET;
+		if (lseek(kernelfd, offset, SEEK_SET) < 0)
+			die("lseek");
+		if (read(kernelfd, buffer, BUFSIZE) != BUFSIZE)
+			die(filename);
+
+		for (i = 0; i < LOOKBACK; i += 4) {
+			if (buffer[i + 0] == 'H' && buffer[i + 1] == 'd' &&
+			    buffer[i + 2] == 'r' && buffer[i + 3] == 'S') {
+				return offset + i;
+			}
+		}
+	}
+	fprintf(stderr, "Couldn't find headers signature in %s\n", filename);
+	exit(1);
+}
+
+int main(int argc,char **argv)
+{
+	static char aout_magic[] = { 0x01, 0x03, 0x01, 0x07 };
+	char buffer[1024];
+	unsigned int i, start, end;
+	off_t offset;
+	struct stat s;
+	int image, tail;
+
+	if (argc != 5)
+		usage();
+	if (strcmp(argv[1], "64") == 0)
+		is64bit = 1;
+	if (stat(argv[4], &s) < 0)
+		die(argv[4]);
+
+	if (!get_start_end(argv[3], &start, &end)) {
+		fprintf(stderr, "Could not determine start and end from %s\n",
+		        argv[3]);
+		exit(1);
+	}
+	if ((image = open(argv[2], O_RDWR)) < 0)
+		die(argv[2]);
+	if (read(image, buffer, 512) != 512)
+		die(argv[2]);
+	if (memcmp(buffer, aout_magic, 4) != 0) {
+		fprintf(stderr, "Not a.out. Don't blame me.\n");
+		exit(1);
+	}
+	/*
+	 * We need to fill in values for
+	 * sparc_ramdisk_image + sparc_ramdisk_size
+	 * To locate these symbols search for the "HdrS" text which appear
+	 * in the image a little before the gokernel symbol.
+	 * See definition of these in init_32.S
+	 */
+
+	offset = get_hdrs_offset(image, argv[2]);
+	/* skip "HdrS" (4 bytes) + LINUX_VERSION_CODE (4) + HdrS version half-word (2) */
+	offset += 10;
+
+	if (lseek(image, offset, 0) < 0)
+		die("lseek");
+
+	/*
+	 * root_flags = 0
+	 * root_dev = 1 (RAMDISK_MAJOR)
+	 * ram_flags = 0
+	 * sparc_ramdisk_image = PAGE-aligned address after _end
+	 * sparc_ramdisk_size = size of image
+	 */
+	st4(buffer, 0);
+	st4(buffer + 4, 0x01000000);
+	st4(buffer + 8, align(end + 32));
+	st4(buffer + 12, s.st_size);
+
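+	/* root_flags, root_dev and ram_flags are half-words, followed by the
+	 * sparc_ramdisk_image and sparc_ramdisk_size words (2+2+2+4+4 = 14
+	 * bytes); the fields start half-word aligned, hence buffer + 2. */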
+	if (write(image, buffer + 2, 14) != 14)
+		die(argv[2]);
+
+	/* For sparc64 update a_text and clear a_data + a_bss */
+	if (is64bit) {
+		if (lseek(image, 4, 0) < 0)
+			die("lseek");
+		/* a_text */
+		st4(buffer, align(end + 32 + 8191) - (start & ~0x3fffffUL) +
+		            s.st_size);
+		/* a_data */
+		st4(buffer + 4, 0);
+		/* a_bss */
+		st4(buffer + 8, 0);
+		if (write(image, buffer, 12) != 12)
+			die(argv[2]);
+	}
+
+	/* seek page aligned boundary in the image file and add boot image */
+	if (lseek(image, AOUT_TEXT_OFFSET - start + align(end + 32), 0) < 0)
+		die("lseek");
+	if ((tail = open(argv[4], O_RDONLY)) < 0)
+		die(argv[4]);
+	while ((i = read(tail, buffer, 1024)) > 0)
+		if (write(image, buffer, i) != i)
+			die(argv[2]);
+	if (close(image) < 0)
+		die("close");
+	if (close(tail) < 0)
+		die("close");
+	return 0;
+}
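
For reference: piggyback rewrites fields of the 32-byte SunOS a.out exec
header that elftoaout places in front of the kernel text. A minimal sketch
of that header, assuming the classic SunOS field layout (the tool itself
relies only on the magic bytes at offset 0, a_text/a_data/a_bss at offsets
4-15, and the text starting at byte 32):

	/* sketch only - field names follow the SunOS <a.out.h> convention */
	struct sun_exec {			/* 32 bytes, big-endian */
		unsigned char  a_toolversion;	/* 0x01 (dynamic bit clear) */
		unsigned char  a_machtype;	/* 0x03 = M_SPARC */
		unsigned short a_magic;		/* 0x0107 = OMAGIC */
		unsigned int   a_text;		/* sparc64: grown to cover the ramdisk */
		unsigned int   a_data;		/* sparc64: cleared */
		unsigned int   a_bss;		/* sparc64: cleared */
		unsigned int   a_syms;
		unsigned int   a_entry;
		unsigned int   a_trsize;
		unsigned int   a_drsize;
	};					/* text follows at AOUT_TEXT_OFFSET == 32 */

The first four bytes are what main() compares against aout_magic, and the
lseek to offset 4 in the is64bit branch lands on a_text.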
diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig
new file mode 100644
index 0000000..207a43a
--- /dev/null
+++ b/arch/sparc/configs/sparc32_defconfig
@@ -0,0 +1,100 @@
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PCI=y
+CONFIG_SUN_OPENPROMFS=m
+CONFIG_BINFMT_MISC=m
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_IPCOMP=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_NET_PKTGEN=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_QLOGICPTI=m
+CONFIG_SCSI_SUNESP=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+CONFIG_SUNLANCE=y
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNBMAC=m
+CONFIG_SUNQE=m
+# CONFIG_WLAN is not set
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=m
+CONFIG_INPUT_EVBUG=m
+CONFIG_KEYBOARD_ATKBD=m
+CONFIG_KEYBOARD_SUNKBD=m
+CONFIG_MOUSE_PS2=m
+CONFIG_MOUSE_SERIAL=m
+CONFIG_SERIO=m
+# CONFIG_SERIO_I8042 is not set
+CONFIG_SERIAL_SUNZILOG=y
+CONFIG_SERIAL_SUNZILOG_CONSOLE=y
+CONFIG_SERIAL_SUNSU=y
+CONFIG_SERIAL_SUNSU_CONSOLE=y
+CONFIG_SPI=y
+CONFIG_SPI_XILINX=m
+CONFIG_SPI_XILINX_PLTFM=m
+CONFIG_SUN_OPENPROMIO=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_ISO9660_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_ROMFS_FS=m
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_RPCSEC_GSS_KRB5=m
+CONFIG_NLS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_KGDB=y
+CONFIG_KGDB_TESTS=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TWOFISH=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+CONFIG_LIBCRC32C=m
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
new file mode 100644
index 0000000..4d4e1cc
--- /dev/null
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -0,0 +1,241 @@
+CONFIG_64BIT=y
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_SMP=y
+CONFIG_HZ_100=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_NUMA=y
+CONFIG_DEFAULT_MMAP_MIN_ADDR=8192
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_SUN_LDOMS=y
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_SUN_OPENPROMFS=m
+CONFIG_BINFMT_MISC=m
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_NET_KEY=m
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_IPCOMP=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_OPTIMISTIC_DAD=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_VLAN_8021Q=m
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_TCPPROBE=m
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_CONNECTOR=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_CDROM_PKTCDVD_WCACHE=y
+CONFIG_ATA_OVER_ETH=m
+CONFIG_SUNVDC=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_ALI15X3=y
+CONFIG_RAID_ATTRS=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_NETDEVICES=y
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+CONFIG_SUNLANCE=m
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNGEM=m
+CONFIG_SUNVNET=m
+CONFIG_LDMVSW=m
+CONFIG_NET_PCI=y
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_TIGON3=m
+CONFIG_BNX2=m
+CONFIG_NIU=m
+# CONFIG_WLAN is not set
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_KEYBOARD_LKKBD=m
+CONFIG_KEYBOARD_SUNKBD=y
+CONFIG_MOUSE_SERIAL=y
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_SPARCSPKR=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_PCIPS2=m
+CONFIG_SERIO_RAW=m
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_SUNSU=y
+CONFIG_SERIAL_SUNSU_CONSOLE=y
+CONFIG_SERIAL_SUNSAB=y
+CONFIG_SERIAL_SUNSAB_CONSOLE=y
+CONFIG_SERIAL_SUNHV=y
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_FB=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_SBUS=y
+CONFIG_FB_CG6=y
+CONFIG_FB_FFB=y
+CONFIG_FB_XVR500=y
+CONFIG_FB_XVR2500=y
+CONFIG_FB_XVR1000=y
+CONFIG_FB_RADEON=y
+# CONFIG_FB_RADEON_BACKLIGHT is not set
+CONFIG_FB_ATY=y
+CONFIG_FB_ATY_GX=y
+# CONFIG_FB_ATY_BACKLIGHT is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FONTS=y
+CONFIG_FONT_SUN8x16=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_SUN_CS4231=m
+CONFIG_USB_HIDDEV=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_NTRIG=y
+CONFIG_HID_ORTEK=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SONY=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_THRUSTMASTER=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_UHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_SUN_OPENPROMIO=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DETECT_HUNG_TASK=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_SCHEDSTATS=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_UPROBE_EVENTS=y
+CONFIG_KEYS=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
+CONFIG_CRYPTO_XCBC=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_ANUBIS=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAMELLIA=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_FCRYPT=m
+CONFIG_CRYPTO_KHAZAD=m
+CONFIG_CRYPTO_SEED=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TEA=m
+CONFIG_CRYPTO_TWOFISH=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRC16=m
+CONFIG_LIBCRC32C=m
+CONFIG_VCC=m
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
new file mode 100644
index 0000000..d257186
--- /dev/null
+++ b/arch/sparc/crypto/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
+obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
+
+obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
+obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
+
+obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
+
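+# Each module pairs a hand-written assembler core (*_asm.o) with a C glue
+# layer (*_glue.o) that registers the algorithm with the kernel crypto API.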
+sha1-sparc64-y := sha1_asm.o sha1_glue.o
+sha256-sparc64-y := sha256_asm.o sha256_glue.o
+sha512-sparc64-y := sha512_asm.o sha512_glue.o
+md5-sparc64-y := md5_asm.o md5_glue.o
+
+aes-sparc64-y := aes_asm.o aes_glue.o
+des-sparc64-y := des_asm.o des_glue.o
+camellia-sparc64-y := camellia_asm.o camellia_glue.o
+
+crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
new file mode 100644
index 0000000..155cefb
--- /dev/null
+++ b/arch/sparc/crypto/aes_asm.S
@@ -0,0 +1,1544 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
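+/* Each *_TWO_ROUNDS macro below performs two AES rounds, ping-ponging
+ * between the I* (input) and T* (temporary) double registers; the *_2
+ * variants interleave two independent blocks so one block's rounds can
+ * overlap the latency of the other's AES_EROUND/AES_DROUND opcodes.
+ */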
+#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23(KEY_BASE +  6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
+	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
+	AES_EROUND01(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23(KEY_BASE +  6, T0, T1, I1) \
+	AES_EROUND01(KEY_BASE +  4, T2, T3, I2) \
+	AES_EROUND23(KEY_BASE +  6, T2, T3, I3)
+
+#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	AES_EROUND01(KEY_BASE +  0, I0, I1, T0) \
+	AES_EROUND23(KEY_BASE +  2, I0, I1, T1) \
+	AES_EROUND01(KEY_BASE +  0, I2, I3, T2) \
+	AES_EROUND23(KEY_BASE +  2, I2, I3, T3) \
+	AES_EROUND01_L(KEY_BASE +  4, T0, T1, I0) \
+	AES_EROUND23_L(KEY_BASE +  6, T0, T1, I1) \
+	AES_EROUND01_L(KEY_BASE +  4, T2, T3, I2) \
+	AES_EROUND23_L(KEY_BASE +  6, T2, T3, I3)
+
+	/* 10 rounds */
+#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+	/* 12 rounds */
+#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
+
+#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
+	ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
+
+	/* 14 rounds */
+#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+	ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
+	ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
+			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
+
+#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
+	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
+	ldd	[%o0 + 0xd0], %f56; \
+	ldd	[%o0 + 0xd8], %f58; \
+	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
+	ldd	[%o0 + 0xe0], %f60; \
+	ldd	[%o0 + 0xe8], %f62; \
+	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
+	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
+	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
+	ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
+	AES_EROUND01(KEY_BASE +  48, I0, I1, KEY_BASE + 0) \
+	AES_EROUND23(KEY_BASE +  50, I0, I1, KEY_BASE + 2) \
+	AES_EROUND01(KEY_BASE +  48, I2, I3, KEY_BASE + 4) \
+	AES_EROUND23(KEY_BASE +  50, I2, I3, KEY_BASE + 6) \
+	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I0) \
+	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I1) \
+	ldd	[%o0 + 0x10], %f8; \
+	ldd	[%o0 + 0x18], %f10; \
+	AES_EROUND01_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I2) \
+	AES_EROUND23_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I3) \
+	ldd	[%o0 + 0x20], %f12; \
+	ldd	[%o0 + 0x28], %f14;
+
+#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
+	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
+	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
+	AES_DROUND01(KEY_BASE +  6, T0, T1, I0)
+
+#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
+	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
+	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
+	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
+	AES_DROUND23(KEY_BASE +  4, T0, T1, I1) \
+	AES_DROUND01(KEY_BASE +  6, T0, T1, I0) \
+	AES_DROUND23(KEY_BASE +  4, T2, T3, I3) \
+	AES_DROUND01(KEY_BASE +  6, T2, T3, I2)
+
+#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
+	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
+	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
+	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0)
+
+#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	AES_DROUND23(KEY_BASE +  0, I0, I1, T1) \
+	AES_DROUND01(KEY_BASE +  2, I0, I1, T0) \
+	AES_DROUND23(KEY_BASE +  0, I2, I3, T3) \
+	AES_DROUND01(KEY_BASE +  2, I2, I3, T2) \
+	AES_DROUND23_L(KEY_BASE +  4, T0, T1, I1) \
+	AES_DROUND01_L(KEY_BASE +  6, T0, T1, I0) \
+	AES_DROUND23_L(KEY_BASE +  4, T2, T3, I3) \
+	AES_DROUND01_L(KEY_BASE +  6, T2, T3, I2)
+
+	/* 10 rounds */
+#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+	/* 12 rounds */
+#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
+
+#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
+	DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
+
+	/* 14 rounds */
+#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  0, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE +  8, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+	DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
+	DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
+			     TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
+
+#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
+	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  0, I0, I1, I2, I3, KEY_BASE + 48) \
+	ldd	[%o0 + 0x18], %f56; \
+	ldd	[%o0 + 0x10], %f58; \
+	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE +  8, I0, I1, I2, I3, KEY_BASE +  0) \
+	ldd	[%o0 + 0x08], %f60; \
+	ldd	[%o0 + 0x00], %f62; \
+	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE +  0) \
+	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE +  0) \
+	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE +  0) \
+	DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE +  0) \
+	AES_DROUND23(KEY_BASE +  48, I0, I1, KEY_BASE + 2) \
+	AES_DROUND01(KEY_BASE +  50, I0, I1, KEY_BASE + 0) \
+	AES_DROUND23(KEY_BASE +  48, I2, I3, KEY_BASE + 6) \
+	AES_DROUND01(KEY_BASE +  50, I2, I3, KEY_BASE + 4) \
+	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 0, KEY_BASE + 2, I1) \
+	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 0, KEY_BASE + 2, I0) \
+	ldd	[%o0 + 0xd8], %f8; \
+	ldd	[%o0 + 0xd0], %f10; \
+	AES_DROUND23_L(KEY_BASE +  52, KEY_BASE + 4, KEY_BASE + 6, I3) \
+	AES_DROUND01_L(KEY_BASE +  54, KEY_BASE + 4, KEY_BASE + 6, I2) \
+	ldd	[%o0 + 0xc8], %f12; \
+	ldd	[%o0 + 0xc0], %f14;
+
+	.align	32
+ENTRY(aes_sparc64_key_expand)
+	/* %o0=input_key, %o1=output_key, %o2=key_len */
+	VISEntry
+	ld	[%o0 + 0x00], %f0
+	ld	[%o0 + 0x04], %f1
+	ld	[%o0 + 0x08], %f2
+	ld	[%o0 + 0x0c], %f3
+
+	std	%f0, [%o1 + 0x00]
+	std	%f2, [%o1 + 0x08]
+	add	%o1, 0x10, %o1
+
+	cmp	%o2, 24
+	bl	2f
+	 nop
+
+	be	1f
+	 nop
+
+	/* 256-bit key expansion */
+	ld	[%o0 + 0x10], %f4
+	ld	[%o0 + 0x14], %f5
+	ld	[%o0 + 0x18], %f6
+	ld	[%o0 + 0x1c], %f7
+
+	std	%f4, [%o1 + 0x00]
+	std	%f6, [%o1 + 0x08]
+	add	%o1, 0x10, %o1
+
+	AES_KEXPAND1(0, 6, 0x0, 8)
+	AES_KEXPAND2(2, 8, 10)
+	AES_KEXPAND0(4, 10, 12)
+	AES_KEXPAND2(6, 12, 14)
+	AES_KEXPAND1(8, 14, 0x1, 16)
+	AES_KEXPAND2(10, 16, 18)
+	AES_KEXPAND0(12, 18, 20)
+	AES_KEXPAND2(14, 20, 22)
+	AES_KEXPAND1(16, 22, 0x2, 24)
+	AES_KEXPAND2(18, 24, 26)
+	AES_KEXPAND0(20, 26, 28)
+	AES_KEXPAND2(22, 28, 30)
+	AES_KEXPAND1(24, 30, 0x3, 32)
+	AES_KEXPAND2(26, 32, 34)
+	AES_KEXPAND0(28, 34, 36)
+	AES_KEXPAND2(30, 36, 38)
+	AES_KEXPAND1(32, 38, 0x4, 40)
+	AES_KEXPAND2(34, 40, 42)
+	AES_KEXPAND0(36, 42, 44)
+	AES_KEXPAND2(38, 44, 46)
+	AES_KEXPAND1(40, 46, 0x5, 48)
+	AES_KEXPAND2(42, 48, 50)
+	AES_KEXPAND0(44, 50, 52)
+	AES_KEXPAND2(46, 52, 54)
+	AES_KEXPAND1(48, 54, 0x6, 56)
+	AES_KEXPAND2(50, 56, 58)
+
+	std	%f8, [%o1 + 0x00]
+	std	%f10, [%o1 + 0x08]
+	std	%f12, [%o1 + 0x10]
+	std	%f14, [%o1 + 0x18]
+	std	%f16, [%o1 + 0x20]
+	std	%f18, [%o1 + 0x28]
+	std	%f20, [%o1 + 0x30]
+	std	%f22, [%o1 + 0x38]
+	std	%f24, [%o1 + 0x40]
+	std	%f26, [%o1 + 0x48]
+	std	%f28, [%o1 + 0x50]
+	std	%f30, [%o1 + 0x58]
+	std	%f32, [%o1 + 0x60]
+	std	%f34, [%o1 + 0x68]
+	std	%f36, [%o1 + 0x70]
+	std	%f38, [%o1 + 0x78]
+	std	%f40, [%o1 + 0x80]
+	std	%f42, [%o1 + 0x88]
+	std	%f44, [%o1 + 0x90]
+	std	%f46, [%o1 + 0x98]
+	std	%f48, [%o1 + 0xa0]
+	std	%f50, [%o1 + 0xa8]
+	std	%f52, [%o1 + 0xb0]
+	std	%f54, [%o1 + 0xb8]
+	std	%f56, [%o1 + 0xc0]
+	ba,pt	%xcc, 80f
+	 std	%f58, [%o1 + 0xc8]
+
+1:
+	/* 192-bit key expansion */
+	ld	[%o0 + 0x10], %f4
+	ld	[%o0 + 0x14], %f5
+
+	std	%f4, [%o1 + 0x00]
+	add	%o1, 0x08, %o1
+
+	AES_KEXPAND1(0, 4, 0x0, 6)
+	AES_KEXPAND2(2, 6, 8)
+	AES_KEXPAND2(4, 8, 10)
+	AES_KEXPAND1(6, 10, 0x1, 12)
+	AES_KEXPAND2(8, 12, 14)
+	AES_KEXPAND2(10, 14, 16)
+	AES_KEXPAND1(12, 16, 0x2, 18)
+	AES_KEXPAND2(14, 18, 20)
+	AES_KEXPAND2(16, 20, 22)
+	AES_KEXPAND1(18, 22, 0x3, 24)
+	AES_KEXPAND2(20, 24, 26)
+	AES_KEXPAND2(22, 26, 28)
+	AES_KEXPAND1(24, 28, 0x4, 30)
+	AES_KEXPAND2(26, 30, 32)
+	AES_KEXPAND2(28, 32, 34)
+	AES_KEXPAND1(30, 34, 0x5, 36)
+	AES_KEXPAND2(32, 36, 38)
+	AES_KEXPAND2(34, 38, 40)
+	AES_KEXPAND1(36, 40, 0x6, 42)
+	AES_KEXPAND2(38, 42, 44)
+	AES_KEXPAND2(40, 44, 46)
+	AES_KEXPAND1(42, 46, 0x7, 48)
+	AES_KEXPAND2(44, 48, 50)
+
+	std	%f6, [%o1 + 0x00]
+	std	%f8, [%o1 + 0x08]
+	std	%f10, [%o1 + 0x10]
+	std	%f12, [%o1 + 0x18]
+	std	%f14, [%o1 + 0x20]
+	std	%f16, [%o1 + 0x28]
+	std	%f18, [%o1 + 0x30]
+	std	%f20, [%o1 + 0x38]
+	std	%f22, [%o1 + 0x40]
+	std	%f24, [%o1 + 0x48]
+	std	%f26, [%o1 + 0x50]
+	std	%f28, [%o1 + 0x58]
+	std	%f30, [%o1 + 0x60]
+	std	%f32, [%o1 + 0x68]
+	std	%f34, [%o1 + 0x70]
+	std	%f36, [%o1 + 0x78]
+	std	%f38, [%o1 + 0x80]
+	std	%f40, [%o1 + 0x88]
+	std	%f42, [%o1 + 0x90]
+	std	%f44, [%o1 + 0x98]
+	std	%f46, [%o1 + 0xa0]
+	std	%f48, [%o1 + 0xa8]
+	ba,pt	%xcc, 80f
+	 std	%f50, [%o1 + 0xb0]
+
+2:
+	/* 128-bit key expansion */
+	AES_KEXPAND1(0, 2, 0x0, 4)
+	AES_KEXPAND2(2, 4, 6)
+	AES_KEXPAND1(4, 6, 0x1, 8)
+	AES_KEXPAND2(6, 8, 10)
+	AES_KEXPAND1(8, 10, 0x2, 12)
+	AES_KEXPAND2(10, 12, 14)
+	AES_KEXPAND1(12, 14, 0x3, 16)
+	AES_KEXPAND2(14, 16, 18)
+	AES_KEXPAND1(16, 18, 0x4, 20)
+	AES_KEXPAND2(18, 20, 22)
+	AES_KEXPAND1(20, 22, 0x5, 24)
+	AES_KEXPAND2(22, 24, 26)
+	AES_KEXPAND1(24, 26, 0x6, 28)
+	AES_KEXPAND2(26, 28, 30)
+	AES_KEXPAND1(28, 30, 0x7, 32)
+	AES_KEXPAND2(30, 32, 34)
+	AES_KEXPAND1(32, 34, 0x8, 36)
+	AES_KEXPAND2(34, 36, 38)
+	AES_KEXPAND1(36, 38, 0x9, 40)
+	AES_KEXPAND2(38, 40, 42)
+
+	std	%f4, [%o1 + 0x00]
+	std	%f6, [%o1 + 0x08]
+	std	%f8, [%o1 + 0x10]
+	std	%f10, [%o1 + 0x18]
+	std	%f12, [%o1 + 0x20]
+	std	%f14, [%o1 + 0x28]
+	std	%f16, [%o1 + 0x30]
+	std	%f18, [%o1 + 0x38]
+	std	%f20, [%o1 + 0x40]
+	std	%f22, [%o1 + 0x48]
+	std	%f24, [%o1 + 0x50]
+	std	%f26, [%o1 + 0x58]
+	std	%f28, [%o1 + 0x60]
+	std	%f30, [%o1 + 0x68]
+	std	%f32, [%o1 + 0x70]
+	std	%f34, [%o1 + 0x78]
+	std	%f36, [%o1 + 0x80]
+	std	%f38, [%o1 + 0x88]
+	std	%f40, [%o1 + 0x90]
+	std	%f42, [%o1 + 0x98]
+80:
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_key_expand)
+
+	.align		32
+ENTRY(aes_sparc64_encrypt_128)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+	ldd		[%o0 + 0x00], %f8
+	ldd		[%o0 + 0x08], %f10
+	ldd		[%o0 + 0x10], %f12
+	ldd		[%o0 + 0x18], %f14
+	ldd		[%o0 + 0x20], %f16
+	ldd		[%o0 + 0x28], %f18
+	ldd		[%o0 + 0x30], %f20
+	ldd		[%o0 + 0x38], %f22
+	ldd		[%o0 + 0x40], %f24
+	ldd		[%o0 + 0x48], %f26
+	ldd		[%o0 + 0x50], %f28
+	ldd		[%o0 + 0x58], %f30
+	ldd		[%o0 + 0x60], %f32
+	ldd		[%o0 + 0x68], %f34
+	ldd		[%o0 + 0x70], %f36
+	ldd		[%o0 + 0x78], %f38
+	ldd		[%o0 + 0x80], %f40
+	ldd		[%o0 + 0x88], %f42
+	ldd		[%o0 + 0x90], %f44
+	ldd		[%o0 + 0x98], %f46
+	ldd		[%o0 + 0xa0], %f48
+	ldd		[%o0 + 0xa8], %f50
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+	ENCRYPT_128(12, 4, 6, 0, 2)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_encrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_encrypt_192)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+
+	ldd		[%o0 + 0x00], %f8
+	ldd		[%o0 + 0x08], %f10
+
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	add		%o0, 0x20, %o0
+
+	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+	ldd		[%o0 + 0x10], %f12
+	ldd		[%o0 + 0x18], %f14
+	ldd		[%o0 + 0x20], %f16
+	ldd		[%o0 + 0x28], %f18
+	ldd		[%o0 + 0x30], %f20
+	ldd		[%o0 + 0x38], %f22
+	ldd		[%o0 + 0x40], %f24
+	ldd		[%o0 + 0x48], %f26
+	ldd		[%o0 + 0x50], %f28
+	ldd		[%o0 + 0x58], %f30
+	ldd		[%o0 + 0x60], %f32
+	ldd		[%o0 + 0x68], %f34
+	ldd		[%o0 + 0x70], %f36
+	ldd		[%o0 + 0x78], %f38
+	ldd		[%o0 + 0x80], %f40
+	ldd		[%o0 + 0x88], %f42
+	ldd		[%o0 + 0x90], %f44
+	ldd		[%o0 + 0x98], %f46
+	ldd		[%o0 + 0xa0], %f48
+	ldd		[%o0 + 0xa8], %f50
+
+	ENCRYPT_128(12, 4, 6, 0, 2)
+
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_encrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_encrypt_256)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+
+	ldd		[%o0 + 0x00], %f8
+	ldd		[%o0 + 0x08], %f10
+
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	add		%o0, 0x20, %o0
+
+	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	add		%o0, 0x20, %o0
+
+	ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+	ldd		[%o0 + 0x10], %f12
+	ldd		[%o0 + 0x18], %f14
+	ldd		[%o0 + 0x20], %f16
+	ldd		[%o0 + 0x28], %f18
+	ldd		[%o0 + 0x30], %f20
+	ldd		[%o0 + 0x38], %f22
+	ldd		[%o0 + 0x40], %f24
+	ldd		[%o0 + 0x48], %f26
+	ldd		[%o0 + 0x50], %f28
+	ldd		[%o0 + 0x58], %f30
+	ldd		[%o0 + 0x60], %f32
+	ldd		[%o0 + 0x68], %f34
+	ldd		[%o0 + 0x70], %f36
+	ldd		[%o0 + 0x78], %f38
+	ldd		[%o0 + 0x80], %f40
+	ldd		[%o0 + 0x88], %f42
+	ldd		[%o0 + 0x90], %f44
+	ldd		[%o0 + 0x98], %f46
+	ldd		[%o0 + 0xa0], %f48
+	ldd		[%o0 + 0xa8], %f50
+
+	ENCRYPT_128(12, 4, 6, 0, 2)
+
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_encrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_decrypt_128)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
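+	/* Load the round keys high-to-low: decryption applies the
+	 * expanded schedule in reverse order.
+	 */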
+	ldd		[%o0 + 0xa0], %f8
+	ldd		[%o0 + 0xa8], %f10
+	ldd		[%o0 + 0x98], %f12
+	ldd		[%o0 + 0x90], %f14
+	ldd		[%o0 + 0x88], %f16
+	ldd		[%o0 + 0x80], %f18
+	ldd		[%o0 + 0x78], %f20
+	ldd		[%o0 + 0x70], %f22
+	ldd		[%o0 + 0x68], %f24
+	ldd		[%o0 + 0x60], %f26
+	ldd		[%o0 + 0x58], %f28
+	ldd		[%o0 + 0x50], %f30
+	ldd		[%o0 + 0x48], %f32
+	ldd		[%o0 + 0x40], %f34
+	ldd		[%o0 + 0x38], %f36
+	ldd		[%o0 + 0x30], %f38
+	ldd		[%o0 + 0x28], %f40
+	ldd		[%o0 + 0x20], %f42
+	ldd		[%o0 + 0x18], %f44
+	ldd		[%o0 + 0x10], %f46
+	ldd		[%o0 + 0x08], %f48
+	ldd		[%o0 + 0x00], %f50
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+	DECRYPT_128(12, 4, 6, 0, 2)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_decrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_decrypt_192)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+	ldd		[%o0 + 0xc0], %f8
+	ldd		[%o0 + 0xc8], %f10
+	ldd		[%o0 + 0xb8], %f12
+	ldd		[%o0 + 0xb0], %f14
+	ldd		[%o0 + 0xa8], %f16
+	ldd		[%o0 + 0xa0], %f18
+	fxor		%f8, %f4, %f4
+	fxor		%f10, %f6, %f6
+	ldd		[%o0 + 0x98], %f20
+	ldd		[%o0 + 0x90], %f22
+	ldd		[%o0 + 0x88], %f24
+	ldd		[%o0 + 0x80], %f26
+	DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
+	ldd		[%o0 + 0x78], %f28
+	ldd		[%o0 + 0x70], %f30
+	ldd		[%o0 + 0x68], %f32
+	ldd		[%o0 + 0x60], %f34
+	ldd		[%o0 + 0x58], %f36
+	ldd		[%o0 + 0x50], %f38
+	ldd		[%o0 + 0x48], %f40
+	ldd		[%o0 + 0x40], %f42
+	ldd		[%o0 + 0x38], %f44
+	ldd		[%o0 + 0x30], %f46
+	ldd		[%o0 + 0x28], %f48
+	ldd		[%o0 + 0x20], %f50
+	ldd		[%o0 + 0x18], %f52
+	ldd		[%o0 + 0x10], %f54
+	ldd		[%o0 + 0x08], %f56
+	ldd		[%o0 + 0x00], %f58
+	DECRYPT_128(20, 4, 6, 0, 2)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_decrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_decrypt_256)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ld		[%o1 + 0x00], %f4
+	ld		[%o1 + 0x04], %f5
+	ld		[%o1 + 0x08], %f6
+	ld		[%o1 + 0x0c], %f7
+	ldd		[%o0 + 0xe0], %f8
+	ldd		[%o0 + 0xe8], %f10
+	ldd		[%o0 + 0xd8], %f12
+	ldd		[%o0 + 0xd0], %f14
+	ldd		[%o0 + 0xc8], %f16
+	fxor		%f8, %f4, %f4
+	ldd		[%o0 + 0xc0], %f18
+	fxor		%f10, %f6, %f6
+	ldd		[%o0 + 0xb8], %f20
+	AES_DROUND23(12, 4, 6, 2)
+	ldd		[%o0 + 0xb0], %f22
+	AES_DROUND01(14, 4, 6, 0)
+	ldd		[%o0 + 0xa8], %f24
+	AES_DROUND23(16, 0, 2, 6)
+	ldd		[%o0 + 0xa0], %f26
+	AES_DROUND01(18, 0, 2, 4)
+	ldd		[%o0 + 0x98], %f12
+	AES_DROUND23(20, 4, 6, 2)
+	ldd		[%o0 + 0x90], %f14
+	AES_DROUND01(22, 4, 6, 0)
+	ldd		[%o0 + 0x88], %f16
+	AES_DROUND23(24, 0, 2, 6)
+	ldd		[%o0 + 0x80], %f18
+	AES_DROUND01(26, 0, 2, 4)
+	ldd		[%o0 + 0x78], %f20
+	AES_DROUND23(12, 4, 6, 2)
+	ldd		[%o0 + 0x70], %f22
+	AES_DROUND01(14, 4, 6, 0)
+	ldd		[%o0 + 0x68], %f24
+	AES_DROUND23(16, 0, 2, 6)
+	ldd		[%o0 + 0x60], %f26
+	AES_DROUND01(18, 0, 2, 4)
+	ldd		[%o0 + 0x58], %f28
+	AES_DROUND23(20, 4, 6, 2)
+	ldd		[%o0 + 0x50], %f30
+	AES_DROUND01(22, 4, 6, 0)
+	ldd		[%o0 + 0x48], %f32
+	AES_DROUND23(24, 0, 2, 6)
+	ldd		[%o0 + 0x40], %f34
+	AES_DROUND01(26, 0, 2, 4)
+	ldd		[%o0 + 0x38], %f36
+	AES_DROUND23(28, 4, 6, 2)
+	ldd		[%o0 + 0x30], %f38
+	AES_DROUND01(30, 4, 6, 0)
+	ldd		[%o0 + 0x28], %f40
+	AES_DROUND23(32, 0, 2, 6)
+	ldd		[%o0 + 0x20], %f42
+	AES_DROUND01(34, 0, 2, 4)
+	ldd		[%o0 + 0x18], %f44
+	AES_DROUND23(36, 4, 6, 2)
+	ldd		[%o0 + 0x10], %f46
+	AES_DROUND01(38, 4, 6, 0)
+	ldd		[%o0 + 0x08], %f48
+	AES_DROUND23(40, 0, 2, 6)
+	ldd		[%o0 + 0x00], %f50
+	AES_DROUND01(42, 0, 2, 4)
+	AES_DROUND23(44, 4, 6, 2)
+	AES_DROUND01(46, 4, 6, 0)
+	AES_DROUND23_L(48, 0, 2, 6)
+	AES_DROUND01_L(50, 0, 2, 4)
+	st		%f4, [%o2 + 0x00]
+	st		%f5, [%o2 + 0x04]
+	st		%f6, [%o2 + 0x08]
+	st		%f7, [%o2 + 0x0c]
+	retl
+	 VISExit
+ENDPROC(aes_sparc64_decrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_load_encrypt_keys_128)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	ldd		[%o0 + 0x30], %f16
+	ldd		[%o0 + 0x38], %f18
+	ldd		[%o0 + 0x40], %f20
+	ldd		[%o0 + 0x48], %f22
+	ldd		[%o0 + 0x50], %f24
+	ldd		[%o0 + 0x58], %f26
+	ldd		[%o0 + 0x60], %f28
+	ldd		[%o0 + 0x68], %f30
+	ldd		[%o0 + 0x70], %f32
+	ldd		[%o0 + 0x78], %f34
+	ldd		[%o0 + 0x80], %f36
+	ldd		[%o0 + 0x88], %f38
+	ldd		[%o0 + 0x90], %f40
+	ldd		[%o0 + 0x98], %f42
+	ldd		[%o0 + 0xa0], %f44
+	retl
+	 ldd		[%o0 + 0xa8], %f46
+ENDPROC(aes_sparc64_load_encrypt_keys_128)
+
+	.align		32
+ENTRY(aes_sparc64_load_encrypt_keys_192)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	ldd		[%o0 + 0x30], %f16
+	ldd		[%o0 + 0x38], %f18
+	ldd		[%o0 + 0x40], %f20
+	ldd		[%o0 + 0x48], %f22
+	ldd		[%o0 + 0x50], %f24
+	ldd		[%o0 + 0x58], %f26
+	ldd		[%o0 + 0x60], %f28
+	ldd		[%o0 + 0x68], %f30
+	ldd		[%o0 + 0x70], %f32
+	ldd		[%o0 + 0x78], %f34
+	ldd		[%o0 + 0x80], %f36
+	ldd		[%o0 + 0x88], %f38
+	ldd		[%o0 + 0x90], %f40
+	ldd		[%o0 + 0x98], %f42
+	ldd		[%o0 + 0xa0], %f44
+	ldd		[%o0 + 0xa8], %f46
+	ldd		[%o0 + 0xb0], %f48
+	ldd		[%o0 + 0xb8], %f50
+	ldd		[%o0 + 0xc0], %f52
+	retl
+	 ldd		[%o0 + 0xc8], %f54
+ENDPROC(aes_sparc64_load_encrypt_keys_192)
+
+	.align		32
+ENTRY(aes_sparc64_load_encrypt_keys_256)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0x10], %f8
+	ldd		[%o0 + 0x18], %f10
+	ldd		[%o0 + 0x20], %f12
+	ldd		[%o0 + 0x28], %f14
+	ldd		[%o0 + 0x30], %f16
+	ldd		[%o0 + 0x38], %f18
+	ldd		[%o0 + 0x40], %f20
+	ldd		[%o0 + 0x48], %f22
+	ldd		[%o0 + 0x50], %f24
+	ldd		[%o0 + 0x58], %f26
+	ldd		[%o0 + 0x60], %f28
+	ldd		[%o0 + 0x68], %f30
+	ldd		[%o0 + 0x70], %f32
+	ldd		[%o0 + 0x78], %f34
+	ldd		[%o0 + 0x80], %f36
+	ldd		[%o0 + 0x88], %f38
+	ldd		[%o0 + 0x90], %f40
+	ldd		[%o0 + 0x98], %f42
+	ldd		[%o0 + 0xa0], %f44
+	ldd		[%o0 + 0xa8], %f46
+	ldd		[%o0 + 0xb0], %f48
+	ldd		[%o0 + 0xb8], %f50
+	ldd		[%o0 + 0xc0], %f52
+	ldd		[%o0 + 0xc8], %f54
+	ldd		[%o0 + 0xd0], %f56
+	ldd		[%o0 + 0xd8], %f58
+	ldd		[%o0 + 0xe0], %f60
+	retl
+	 ldd		[%o0 + 0xe8], %f62
+ENDPROC(aes_sparc64_load_encrypt_keys_256)
+
+	.align		32
+ENTRY(aes_sparc64_load_decrypt_keys_128)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0x98], %f8
+	ldd		[%o0 + 0x90], %f10
+	ldd		[%o0 + 0x88], %f12
+	ldd		[%o0 + 0x80], %f14
+	ldd		[%o0 + 0x78], %f16
+	ldd		[%o0 + 0x70], %f18
+	ldd		[%o0 + 0x68], %f20
+	ldd		[%o0 + 0x60], %f22
+	ldd		[%o0 + 0x58], %f24
+	ldd		[%o0 + 0x50], %f26
+	ldd		[%o0 + 0x48], %f28
+	ldd		[%o0 + 0x40], %f30
+	ldd		[%o0 + 0x38], %f32
+	ldd		[%o0 + 0x30], %f34
+	ldd		[%o0 + 0x28], %f36
+	ldd		[%o0 + 0x20], %f38
+	ldd		[%o0 + 0x18], %f40
+	ldd		[%o0 + 0x10], %f42
+	ldd		[%o0 + 0x08], %f44
+	retl
+	 ldd		[%o0 + 0x00], %f46
+ENDPROC(aes_sparc64_load_decrypt_keys_128)
+
+	.align		32
+ENTRY(aes_sparc64_load_decrypt_keys_192)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0xb8], %f8
+	ldd		[%o0 + 0xb0], %f10
+	ldd		[%o0 + 0xa8], %f12
+	ldd		[%o0 + 0xa0], %f14
+	ldd		[%o0 + 0x98], %f16
+	ldd		[%o0 + 0x90], %f18
+	ldd		[%o0 + 0x88], %f20
+	ldd		[%o0 + 0x80], %f22
+	ldd		[%o0 + 0x78], %f24
+	ldd		[%o0 + 0x70], %f26
+	ldd		[%o0 + 0x68], %f28
+	ldd		[%o0 + 0x60], %f30
+	ldd		[%o0 + 0x58], %f32
+	ldd		[%o0 + 0x50], %f34
+	ldd		[%o0 + 0x48], %f36
+	ldd		[%o0 + 0x40], %f38
+	ldd		[%o0 + 0x38], %f40
+	ldd		[%o0 + 0x30], %f42
+	ldd		[%o0 + 0x28], %f44
+	ldd		[%o0 + 0x20], %f46
+	ldd		[%o0 + 0x18], %f48
+	ldd		[%o0 + 0x10], %f50
+	ldd		[%o0 + 0x08], %f52
+	retl
+	 ldd		[%o0 + 0x00], %f54
+ENDPROC(aes_sparc64_load_decrypt_keys_192)
+
+	.align		32
+ENTRY(aes_sparc64_load_decrypt_keys_256)
+	/* %o0=key */
+	VISEntry
+	ldd		[%o0 + 0xd8], %f8
+	ldd		[%o0 + 0xd0], %f10
+	ldd		[%o0 + 0xc8], %f12
+	ldd		[%o0 + 0xc0], %f14
+	ldd		[%o0 + 0xb8], %f16
+	ldd		[%o0 + 0xb0], %f18
+	ldd		[%o0 + 0xa8], %f20
+	ldd		[%o0 + 0xa0], %f22
+	ldd		[%o0 + 0x98], %f24
+	ldd		[%o0 + 0x90], %f26
+	ldd		[%o0 + 0x88], %f28
+	ldd		[%o0 + 0x80], %f30
+	ldd		[%o0 + 0x78], %f32
+	ldd		[%o0 + 0x70], %f34
+	ldd		[%o0 + 0x68], %f36
+	ldd		[%o0 + 0x60], %f38
+	ldd		[%o0 + 0x58], %f40
+	ldd		[%o0 + 0x50], %f42
+	ldd		[%o0 + 0x48], %f44
+	ldd		[%o0 + 0x40], %f46
+	ldd		[%o0 + 0x38], %f48
+	ldd		[%o0 + 0x30], %f50
+	ldd		[%o0 + 0x28], %f52
+	ldd		[%o0 + 0x20], %f54
+	ldd		[%o0 + 0x18], %f56
+	ldd		[%o0 + 0x10], %f58
+	ldd		[%o0 + 0x08], %f60
+	retl
+	 ldd		[%o0 + 0x00], %f62
+ENDPROC(aes_sparc64_load_decrypt_keys_256)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_encrypt_128)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
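+	/* Two blocks are processed per loop iteration; a trailing
+	 * single block falls through to the 10: path below.
+	 */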
+	ldx		[%o0 + 0x00], %g1
+	subcc		%o3, 0x10, %o3
+	be		10f
+	 ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	xor		%g1, %o4, %g3
+	xor		%g2, %o5, %g7
+	MOVXTOD_G3_F60
+	MOVXTOD_G7_F62
+	ENCRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	sub		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	ENCRYPT_128(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	retl
+	 nop
+ENDPROC(aes_sparc64_ecb_encrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_encrypt_192)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 + 0x00], %g1
+	subcc		%o3, 0x10, %o3
+	be		10f
+	 ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	xor		%g1, %o4, %g3
+	xor		%g2, %o5, %g7
+	MOVXTOD_G3_F60
+	MOVXTOD_G7_F62
+	ENCRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	sub		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	ENCRYPT_192(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	retl
+	 nop
+ENDPROC(aes_sparc64_ecb_encrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_encrypt_256)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 + 0x00], %g1
+	subcc		%o3, 0x10, %o3
+	be		10f
+	 ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	xor		%g1, %o4, %g3
+	xor		%g2, %o5, %g7
+	MOVXTOD_G3_F0
+	MOVXTOD_G7_F2
+	ENCRYPT_256_2(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	std		%f0, [%o2 + 0x10]
+	std		%f2, [%o2 + 0x18]
+	sub		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldd		[%o0 + 0xd0], %f56
+	ldd		[%o0 + 0xd8], %f58
+	ldd		[%o0 + 0xe0], %f60
+	ldd		[%o0 + 0xe8], %f62
+	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	ENCRYPT_256(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	retl
+	 nop
+ENDPROC(aes_sparc64_ecb_encrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_decrypt_128)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 - 0x10], %g1
+	subcc		%o3, 0x10, %o3
+	be		10f
+	 ldx		[%o0 - 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	xor		%g1, %o4, %g3
+	xor		%g2, %o5, %g7
+	MOVXTOD_G3_F60
+	MOVXTOD_G7_F62
+	DECRYPT_128_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	sub		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz,pt		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	DECRYPT_128(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	retl
+	 nop
+ENDPROC(aes_sparc64_ecb_decrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_decrypt_192)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 - 0x10], %g1
+	subcc		%o3, 0x10, %o3
+	be		10f
+	 ldx		[%o0 - 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	xor		%g1, %o4, %g3
+	xor		%g2, %o5, %g7
+	MOVXTOD_G3_F60
+	MOVXTOD_G7_F62
+	DECRYPT_192_2(8, 4, 6, 60, 62, 0, 2, 56, 58)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	sub		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz,pt		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	DECRYPT_192(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	retl
+	 nop
+ENDPROC(aes_sparc64_ecb_decrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_ecb_decrypt_256)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len */
+	ldx		[%o0 - 0x10], %g1
+	subcc		%o3, 0x10, %o3
+	ldx		[%o0 - 0x08], %g2
+	be		10f
+	 sub		%o0, 0xf0, %o0
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	xor		%g1, %o4, %g3
+	xor		%g2, %o5, %g7
+	MOVXTOD_G3_F0
+	MOVXTOD_G7_F2
+	DECRYPT_256_2(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	std		%f0, [%o2 + 0x10]
+	std		%f2, [%o2 + 0x18]
+	sub		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz,pt		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldd		[%o0 + 0x18], %f56
+	ldd		[%o0 + 0x10], %f58
+	ldd		[%o0 + 0x08], %f60
+	ldd		[%o0 + 0x00], %f62
+	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	DECRYPT_256(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	retl
+	 nop
+ENDPROC(aes_sparc64_ecb_decrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_cbc_encrypt_128)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd		[%o4 + 0x00], %f4
+	ldd		[%o4 + 0x08], %f6
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F0
+	MOVXTOD_G7_F2
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	ENCRYPT_128(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	std		%f4, [%o4 + 0x00]
+	std		%f6, [%o4 + 0x08]
+	retl
+	 nop
+ENDPROC(aes_sparc64_cbc_encrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_cbc_encrypt_192)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd		[%o4 + 0x00], %f4
+	ldd		[%o4 + 0x08], %f6
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F0
+	MOVXTOD_G7_F2
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	ENCRYPT_192(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	std		%f4, [%o4 + 0x00]
+	std		%f6, [%o4 + 0x08]
+	retl
+	 nop
+ENDPROC(aes_sparc64_cbc_encrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_cbc_encrypt_256)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd		[%o4 + 0x00], %f4
+	ldd		[%o4 + 0x08], %f6
+	ldx		[%o0 + 0x00], %g1
+	ldx		[%o0 + 0x08], %g2
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F0
+	MOVXTOD_G7_F2
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	ENCRYPT_256(8, 4, 6, 0, 2)
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	std		%f4, [%o4 + 0x00]
+	std		%f6, [%o4 + 0x08]
+	retl
+	 nop
+ENDPROC(aes_sparc64_cbc_encrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_cbc_decrypt_128)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+	ldx		[%o4 + 0x00], %o0
+	ldx		[%o4 + 0x08], %o5
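+	/* The IV lives in %o0/%o5: the key pointer is dead once the
+	 * first round-key pair is cached in %g1/%g2, since the rest of
+	 * the schedule was preloaded into FPU registers by the glue.
+	 */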
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	DECRYPT_128(8, 4, 6, 0, 2)
+	MOVXTOD_O0_F0
+	MOVXTOD_O5_F2
+	xor		%g1, %g3, %o0
+	xor		%g2, %g7, %o5
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	stx		%o0, [%o4 + 0x00]
+	stx		%o5, [%o4 + 0x08]
+	retl
+	 nop
+ENDPROC(aes_sparc64_cbc_decrypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_cbc_decrypt_192)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+	ldx		[%o4 + 0x00], %o0
+	ldx		[%o4 + 0x08], %o5
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	DECRYPT_192(8, 4, 6, 0, 2)
+	MOVXTOD_O0_F0
+	MOVXTOD_O5_F2
+	xor		%g1, %g3, %o0
+	xor		%g2, %g7, %o5
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	stx		%o0, [%o4 + 0x00]
+	stx		%o5, [%o4 + 0x08]
+	retl
+	 nop
+ENDPROC(aes_sparc64_cbc_decrypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_cbc_decrypt_256)
+	/* %o0=&key[key_len], %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldx		[%o0 - 0x10], %g1
+	ldx		[%o0 - 0x08], %g2
+	ldx		[%o4 + 0x00], %o0
+	ldx		[%o4 + 0x08], %o5
+1:	ldx		[%o1 + 0x00], %g3
+	ldx		[%o1 + 0x08], %g7
+	add		%o1, 0x10, %o1
+	xor		%g1, %g3, %g3
+	xor		%g2, %g7, %g7
+	MOVXTOD_G3_F4
+	MOVXTOD_G7_F6
+	DECRYPT_256(8, 4, 6, 0, 2)
+	MOVXTOD_O0_F0
+	MOVXTOD_O5_F2
+	xor		%g1, %g3, %o0
+	xor		%g2, %g7, %o5
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+	subcc		%o3, 0x10, %o3
+	bne,pt		%xcc, 1b
+	 add		%o2, 0x10, %o2
+	stx		%o0, [%o4 + 0x00]
+	stx		%o5, [%o4 + 0x08]
+	retl
+	 nop
+ENDPROC(aes_sparc64_cbc_decrypt_256)
+
+	.align		32
+ENTRY(aes_sparc64_ctr_crypt_128)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldx		[%o4 + 0x00], %g3
+	ldx		[%o4 + 0x08], %g7
+	subcc		%o3, 0x10, %o3
+	ldx		[%o0 + 0x00], %g1
+	be		10f
+	 ldx		[%o0 + 0x08], %g2
+1:	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F0
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F2
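+	/* Bump the 128-bit big-endian counter held in %g3:%g7: add one
+	 * to the low word and, if it wrapped to zero, carry into the
+	 * high word via the conditional move.
+	 */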
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F4
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F6
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	ENCRYPT_128_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
+	ldd		[%o1 + 0x00], %f56
+	ldd		[%o1 + 0x08], %f58
+	ldd		[%o1 + 0x10], %f60
+	ldd		[%o1 + 0x18], %f62
+	fxor		%f56, %f0, %f56
+	fxor		%f58, %f2, %f58
+	fxor		%f60, %f4, %f60
+	fxor		%f62, %f6, %f62
+	std		%f56, [%o2 + 0x00]
+	std		%f58, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	subcc		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F0
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F2
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	ENCRYPT_128(8, 0, 2, 4, 6)
+	ldd		[%o1 + 0x00], %f4
+	ldd		[%o1 + 0x08], %f6
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	stx		%g3, [%o4 + 0x00]
+	retl
+	 stx		%g7, [%o4 + 0x08]
+ENDPROC(aes_sparc64_ctr_crypt_128)
+
+	.align		32
+ENTRY(aes_sparc64_ctr_crypt_192)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldx		[%o4 + 0x00], %g3
+	ldx		[%o4 + 0x08], %g7
+	subcc		%o3, 0x10, %o3
+	ldx		[%o0 + 0x00], %g1
+	be		10f
+	 ldx		[%o0 + 0x08], %g2
+1:	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F0
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F2
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F4
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F6
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	ENCRYPT_192_2(8, 0, 2, 4, 6, 56, 58, 60, 62)
+	ldd		[%o1 + 0x00], %f56
+	ldd		[%o1 + 0x08], %f58
+	ldd		[%o1 + 0x10], %f60
+	ldd		[%o1 + 0x18], %f62
+	fxor		%f56, %f0, %f56
+	fxor		%f58, %f2, %f58
+	fxor		%f60, %f4, %f60
+	fxor		%f62, %f6, %f62
+	std		%f56, [%o2 + 0x00]
+	std		%f58, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	subcc		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F0
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F2
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	ENCRYPT_192(8, 0, 2, 4, 6)
+	ldd		[%o1 + 0x00], %f4
+	ldd		[%o1 + 0x08], %f6
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	stx		%g3, [%o4 + 0x00]
+	retl
+	 stx		%g7, [%o4 + 0x08]
+ENDPROC(aes_sparc64_ctr_crypt_192)
+
+	.align		32
+ENTRY(aes_sparc64_ctr_crypt_256)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldx		[%o4 + 0x00], %g3
+	ldx		[%o4 + 0x08], %g7
+	subcc		%o3, 0x10, %o3
+	ldx		[%o0 + 0x00], %g1
+	be		10f
+	 ldx		[%o0 + 0x08], %g2
+1:	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F0
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F2
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F4
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F6
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	ENCRYPT_256_2(8, 0, 2, 4, 6)
+	ldd		[%o1 + 0x00], %f56
+	ldd		[%o1 + 0x08], %f58
+	ldd		[%o1 + 0x10], %f60
+	ldd		[%o1 + 0x18], %f62
+	fxor		%f56, %f0, %f56
+	fxor		%f58, %f2, %f58
+	fxor		%f60, %f4, %f60
+	fxor		%f62, %f6, %f62
+	std		%f56, [%o2 + 0x00]
+	std		%f58, [%o2 + 0x08]
+	std		%f60, [%o2 + 0x10]
+	std		%f62, [%o2 + 0x18]
+	subcc		%o3, 0x20, %o3
+	add		%o1, 0x20, %o1
+	brgz		%o3, 1b
+	 add		%o2, 0x20, %o2
+	brlz,pt		%o3, 11f
+	 nop
+10:	ldd		[%o0 + 0xd0], %f56
+	ldd		[%o0 + 0xd8], %f58
+	ldd		[%o0 + 0xe0], %f60
+	ldd		[%o0 + 0xe8], %f62
+	xor		%g1, %g3, %o5
+	MOVXTOD_O5_F0
+	xor		%g2, %g7, %o5
+	MOVXTOD_O5_F2
+	add		%g7, 1, %g7
+	add		%g3, 1, %o5
+	movrz		%g7, %o5, %g3
+	ENCRYPT_256(8, 0, 2, 4, 6)
+	ldd		[%o1 + 0x00], %f4
+	ldd		[%o1 + 0x08], %f6
+	fxor		%f4, %f0, %f4
+	fxor		%f6, %f2, %f6
+	std		%f4, [%o2 + 0x00]
+	std		%f6, [%o2 + 0x08]
+11:	stx		%g3, [%o4 + 0x00]
+	retl
+	 stx		%g7, [%o4 + 0x08]
+ENDPROC(aes_sparc64_ctr_crypt_256)
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
new file mode 100644
index 0000000..3cd4f6b
--- /dev/null
+++ b/arch/sparc/crypto/aes_glue.c
@@ -0,0 +1,505 @@
+/* Glue code for AES encryption optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/aesni-intel_glue.c
+ *
+ * Copyright (C) 2008, Intel Corp.
+ *    Author: Huang Ying <ying.huang@intel.com>
+ *
+ * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
+ * interface for 64-bit kernels.
+ *    Authors: Adrian Hoban <adrian.hoban@intel.com>
+ *             Gabriele Paoloni <gabriele.paoloni@intel.com>
+ *             Tadeusz Struk (tadeusz.struk@intel.com)
+ *             Aidan O'Mahony (aidan.o.mahony@intel.com)
+ *    Copyright (c) 2010, Intel Corporation.
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+
+#include <asm/fpumacro.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
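+/* Per-key-size dispatch table: aes_set_key() points ->ops at the
+ * 128/192/256-bit variant so the fast paths need no key-length
+ * checks at run time.
+ */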
+struct aes_ops {
+	void (*encrypt)(const u64 *key, const u32 *input, u32 *output);
+	void (*decrypt)(const u64 *key, const u32 *input, u32 *output);
+	void (*load_encrypt_keys)(const u64 *key);
+	void (*load_decrypt_keys)(const u64 *key);
+	void (*ecb_encrypt)(const u64 *key, const u64 *input, u64 *output,
+			    unsigned int len);
+	void (*ecb_decrypt)(const u64 *key, const u64 *input, u64 *output,
+			    unsigned int len);
+	void (*cbc_encrypt)(const u64 *key, const u64 *input, u64 *output,
+			    unsigned int len, u64 *iv);
+	void (*cbc_decrypt)(const u64 *key, const u64 *input, u64 *output,
+			    unsigned int len, u64 *iv);
+	void (*ctr_crypt)(const u64 *key, const u64 *input, u64 *output,
+			  unsigned int len, u64 *iv);
+};
+
+struct crypto_sparc64_aes_ctx {
+	struct aes_ops *ops;
+	u64 key[AES_MAX_KEYLENGTH / sizeof(u64)];
+	u32 key_length;
+	u32 expanded_key_length;
+};
+
+extern void aes_sparc64_encrypt_128(const u64 *key, const u32 *input,
+				    u32 *output);
+extern void aes_sparc64_encrypt_192(const u64 *key, const u32 *input,
+				    u32 *output);
+extern void aes_sparc64_encrypt_256(const u64 *key, const u32 *input,
+				    u32 *output);
+
+extern void aes_sparc64_decrypt_128(const u64 *key, const u32 *input,
+				    u32 *output);
+extern void aes_sparc64_decrypt_192(const u64 *key, const u32 *input,
+				    u32 *output);
+extern void aes_sparc64_decrypt_256(const u64 *key, const u32 *input,
+				    u32 *output);
+
+extern void aes_sparc64_load_encrypt_keys_128(const u64 *key);
+extern void aes_sparc64_load_encrypt_keys_192(const u64 *key);
+extern void aes_sparc64_load_encrypt_keys_256(const u64 *key);
+
+extern void aes_sparc64_load_decrypt_keys_128(const u64 *key);
+extern void aes_sparc64_load_decrypt_keys_192(const u64 *key);
+extern void aes_sparc64_load_decrypt_keys_256(const u64 *key);
+
+extern void aes_sparc64_ecb_encrypt_128(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_encrypt_192(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_encrypt_256(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len);
+
+extern void aes_sparc64_ecb_decrypt_128(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_decrypt_192(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len);
+extern void aes_sparc64_ecb_decrypt_256(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len);
+
+extern void aes_sparc64_cbc_encrypt_128(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len,
+					u64 *iv);
+
+extern void aes_sparc64_cbc_encrypt_192(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len,
+					u64 *iv);
+
+extern void aes_sparc64_cbc_encrypt_256(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len,
+					u64 *iv);
+
+extern void aes_sparc64_cbc_decrypt_128(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len,
+					u64 *iv);
+
+extern void aes_sparc64_cbc_decrypt_192(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len,
+					u64 *iv);
+
+extern void aes_sparc64_cbc_decrypt_256(const u64 *key, const u64 *input,
+					u64 *output, unsigned int len,
+					u64 *iv);
+
+extern void aes_sparc64_ctr_crypt_128(const u64 *key, const u64 *input,
+				      u64 *output, unsigned int len,
+				      u64 *iv);
+extern void aes_sparc64_ctr_crypt_192(const u64 *key, const u64 *input,
+				      u64 *output, unsigned int len,
+				      u64 *iv);
+extern void aes_sparc64_ctr_crypt_256(const u64 *key, const u64 *input,
+				      u64 *output, unsigned int len,
+				      u64 *iv);
+
+static struct aes_ops aes128_ops = {
+	.encrypt		= aes_sparc64_encrypt_128,
+	.decrypt		= aes_sparc64_decrypt_128,
+	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_128,
+	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_128,
+	.ecb_encrypt		= aes_sparc64_ecb_encrypt_128,
+	.ecb_decrypt		= aes_sparc64_ecb_decrypt_128,
+	.cbc_encrypt		= aes_sparc64_cbc_encrypt_128,
+	.cbc_decrypt		= aes_sparc64_cbc_decrypt_128,
+	.ctr_crypt		= aes_sparc64_ctr_crypt_128,
+};
+
+static struct aes_ops aes192_ops = {
+	.encrypt		= aes_sparc64_encrypt_192,
+	.decrypt		= aes_sparc64_decrypt_192,
+	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_192,
+	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_192,
+	.ecb_encrypt		= aes_sparc64_ecb_encrypt_192,
+	.ecb_decrypt		= aes_sparc64_ecb_decrypt_192,
+	.cbc_encrypt		= aes_sparc64_cbc_encrypt_192,
+	.cbc_decrypt		= aes_sparc64_cbc_decrypt_192,
+	.ctr_crypt		= aes_sparc64_ctr_crypt_192,
+};
+
+static struct aes_ops aes256_ops = {
+	.encrypt		= aes_sparc64_encrypt_256,
+	.decrypt		= aes_sparc64_decrypt_256,
+	.load_encrypt_keys	= aes_sparc64_load_encrypt_keys_256,
+	.load_decrypt_keys	= aes_sparc64_load_decrypt_keys_256,
+	.ecb_encrypt		= aes_sparc64_ecb_encrypt_256,
+	.ecb_decrypt		= aes_sparc64_ecb_decrypt_256,
+	.cbc_encrypt		= aes_sparc64_cbc_encrypt_256,
+	.cbc_decrypt		= aes_sparc64_cbc_decrypt_256,
+	.ctr_crypt		= aes_sparc64_ctr_crypt_256,
+};
+
+extern void aes_sparc64_key_expand(const u32 *in_key, u64 *output_key,
+				   unsigned int key_len);
+
+static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+		       unsigned int key_len)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+
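+	/* The expanded schedule holds (rounds + 1) 16-byte round keys:
+	 * 11 * 16 = 0xb0 for AES-128, 13 * 16 = 0xd0 for AES-192 and
+	 * 15 * 16 = 0xf0 for AES-256.
+	 */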
+	switch (key_len) {
+	case AES_KEYSIZE_128:
+		ctx->expanded_key_length = 0xb0;
+		ctx->ops = &aes128_ops;
+		break;
+
+	case AES_KEYSIZE_192:
+		ctx->expanded_key_length = 0xd0;
+		ctx->ops = &aes192_ops;
+		break;
+
+	case AES_KEYSIZE_256:
+		ctx->expanded_key_length = 0xf0;
+		ctx->ops = &aes256_ops;
+		break;
+
+	default:
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	aes_sparc64_key_expand((const u32 *)in_key, &ctx->key[0], key_len);
+	ctx->key_length = key_len;
+
+	return 0;
+}
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
+}
+
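+/* Mask that rounds a byte count down to a whole number of blocks. */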
+#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
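+	/* The round keys stay resident in FPU registers across the
+	 * whole walk, so the walk must not be allowed to sleep.
+	 */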
+
+	ctx->ops->load_encrypt_keys(&ctx->key[0]);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			ctx->ops->ecb_encrypt(&ctx->key[0],
+					      (const u64 *)walk.src.virt.addr,
+					      (u64 *) walk.dst.virt.addr,
+					      block_len);
+		}
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
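+	/* Done with the FPU: clearing %fprs marks the stale VIS
+	 * contents as unused so they need not be preserved.
+	 */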
+	fprs_write(0);
+	return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	u64 *key_end;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	ctx->ops->load_decrypt_keys(&ctx->key[0]);
+	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
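+	/* The decrypt asm indexes the schedule backwards from its end. */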
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			ctx->ops->ecb_decrypt(key_end,
+					      (const u64 *) walk.src.virt.addr,
+					      (u64 *) walk.dst.virt.addr, block_len);
+		}
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+
+	return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	ctx->ops->load_encrypt_keys(&ctx->key[0]);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			ctx->ops->cbc_encrypt(&ctx->key[0],
+					      (const u64 *)walk.src.virt.addr,
+					      (u64 *) walk.dst.virt.addr,
+					      block_len, (u64 *) walk.iv);
+		}
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	u64 *key_end;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	ctx->ops->load_decrypt_keys(&ctx->key[0]);
+	key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			ctx->ops->cbc_decrypt(key_end,
+					      (const u64 *) walk.src.virt.addr,
+					      (u64 *) walk.dst.virt.addr,
+					      block_len, (u64 *) walk.iv);
+		}
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+
+	return err;
+}
+
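+/* Encrypt a single counter block and XOR just the trailing nbytes
+ * of keystream over the final partial block.
+ */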
+static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
+			    struct blkcipher_walk *walk)
+{
+	u8 *ctrblk = walk->iv;
+	u64 keystream[AES_BLOCK_SIZE / sizeof(u64)];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	ctx->ops->ecb_encrypt(&ctx->key[0], (const u64 *)ctrblk,
+			      keystream, AES_BLOCK_SIZE);
+	crypto_xor_cpy(dst, (u8 *) keystream, src, nbytes);
+	crypto_inc(ctrblk, AES_BLOCK_SIZE);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc,
+		     struct scatterlist *dst, struct scatterlist *src,
+		     unsigned int nbytes)
+{
+	struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	ctx->ops->load_encrypt_keys(&ctx->key[0]);
+	while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+		unsigned int block_len = nbytes & AES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			ctx->ops->ctr_crypt(&ctx->key[0],
+					    (const u64 *)walk.src.virt.addr,
+					    (u64 *) walk.dst.virt.addr,
+					    block_len, (u64 *) walk.iv);
+		}
+		nbytes &= AES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	if (walk.nbytes) {
+		ctr_crypt_final(ctx, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static struct crypto_alg algs[] = { {
+	.cra_name		= "aes",
+	.cra_driver_name	= "aes-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
+	.cra_alignmask		= 3,
+	.cra_module		= THIS_MODULE,
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= AES_MIN_KEY_SIZE,
+			.cia_max_keysize	= AES_MAX_KEY_SIZE,
+			.cia_setkey		= aes_set_key,
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
+		}
+	}
+}, {
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "ecb-aes-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.setkey		= aes_set_key,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "cbc-aes-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= aes_set_key,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "ctr-aes-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct crypto_sparc64_aes_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= aes_set_key,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+} };
+
+static bool __init sparc64_has_aes_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_AES))
+		return false;
+
+	return true;
+}
+
+static int __init aes_sparc64_mod_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(algs); i++)
+		INIT_LIST_HEAD(&algs[i].cra_list);
+
+	if (sparc64_has_aes_opcode()) {
+		pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
+		return crypto_register_algs(algs, ARRAY_SIZE(algs));
+	}
+	pr_info("sparc64 aes opcodes not available.\n");
+	return -ENODEV;
+}
+
+static void __exit aes_sparc64_mod_fini(void)
+{
+	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(aes_sparc64_mod_init);
+module_exit(aes_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, sparc64 aes opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("aes");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/camellia_asm.S b/arch/sparc/crypto/camellia_asm.S
new file mode 100644
index 0000000..dcdc919
--- /dev/null
+++ b/arch/sparc/crypto/camellia_asm.S
@@ -0,0 +1,564 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
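+/* One Camellia "grand round": six Feistel F rounds, optionally
+ * followed by the FL/FL^-1 diffusion layer.
+ */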
+#define CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
+	CAMELLIA_F(KEY_BASE +  0, I1, I0, I1) \
+	CAMELLIA_F(KEY_BASE +  2, I0, I1, I0) \
+	CAMELLIA_F(KEY_BASE +  4, I1, I0, I1) \
+	CAMELLIA_F(KEY_BASE +  6, I0, I1, I0) \
+	CAMELLIA_F(KEY_BASE +  8, I1, I0, I1) \
+	CAMELLIA_F(KEY_BASE + 10, I0, I1, I0)
+
+#define CAMELLIA_6ROUNDS_FL_FLI(KEY_BASE, I0, I1) \
+	CAMELLIA_6ROUNDS(KEY_BASE, I0, I1) \
+	CAMELLIA_FL(KEY_BASE + 12, I0, I0) \
+	CAMELLIA_FLI(KEY_BASE + 14, I1, I1)
+
+	.data
+
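+	/* The six 64-bit sigma constants of the Camellia key schedule. */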
+	.align	8
+SIGMA:	.xword	0xA09E667F3BCC908B
+	.xword	0xB67AE8584CAA73B2
+	.xword	0xC6EF372FE94F82BE
+	.xword	0x54FF53A5F1D36F1C
+	.xword	0x10E527FADE682D1D
+	.xword	0xB05688C2B3E6C1FD
+
+	.text
+
+	.align	32
+ENTRY(camellia_sparc64_key_expand)
+	/* %o0=in_key, %o1=encrypt_key, %o2=key_len, %o3=decrypt_key */
+	VISEntry
+	ld	[%o0 + 0x00], %f0	! i0, k[0]
+	ld	[%o0 + 0x04], %f1	! i1, k[1]
+	ld	[%o0 + 0x08], %f2	! i2, k[2]
+	ld	[%o0 + 0x0c], %f3	! i3, k[3]
+	std	%f0, [%o1 + 0x00]	! k[0, 1]
+	fsrc2	%f0, %f28
+	std	%f2, [%o1 + 0x08]	! k[2, 3]
+	cmp	%o2, 16
+	be	10f
+	 fsrc2	%f2, %f30
+
+	ld	[%o0 + 0x10], %f0
+	ld	[%o0 + 0x14], %f1
+	std	%f0, [%o1 + 0x20]	! k[8, 9]
+	cmp	%o2, 24
+	fone	%f10
+	be,a	1f
+	 fxor	%f10, %f0, %f2
+	ld	[%o0 + 0x18], %f2
+	ld	[%o0 + 0x1c], %f3
+1:
+	std	%f2, [%o1 + 0x28]	! k[10, 11]
+	fxor	%f28, %f0, %f0
+	fxor	%f30, %f2, %f2
+
+10:
+	sethi	%hi(SIGMA), %g3
+	or	%g3, %lo(SIGMA), %g3
+	ldd	[%g3 + 0x00], %f16
+	ldd	[%g3 + 0x08], %f18
+	ldd	[%g3 + 0x10], %f20
+	ldd	[%g3 + 0x18], %f22
+	ldd	[%g3 + 0x20], %f24
+	ldd	[%g3 + 0x28], %f26
+	CAMELLIA_F(16, 2, 0, 2)
+	CAMELLIA_F(18, 0, 2, 0)
+	fxor	%f28, %f0, %f0
+	fxor	%f30, %f2, %f2
+	CAMELLIA_F(20, 2, 0, 2)
+	CAMELLIA_F(22, 0, 2, 0)
+
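+/* Rotate the 128-bit value {S01,S23} left by N bits (N < 64),
+ * using TMP1 and TMP2 as scratch registers.
+ */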
+#define ROTL128(S01, S23, TMP1, TMP2, N)	\
+	srlx	S01, (64 - N), TMP1;		\
+	sllx	S01, N, S01;			\
+	srlx	S23, (64 - N), TMP2;		\
+	sllx	S23, N, S23;			\
+	or	S01, TMP2, S01;			\
+	or	S23, TMP1, S23
+
+	cmp	%o2, 16
+	bne	1f
+	 nop
+	/* 128-bit key */
+	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
+	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
+	MOVDTOX_F0_O4
+	MOVDTOX_F2_O5
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x30]	! k[12, 13]
+	stx	%o5, [%o1 + 0x38]	! k[14, 15]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x40]	! k[16, 17]
+	stx	%o5, [%o1 + 0x48]	! k[18, 19]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x60]	! k[24, 25]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x70]	! k[28, 29]
+	stx	%o5, [%o1 + 0x78]	! k[30, 31]
+	ROTL128(%o4, %o5, %g2, %g3, 34)
+	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
+	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
+	ROTL128(%o4, %o5, %g2, %g3, 17)
+	stx	%o4, [%o1 + 0xc0]	! k[48, 49]
+	stx	%o5, [%o1 + 0xc8]	! k[50, 51]
+
+	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
+	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
+	stx	%o5, [%o1 + 0x28]	! k[10, 11]
+	ROTL128(%o4, %o5, %g2, %g3, 30)
+	stx	%o4, [%o1 + 0x50]	! k[20, 21]
+	stx	%o5, [%o1 + 0x58]	! k[22, 23]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o5, [%o1 + 0x68]	! k[26, 27]
+	ROTL128(%o4, %o5, %g2, %g3, 17)
+	stx	%o4, [%o1 + 0x80]	! k[32, 33]
+	stx	%o5, [%o1 + 0x88]	! k[34, 35]
+	ROTL128(%o4, %o5, %g2, %g3, 17)
+	stx	%o4, [%o1 + 0x90]	! k[36, 37]
+	stx	%o5, [%o1 + 0x98]	! k[38, 39]
+	ROTL128(%o4, %o5, %g2, %g3, 17)
+	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
+	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
+
+	ba,pt	%xcc, 2f
+	 mov	(3 * 16 * 4), %o0
+
+1:
+	/* 192-bit or 256-bit key */
+	std	%f0, [%o1 + 0x30]	! k[12, 13]
+	std	%f2, [%o1 + 0x38]	! k[14, 15]
+	ldd	[%o1 + 0x20], %f4	! k[ 8,  9]
+	ldd	[%o1 + 0x28], %f6	! k[10, 11]
+	fxor	%f0, %f4, %f0
+	fxor	%f2, %f6, %f2
+	CAMELLIA_F(24, 2, 0, 2)
+	CAMELLIA_F(26, 0, 2, 0)
+	std	%f0, [%o1 + 0x10]	! k[ 4,  5]
+	std	%f2, [%o1 + 0x18]	! k[ 6,  7]
+	MOVDTOX_F0_O4
+	MOVDTOX_F2_O5
+	ROTL128(%o4, %o5, %g2, %g3, 30)
+	stx	%o4, [%o1 + 0x50]	! k[20, 21]
+	stx	%o5, [%o1 + 0x58]	! k[22, 23]
+	ROTL128(%o4, %o5, %g2, %g3, 30)
+	stx	%o4, [%o1 + 0xa0]	! k[40, 41]
+	stx	%o5, [%o1 + 0xa8]	! k[42, 43]
+	ROTL128(%o4, %o5, %g2, %g3, 51)
+	stx	%o4, [%o1 + 0x100]	! k[64, 65]
+	stx	%o5, [%o1 + 0x108]	! k[66, 67]
+	ldx	[%o1 + 0x20], %o4	! k[ 8,  9]
+	ldx	[%o1 + 0x28], %o5	! k[10, 11]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x20]	! k[ 8,  9]
+	stx	%o5, [%o1 + 0x28]	! k[10, 11]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x40]	! k[16, 17]
+	stx	%o5, [%o1 + 0x48]	! k[18, 19]
+	ROTL128(%o4, %o5, %g2, %g3, 30)
+	stx	%o4, [%o1 + 0x90]	! k[36, 37]
+	stx	%o5, [%o1 + 0x98]	! k[38, 39]
+	ROTL128(%o4, %o5, %g2, %g3, 34)
+	stx	%o4, [%o1 + 0xd0]	! k[52, 53]
+	stx	%o5, [%o1 + 0xd8]	! k[54, 55]
+	ldx	[%o1 + 0x30], %o4	! k[12, 13]
+	ldx	[%o1 + 0x38], %o5	! k[14, 15]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x30]	! k[12, 13]
+	stx	%o5, [%o1 + 0x38]	! k[14, 15]
+	ROTL128(%o4, %o5, %g2, %g3, 30)
+	stx	%o4, [%o1 + 0x70]	! k[28, 29]
+	stx	%o5, [%o1 + 0x78]	! k[30, 31]
+	srlx	%o4, 32, %g2
+	srlx	%o5, 32, %g3
+	stw	%o4, [%o1 + 0xc0]	! k[48]
+	stw	%g3, [%o1 + 0xc4]	! k[49]
+	stw	%o5, [%o1 + 0xc8]	! k[50]
+	stw	%g2, [%o1 + 0xcc]	! k[51]
+	ROTL128(%o4, %o5, %g2, %g3, 49)
+	stx	%o4, [%o1 + 0xe0]	! k[56, 57]
+	stx	%o5, [%o1 + 0xe8]	! k[58, 59]
+	ldx	[%o1 + 0x00], %o4	! k[ 0,  1]
+	ldx	[%o1 + 0x08], %o5	! k[ 2,  3]
+	ROTL128(%o4, %o5, %g2, %g3, 45)
+	stx	%o4, [%o1 + 0x60]	! k[24, 25]
+	stx	%o5, [%o1 + 0x68]	! k[26, 27]
+	ROTL128(%o4, %o5, %g2, %g3, 15)
+	stx	%o4, [%o1 + 0x80]	! k[32, 33]
+	stx	%o5, [%o1 + 0x88]	! k[34, 35]
+	ROTL128(%o4, %o5, %g2, %g3, 17)
+	stx	%o4, [%o1 + 0xb0]	! k[44, 45]
+	stx	%o5, [%o1 + 0xb8]	! k[46, 47]
+	ROTL128(%o4, %o5, %g2, %g3, 34)
+	stx	%o4, [%o1 + 0xf0]	! k[60, 61]
+	stx	%o5, [%o1 + 0xf8]	! k[62, 63]
+	mov	(4 * 16 * 4), %o0
+2:
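+	/* Build the decrypt schedule by copying the encrypt schedule in
+	 * reverse, one 64-byte chunk per loop iteration.
+	 */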
+	add	%o1, %o0, %o1
+	ldd	[%o1 + 0x00], %f0
+	ldd	[%o1 + 0x08], %f2
+	std	%f0, [%o3 + 0x00]
+	std	%f2, [%o3 + 0x08]
+	add	%o3, 0x10, %o3
+1:
+	sub	%o1, (16 * 4), %o1
+	ldd	[%o1 + 0x38], %f0
+	ldd	[%o1 + 0x30], %f2
+	ldd	[%o1 + 0x28], %f4
+	ldd	[%o1 + 0x20], %f6
+	ldd	[%o1 + 0x18], %f8
+	ldd	[%o1 + 0x10], %f10
+	std	%f0, [%o3 + 0x00]
+	std	%f2, [%o3 + 0x08]
+	std	%f4, [%o3 + 0x10]
+	std	%f6, [%o3 + 0x18]
+	std	%f8, [%o3 + 0x20]
+	std	%f10, [%o3 + 0x28]
+
+	ldd	[%o1 + 0x08], %f0
+	ldd	[%o1 + 0x00], %f2
+	std	%f0, [%o3 + 0x30]
+	std	%f2, [%o3 + 0x38]
+	subcc	%o0, (16 * 4), %o0
+	bne,pt	%icc, 1b
+	 add	%o3, (16 * 4), %o3
+
+	std	%f2, [%o3 - 0x10]
+	std	%f0, [%o3 - 0x08]
+
+	retl
+	 VISExit
+ENDPROC(camellia_sparc64_key_expand)
+
+	.align	32
+ENTRY(camellia_sparc64_crypt)
+	/* %o0=key, %o1=input, %o2=output, %o3=key_len */
+	VISEntry
+
+	ld	[%o1 + 0x00], %f0
+	ld	[%o1 + 0x04], %f1
+	ld	[%o1 + 0x08], %f2
+	ld	[%o1 + 0x0c], %f3
+
+	ldd	[%o0 + 0x00], %f4
+	ldd	[%o0 + 0x08], %f6
+
+	cmp	%o3, 16
+	fxor	%f4, %f0, %f0
+	be	1f
+	 fxor	%f6, %f2, %f2
+
+	ldd	[%o0 + 0x10], %f8
+	ldd	[%o0 + 0x18], %f10
+	ldd	[%o0 + 0x20], %f12
+	ldd	[%o0 + 0x28], %f14
+	ldd	[%o0 + 0x30], %f16
+	ldd	[%o0 + 0x38], %f18
+	ldd	[%o0 + 0x40], %f20
+	ldd	[%o0 + 0x48], %f22
+	add	%o0, 0x40, %o0
+
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+
+1:
+	ldd	[%o0 + 0x10], %f8
+	ldd	[%o0 + 0x18], %f10
+	ldd	[%o0 + 0x20], %f12
+	ldd	[%o0 + 0x28], %f14
+	ldd	[%o0 + 0x30], %f16
+	ldd	[%o0 + 0x38], %f18
+	ldd	[%o0 + 0x40], %f20
+	ldd	[%o0 + 0x48], %f22
+	ldd	[%o0 + 0x50], %f24
+	ldd	[%o0 + 0x58], %f26
+	ldd	[%o0 + 0x60], %f28
+	ldd	[%o0 + 0x68], %f30
+	ldd	[%o0 + 0x70], %f32
+	ldd	[%o0 + 0x78], %f34
+	ldd	[%o0 + 0x80], %f36
+	ldd	[%o0 + 0x88], %f38
+	ldd	[%o0 + 0x90], %f40
+	ldd	[%o0 + 0x98], %f42
+	ldd	[%o0 + 0xa0], %f44
+	ldd	[%o0 + 0xa8], %f46
+	ldd	[%o0 + 0xb0], %f48
+	ldd	[%o0 + 0xb8], %f50
+	ldd	[%o0 + 0xc0], %f52
+	ldd	[%o0 + 0xc8], %f54
+
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS(40, 0, 2)
+	fxor	%f52, %f2, %f2
+	fxor	%f54, %f0, %f0
+
+	st	%f2, [%o2 + 0x00]
+	st	%f3, [%o2 + 0x04]
+	st	%f0, [%o2 + 0x08]
+	st	%f1, [%o2 + 0x0c]
+
+	retl
+	 VISExit
+ENDPROC(camellia_sparc64_crypt)
+
+	.align	32
+ENTRY(camellia_sparc64_load_keys)
+	/* %o0=key, %o1=key_len */
+	VISEntry
+	ldd	[%o0 + 0x00], %f4
+	ldd	[%o0 + 0x08], %f6
+	ldd	[%o0 + 0x10], %f8
+	ldd	[%o0 + 0x18], %f10
+	ldd	[%o0 + 0x20], %f12
+	ldd	[%o0 + 0x28], %f14
+	ldd	[%o0 + 0x30], %f16
+	ldd	[%o0 + 0x38], %f18
+	ldd	[%o0 + 0x40], %f20
+	ldd	[%o0 + 0x48], %f22
+	ldd	[%o0 + 0x50], %f24
+	ldd	[%o0 + 0x58], %f26
+	ldd	[%o0 + 0x60], %f28
+	ldd	[%o0 + 0x68], %f30
+	ldd	[%o0 + 0x70], %f32
+	ldd	[%o0 + 0x78], %f34
+	ldd	[%o0 + 0x80], %f36
+	ldd	[%o0 + 0x88], %f38
+	ldd	[%o0 + 0x90], %f40
+	ldd	[%o0 + 0x98], %f42
+	ldd	[%o0 + 0xa0], %f44
+	ldd	[%o0 + 0xa8], %f46
+	ldd	[%o0 + 0xb0], %f48
+	ldd	[%o0 + 0xb8], %f50
+	ldd	[%o0 + 0xc0], %f52
+	retl
+	 ldd	[%o0 + 0xc8], %f54
+ENDPROC(camellia_sparc64_load_keys)
+
+	.align	32
+ENTRY(camellia_sparc64_ecb_crypt_3_grand_rounds)
+	/* %o0=input, %o1=output, %o2=len, %o3=key */
+1:	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	add	%o0, 0x10, %o0
+	fxor	%f4, %f0, %f0
+	fxor	%f6, %f2, %f2
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS(40, 0, 2)
+	fxor	%f52, %f2, %f2
+	fxor	%f54, %f0, %f0
+	std	%f2, [%o1 + 0x00]
+	std	%f0, [%o1 + 0x08]
+	subcc	%o2, 0x10, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x10, %o1
+	retl
+	 nop
+ENDPROC(camellia_sparc64_ecb_crypt_3_grand_rounds)
+
+	.align	32
+ENTRY(camellia_sparc64_ecb_crypt_4_grand_rounds)
+	/* %o0=input, %o1=output, %o2=len, %o3=key */
+1:	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	add	%o0, 0x10, %o0
+	fxor	%f4, %f0, %f0
+	fxor	%f6, %f2, %f2
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	ldd	[%o3 + 0xd0], %f8
+	ldd	[%o3 + 0xd8], %f10
+	ldd	[%o3 + 0xe0], %f12
+	ldd	[%o3 + 0xe8], %f14
+	ldd	[%o3 + 0xf0], %f16
+	ldd	[%o3 + 0xf8], %f18
+	ldd	[%o3 + 0x100], %f20
+	ldd	[%o3 + 0x108], %f22
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+	CAMELLIA_F(8, 2, 0, 2)
+	CAMELLIA_F(10, 0, 2, 0)
+	ldd	[%o3 + 0x10], %f8
+	ldd	[%o3 + 0x18], %f10
+	CAMELLIA_F(12, 2, 0, 2)
+	CAMELLIA_F(14, 0, 2, 0)
+	ldd	[%o3 + 0x20], %f12
+	ldd	[%o3 + 0x28], %f14
+	CAMELLIA_F(16, 2, 0, 2)
+	CAMELLIA_F(18, 0, 2, 0)
+	ldd	[%o3 + 0x30], %f16
+	ldd	[%o3 + 0x38], %f18
+	fxor	%f20, %f2, %f2
+	fxor	%f22, %f0, %f0
+	ldd	[%o3 + 0x40], %f20
+	ldd	[%o3 + 0x48], %f22
+	std	%f2, [%o1 + 0x00]
+	std	%f0, [%o1 + 0x08]
+	subcc	%o2, 0x10, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x10, %o1
+	retl
+	 nop
+ENDPROC(camellia_sparc64_ecb_crypt_4_grand_rounds)
+
+	.align	32
+ENTRY(camellia_sparc64_cbc_encrypt_3_grand_rounds)
+	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+	ldd	[%o4 + 0x00], %f60
+	ldd	[%o4 + 0x08], %f62
+1:	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	add	%o0, 0x10, %o0
+	fxor	%f60, %f0, %f0
+	fxor	%f62, %f2, %f2
+	fxor	%f4, %f0, %f0
+	fxor	%f6, %f2, %f2
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS(40, 0, 2)
+	fxor	%f52, %f2, %f60
+	fxor	%f54, %f0, %f62
+	std	%f60, [%o1 + 0x00]
+	std	%f62, [%o1 + 0x08]
+	subcc	%o2, 0x10, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x10, %o1
+	std	%f60, [%o4 + 0x00]
+	retl
+	 std	%f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_encrypt_3_grand_rounds)
+
+	.align	32
+ENTRY(camellia_sparc64_cbc_encrypt_4_grand_rounds)
+	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+	ldd	[%o4 + 0x00], %f60
+	ldd	[%o4 + 0x08], %f62
+1:	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	add	%o0, 0x10, %o0
+	fxor	%f60, %f0, %f0
+	fxor	%f62, %f2, %f2
+	fxor	%f4, %f0, %f0
+	fxor	%f6, %f2, %f2
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	ldd	[%o3 + 0xd0], %f8
+	ldd	[%o3 + 0xd8], %f10
+	ldd	[%o3 + 0xe0], %f12
+	ldd	[%o3 + 0xe8], %f14
+	ldd	[%o3 + 0xf0], %f16
+	ldd	[%o3 + 0xf8], %f18
+	ldd	[%o3 + 0x100], %f20
+	ldd	[%o3 + 0x108], %f22
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+	CAMELLIA_F(8, 2, 0, 2)
+	CAMELLIA_F(10, 0, 2, 0)
+	ldd	[%o3 + 0x10], %f8
+	ldd	[%o3 + 0x18], %f10
+	CAMELLIA_F(12, 2, 0, 2)
+	CAMELLIA_F(14, 0, 2, 0)
+	ldd	[%o3 + 0x20], %f12
+	ldd	[%o3 + 0x28], %f14
+	CAMELLIA_F(16, 2, 0, 2)
+	CAMELLIA_F(18, 0, 2, 0)
+	ldd	[%o3 + 0x30], %f16
+	ldd	[%o3 + 0x38], %f18
+	fxor	%f20, %f2, %f60
+	fxor	%f22, %f0, %f62
+	ldd	[%o3 + 0x40], %f20
+	ldd	[%o3 + 0x48], %f22
+	std	%f60, [%o1 + 0x00]
+	std	%f62, [%o1 + 0x08]
+	subcc	%o2, 0x10, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x10, %o1
+	std	%f60, [%o4 + 0x00]
+	retl
+	 std	%f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_encrypt_4_grand_rounds)
+
+	.align	32
+ENTRY(camellia_sparc64_cbc_decrypt_3_grand_rounds)
+	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+	ldd	[%o4 + 0x00], %f60
+	ldd	[%o4 + 0x08], %f62
+1:	ldd	[%o0 + 0x00], %f56
+	ldd	[%o0 + 0x08], %f58
+	add	%o0, 0x10, %o0
+	fxor	%f4, %f56, %f0
+	fxor	%f6, %f58, %f2
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS(40, 0, 2)
+	fxor	%f52, %f2, %f2
+	fxor	%f54, %f0, %f0
+	fxor	%f60, %f2, %f2
+	fxor	%f62, %f0, %f0
+	fsrc2	%f56, %f60
+	fsrc2	%f58, %f62
+	std	%f2, [%o1 + 0x00]
+	std	%f0, [%o1 + 0x08]
+	subcc	%o2, 0x10, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x10, %o1
+	std	%f60, [%o4 + 0x00]
+	retl
+	 std	%f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_decrypt_3_grand_rounds)
+
+	.align	32
+ENTRY(camellia_sparc64_cbc_decrypt_4_grand_rounds)
+	/* %o0=input, %o1=output, %o2=len, %o3=key, %o4=IV */
+	ldd	[%o4 + 0x00], %f60
+	ldd	[%o4 + 0x08], %f62
+1:	ldd	[%o0 + 0x00], %f56
+	ldd	[%o0 + 0x08], %f58
+	add	%o0, 0x10, %o0
+	fxor	%f4, %f56, %f0
+	fxor	%f6, %f58, %f2
+	CAMELLIA_6ROUNDS_FL_FLI( 8, 0, 2)
+	ldd	[%o3 + 0xd0], %f8
+	ldd	[%o3 + 0xd8], %f10
+	ldd	[%o3 + 0xe0], %f12
+	ldd	[%o3 + 0xe8], %f14
+	ldd	[%o3 + 0xf0], %f16
+	ldd	[%o3 + 0xf8], %f18
+	ldd	[%o3 + 0x100], %f20
+	ldd	[%o3 + 0x108], %f22
+	CAMELLIA_6ROUNDS_FL_FLI(24, 0, 2)
+	CAMELLIA_6ROUNDS_FL_FLI(40, 0, 2)
+	CAMELLIA_F(8, 2, 0, 2)
+	CAMELLIA_F(10, 0, 2, 0)
+	ldd	[%o3 + 0x10], %f8
+	ldd	[%o3 + 0x18], %f10
+	CAMELLIA_F(12, 2, 0, 2)
+	CAMELLIA_F(14, 0, 2, 0)
+	ldd	[%o3 + 0x20], %f12
+	ldd	[%o3 + 0x28], %f14
+	CAMELLIA_F(16, 2, 0, 2)
+	CAMELLIA_F(18, 0, 2, 0)
+	ldd	[%o3 + 0x30], %f16
+	ldd	[%o3 + 0x38], %f18
+	fxor	%f20, %f2, %f2
+	fxor	%f22, %f0, %f0
+	ldd	[%o3 + 0x40], %f20
+	ldd	[%o3 + 0x48], %f22
+	fxor	%f60, %f2, %f2
+	fxor	%f62, %f0, %f0
+	fsrc2	%f56, %f60
+	fsrc2	%f58, %f62
+	std	%f2, [%o1 + 0x00]
+	std	%f0, [%o1 + 0x08]
+	subcc	%o2, 0x10, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x10, %o1
+	std	%f60, [%o4 + 0x00]
+	retl
+	 std	%f62, [%o4 + 0x08]
+ENDPROC(camellia_sparc64_cbc_decrypt_4_grand_rounds)
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
new file mode 100644
index 0000000..561a84d
--- /dev/null
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -0,0 +1,328 @@
+/* Glue code for CAMELLIA encryption optimized for sparc64 crypto opcodes.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+
+#include <asm/fpumacro.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+#define CAMELLIA_MIN_KEY_SIZE        16
+#define CAMELLIA_MAX_KEY_SIZE        32
+#define CAMELLIA_BLOCK_SIZE          16
+#define CAMELLIA_TABLE_BYTE_LEN     272
+
+struct camellia_sparc64_ctx {
+	u64 encrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+	u64 decrypt_key[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+	int key_len;
+};
+
+extern void camellia_sparc64_key_expand(const u32 *in_key, u64 *encrypt_key,
+					unsigned int key_len, u64 *decrypt_key);
+
+static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
+			    unsigned int key_len)
+{
+	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u32 *in_key = (const u32 *) _in_key;
+	u32 *flags = &tfm->crt_flags;
+
+	if (key_len != 16 && key_len != 24 && key_len != 32) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	ctx->key_len = key_len;
+
+	camellia_sparc64_key_expand(in_key, &ctx->encrypt_key[0],
+				    key_len, &ctx->decrypt_key[0]);
+	return 0;
+}
+
+extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
+				   u32 *output, unsigned int key_len);
+
+static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	camellia_sparc64_crypt(&ctx->encrypt_key[0],
+			       (const u32 *) src,
+			       (u32 *) dst, ctx->key_len);
+}
+
+static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct camellia_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	camellia_sparc64_crypt(&ctx->decrypt_key[0],
+			       (const u32 *) src,
+			       (u32 *) dst, ctx->key_len);
+}
+
+extern void camellia_sparc64_load_keys(const u64 *key, unsigned int key_len);
+
+typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
+			  const u64 *key);
+
+extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
+extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
+
+#define CAMELLIA_BLOCK_MASK	(~(CAMELLIA_BLOCK_SIZE - 1))
+
+static int __ecb_crypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes, bool encrypt)
+{
+	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	ecb_crypt_op *op;
+	const u64 *key;
+	int err;
+
+	op = camellia_sparc64_ecb_crypt_3_grand_rounds;
+	if (ctx->key_len != 16)
+		op = camellia_sparc64_ecb_crypt_4_grand_rounds;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	if (encrypt)
+		key = &ctx->encrypt_key[0];
+	else
+		key = &ctx->decrypt_key[0];
+	camellia_sparc64_load_keys(key, ctx->key_len);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			const u64 *src64;
+			u64 *dst64;
+
+			src64 = (const u64 *)walk.src.virt.addr;
+			dst64 = (u64 *) walk.dst.virt.addr;
+			op(src64, dst64, block_len, key);
+		}
+		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	return __ecb_crypt(desc, dst, src, nbytes, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	return __ecb_crypt(desc, dst, src, nbytes, false);
+}
+
+typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
+			  const u64 *key, u64 *iv);
+
+extern cbc_crypt_op camellia_sparc64_cbc_encrypt_3_grand_rounds;
+extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
+extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
+extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	cbc_crypt_op *op;
+	const u64 *key;
+	int err;
+
+	op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
+	if (ctx->key_len != 16)
+		op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	key = &ctx->encrypt_key[0];
+	camellia_sparc64_load_keys(key, ctx->key_len);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			const u64 *src64;
+			u64 *dst64;
+
+			src64 = (const u64 *)walk.src.virt.addr;
+			dst64 = (u64 *) walk.dst.virt.addr;
+			op(src64, dst64, block_len, key,
+			   (u64 *) walk.iv);
+		}
+		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	cbc_crypt_op *op;
+	const u64 *key;
+	int err;
+
+	op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
+	if (ctx->key_len != 16)
+		op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	key = &ctx->decrypt_key[0];
+	camellia_sparc64_load_keys(key, ctx->key_len);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			const u64 *src64;
+			u64 *dst64;
+
+			src64 = (const u64 *)walk.src.virt.addr;
+			dst64 = (u64 *) walk.dst.virt.addr;
+			op(src64, dst64, block_len, key,
+			   (u64 *) walk.iv);
+		}
+		nbytes &= CAMELLIA_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static struct crypto_alg algs[] = { {
+	.cra_name		= "camellia",
+	.cra_driver_name	= "camellia-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
+	.cra_alignmask		= 3,
+	.cra_module		= THIS_MODULE,
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= CAMELLIA_MIN_KEY_SIZE,
+			.cia_max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.cia_setkey		= camellia_set_key,
+			.cia_encrypt		= camellia_encrypt,
+			.cia_decrypt		= camellia_decrypt
+		}
+	}
+}, {
+	.cra_name		= "ecb(camellia)",
+	.cra_driver_name	= "ecb-camellia-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.setkey		= camellia_set_key,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(camellia)",
+	.cra_driver_name	= "cbc-camellia-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAMELLIA_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct camellia_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
+			.max_keysize	= CAMELLIA_MAX_KEY_SIZE,
+			.ivsize		= CAMELLIA_BLOCK_SIZE,
+			.setkey		= camellia_set_key,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}
+};
+
+static bool __init sparc64_has_camellia_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_CAMELLIA))
+		return false;
+
+	return true;
+}
+
+static int __init camellia_sparc64_mod_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(algs); i++)
+		INIT_LIST_HEAD(&algs[i].cra_list);
+
+	if (sparc64_has_camellia_opcode()) {
+		pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
+		return crypto_register_algs(algs, ARRAY_SIZE(algs));
+	}
+	pr_info("sparc64 camellia opcodes not available.\n");
+	return -ENODEV;
+}
+
+static void __exit camellia_sparc64_mod_fini(void)
+{
+	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(camellia_sparc64_mod_init);
+module_exit(camellia_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("camellia");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/crypto/crc32c_asm.S
new file mode 100644
index 0000000..b8659a4
--- /dev/null
+++ b/arch/sparc/crypto/crc32c_asm.S
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+
+#include "opcodes.h"
+
+ENTRY(crc32c_sparc64)
+	/* %o0=crc32p, %o1=data_ptr, %o2=len */
+	VISEntryHalf
+	lda	[%o0] ASI_PL, %f1
+1:	ldd	[%o1], %f2
+	CRC32C(0,2,0)
+	subcc	%o2, 8, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x8, %o1
+	sta	%f1, [%o0] ASI_PL
+	VISExitHalf
+2:	retl
+	 nop
+ENDPROC(crc32c_sparc64)
diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c
new file mode 100644
index 0000000..8aa6646
--- /dev/null
+++ b/arch/sparc/crypto/crc32c_glue.c
@@ -0,0 +1,182 @@
+/* Glue code for CRC32C optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/crc32c-intel.c
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Authors: Austin Zhang <austin_zhang@linux.intel.com>
+ *          Kent Liu <kent.liu@intel.com>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/crc32.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy.
+ * If your algorithm starts with ~0, then XOR with ~0 before you set the
+ * seed.
+ */
+static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key,
+				 unsigned int keylen)
+{
+	u32 *mctx = crypto_shash_ctx(hash);
+
+	if (keylen != sizeof(u32)) {
+		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	*(__le32 *)mctx = le32_to_cpup((__le32 *)key);
+	return 0;
+}
+
+static int crc32c_sparc64_init(struct shash_desc *desc)
+{
+	u32 *mctx = crypto_shash_ctx(desc->tfm);
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*crcp = *mctx;
+
+	return 0;
+}
+
+extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len);
+
+static void crc32c_compute(u32 *crcp, const u64 *data, unsigned int len)
+{
+	unsigned int asm_len;
+
+	asm_len = len & ~7U;
+	if (asm_len) {
+		crc32c_sparc64(crcp, data, asm_len);
+		data += asm_len / 8;
+		len -= asm_len;
+	}
+	if (len)
+		*crcp = __crc32c_le(*crcp, (const unsigned char *) data, len);
+}
+
+static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data,
+				 unsigned int len)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	crc32c_compute(crcp, (const u64 *) data, len);
+
+	return 0;
+}
+
+static int __crc32c_sparc64_finup(u32 *crcp, const u8 *data, unsigned int len,
+				  u8 *out)
+{
+	u32 tmp = *crcp;
+
+	crc32c_compute(&tmp, (const u64 *) data, len);
+
+	*(__le32 *) out = ~cpu_to_le32(tmp);
+	return 0;
+}
+
+static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data,
+				unsigned int len, u8 *out)
+{
+	return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+	u32 *crcp = shash_desc_ctx(desc);
+
+	*(__le32 *) out = ~cpu_to_le32p(crcp);
+	return 0;
+}
+
+static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data,
+				 unsigned int len, u8 *out)
+{
+	return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len,
+				      out);
+}
+
+static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm)
+{
+	u32 *key = crypto_tfm_ctx(tfm);
+
+	*key = ~0;
+
+	return 0;
+}
+
+#define CHKSUM_BLOCK_SIZE	1
+#define CHKSUM_DIGEST_SIZE	4
+
+static struct shash_alg alg = {
+	.setkey			=	crc32c_sparc64_setkey,
+	.init			=	crc32c_sparc64_init,
+	.update			=	crc32c_sparc64_update,
+	.final			=	crc32c_sparc64_final,
+	.finup			=	crc32c_sparc64_finup,
+	.digest			=	crc32c_sparc64_digest,
+	.descsize		=	sizeof(u32),
+	.digestsize		=	CHKSUM_DIGEST_SIZE,
+	.base			=	{
+		.cra_name		=	"crc32c",
+		.cra_driver_name	=	"crc32c-sparc64",
+		.cra_priority		=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags		=	CRYPTO_ALG_OPTIONAL_KEY,
+		.cra_blocksize		=	CHKSUM_BLOCK_SIZE,
+		.cra_ctxsize		=	sizeof(u32),
+		.cra_alignmask		=	7,
+		.cra_module		=	THIS_MODULE,
+		.cra_init		=	crc32c_sparc64_cra_init,
+	}
+};
+
+static bool __init sparc64_has_crc32c_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_CRC32C))
+		return false;
+
+	return true;
+}
+
+static int __init crc32c_sparc64_mod_init(void)
+{
+	if (sparc64_has_crc32c_opcode()) {
+		pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n");
+		return crypto_register_shash(&alg);
+	}
+	pr_info("sparc64 crc32c opcode not available.\n");
+	return -ENODEV;
+}
+
+static void __exit crc32c_sparc64_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(crc32c_sparc64_mod_init);
+module_exit(crc32c_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("crc32c");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/crop_devid.c b/arch/sparc/crypto/crop_devid.c
new file mode 100644
index 0000000..83fc453
--- /dev/null
+++ b/arch/sparc/crypto/crop_devid.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+/* This is a dummy device table linked into all of the crypto
+ * opcode drivers.  It serves to trigger the module autoloading
+ * mechanisms in userspace which scan the OF device tree and
+ * load any modules which have device table entries that
+ * match OF device nodes.
+ */
+static const struct of_device_id crypto_opcode_match[] = {
+	{ .name = "cpu", .compatible = "sun4v", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, crypto_opcode_match);
diff --git a/arch/sparc/crypto/des_asm.S b/arch/sparc/crypto/des_asm.S
new file mode 100644
index 0000000..7157468
--- /dev/null
+++ b/arch/sparc/crypto/des_asm.S
@@ -0,0 +1,420 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+	.align	32
+ENTRY(des_sparc64_key_expand)
+	/* %o0=input_key, %o1=output_key */
+	VISEntryHalf
+	ld	[%o0 + 0x00], %f0
+	ld	[%o0 + 0x04], %f1
+	DES_KEXPAND(0, 0, 0)
+	DES_KEXPAND(0, 1, 2)
+	DES_KEXPAND(2, 3, 6)
+	DES_KEXPAND(2, 2, 4)
+	DES_KEXPAND(6, 3, 10)
+	DES_KEXPAND(6, 2, 8)
+	DES_KEXPAND(10, 3, 14)
+	DES_KEXPAND(10, 2, 12)
+	DES_KEXPAND(14, 1, 16)
+	DES_KEXPAND(16, 3, 20)
+	DES_KEXPAND(16, 2, 18)
+	DES_KEXPAND(20, 3, 24)
+	DES_KEXPAND(20, 2, 22)
+	DES_KEXPAND(24, 3, 28)
+	DES_KEXPAND(24, 2, 26)
+	DES_KEXPAND(28, 1, 30)
+	std	%f0, [%o1 + 0x00]
+	std	%f2, [%o1 + 0x08]
+	std	%f4, [%o1 + 0x10]
+	std	%f6, [%o1 + 0x18]
+	std	%f8, [%o1 + 0x20]
+	std	%f10, [%o1 + 0x28]
+	std	%f12, [%o1 + 0x30]
+	std	%f14, [%o1 + 0x38]
+	std	%f16, [%o1 + 0x40]
+	std	%f18, [%o1 + 0x48]
+	std	%f20, [%o1 + 0x50]
+	std	%f22, [%o1 + 0x58]
+	std	%f24, [%o1 + 0x60]
+	std	%f26, [%o1 + 0x68]
+	std	%f28, [%o1 + 0x70]
+	std	%f30, [%o1 + 0x78]
+	retl
+	 VISExitHalf
+ENDPROC(des_sparc64_key_expand)
+
+	.align	32
+ENTRY(des_sparc64_crypt)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ldd	[%o1 + 0x00], %f32
+	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	ldd	[%o0 + 0x10], %f4
+	ldd	[%o0 + 0x18], %f6
+	ldd	[%o0 + 0x20], %f8
+	ldd	[%o0 + 0x28], %f10
+	ldd	[%o0 + 0x30], %f12
+	ldd	[%o0 + 0x38], %f14
+	ldd	[%o0 + 0x40], %f16
+	ldd	[%o0 + 0x48], %f18
+	ldd	[%o0 + 0x50], %f20
+	ldd	[%o0 + 0x58], %f22
+	ldd	[%o0 + 0x60], %f24
+	ldd	[%o0 + 0x68], %f26
+	ldd	[%o0 + 0x70], %f28
+	ldd	[%o0 + 0x78], %f30
+	DES_IP(32, 32)
+	DES_ROUND(0, 2, 32, 32)
+	DES_ROUND(4, 6, 32, 32)
+	DES_ROUND(8, 10, 32, 32)
+	DES_ROUND(12, 14, 32, 32)
+	DES_ROUND(16, 18, 32, 32)
+	DES_ROUND(20, 22, 32, 32)
+	DES_ROUND(24, 26, 32, 32)
+	DES_ROUND(28, 30, 32, 32)
+	DES_IIP(32, 32)
+	std	%f32, [%o2 + 0x00]
+	retl
+	 VISExit
+ENDPROC(des_sparc64_crypt)
+
+	.align	32
+ENTRY(des_sparc64_load_keys)
+	/* %o0=key */
+	VISEntry
+	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	ldd	[%o0 + 0x10], %f4
+	ldd	[%o0 + 0x18], %f6
+	ldd	[%o0 + 0x20], %f8
+	ldd	[%o0 + 0x28], %f10
+	ldd	[%o0 + 0x30], %f12
+	ldd	[%o0 + 0x38], %f14
+	ldd	[%o0 + 0x40], %f16
+	ldd	[%o0 + 0x48], %f18
+	ldd	[%o0 + 0x50], %f20
+	ldd	[%o0 + 0x58], %f22
+	ldd	[%o0 + 0x60], %f24
+	ldd	[%o0 + 0x68], %f26
+	ldd	[%o0 + 0x70], %f28
+	retl
+	 ldd	[%o0 + 0x78], %f30
+ENDPROC(des_sparc64_load_keys)
+
+	.align	32
+ENTRY(des_sparc64_ecb_crypt)
+	/* %o0=input, %o1=output, %o2=len */
+1:	ldd	[%o0 + 0x00], %f32
+	add	%o0, 0x08, %o0
+	DES_IP(32, 32)
+	DES_ROUND(0, 2, 32, 32)
+	DES_ROUND(4, 6, 32, 32)
+	DES_ROUND(8, 10, 32, 32)
+	DES_ROUND(12, 14, 32, 32)
+	DES_ROUND(16, 18, 32, 32)
+	DES_ROUND(20, 22, 32, 32)
+	DES_ROUND(24, 26, 32, 32)
+	DES_ROUND(28, 30, 32, 32)
+	DES_IIP(32, 32)
+	std	%f32, [%o1 + 0x00]
+	subcc	%o2, 0x08, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x08, %o1
+	retl
+	 nop
+ENDPROC(des_sparc64_ecb_crypt)
+
+	.align	32
+ENTRY(des_sparc64_cbc_encrypt)
+	/* %o0=input, %o1=output, %o2=len, %o3=IV */
+	ldd	[%o3 + 0x00], %f32
+1:	ldd	[%o0 + 0x00], %f34
+	fxor	%f32, %f34, %f32
+	DES_IP(32, 32)
+	DES_ROUND(0, 2, 32, 32)
+	DES_ROUND(4, 6, 32, 32)
+	DES_ROUND(8, 10, 32, 32)
+	DES_ROUND(12, 14, 32, 32)
+	DES_ROUND(16, 18, 32, 32)
+	DES_ROUND(20, 22, 32, 32)
+	DES_ROUND(24, 26, 32, 32)
+	DES_ROUND(28, 30, 32, 32)
+	DES_IIP(32, 32)
+	std	%f32, [%o1 + 0x00]
+	add	%o0, 0x08, %o0
+	subcc	%o2, 0x08, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x08, %o1
+	retl
+	 std	%f32, [%o3 + 0x00]
+ENDPROC(des_sparc64_cbc_encrypt)
+
+	.align	32
+ENTRY(des_sparc64_cbc_decrypt)
+	/* %o0=input, %o1=output, %o2=len, %o3=IV */
+	ldd	[%o3 + 0x00], %f34
+1:	ldd	[%o0 + 0x00], %f36
+	DES_IP(36, 32)
+	DES_ROUND(0, 2, 32, 32)
+	DES_ROUND(4, 6, 32, 32)
+	DES_ROUND(8, 10, 32, 32)
+	DES_ROUND(12, 14, 32, 32)
+	DES_ROUND(16, 18, 32, 32)
+	DES_ROUND(20, 22, 32, 32)
+	DES_ROUND(24, 26, 32, 32)
+	DES_ROUND(28, 30, 32, 32)
+	DES_IIP(32, 32)
+	fxor	%f32, %f34, %f32
+	fsrc2	%f36, %f34
+	std	%f32, [%o1 + 0x00]
+	add	%o0, 0x08, %o0
+	subcc	%o2, 0x08, %o2
+	bne,pt	%icc, 1b
+	 add	%o1, 0x08, %o1
+	retl
+	 std	%f36, [%o3 + 0x00]
+ENDPROC(des_sparc64_cbc_decrypt)
+
+	.align	32
+ENTRY(des3_ede_sparc64_crypt)
+	/* %o0=key, %o1=input, %o2=output */
+	VISEntry
+	ldd	[%o1 + 0x00], %f32
+	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	ldd	[%o0 + 0x10], %f4
+	ldd	[%o0 + 0x18], %f6
+	ldd	[%o0 + 0x20], %f8
+	ldd	[%o0 + 0x28], %f10
+	ldd	[%o0 + 0x30], %f12
+	ldd	[%o0 + 0x38], %f14
+	ldd	[%o0 + 0x40], %f16
+	ldd	[%o0 + 0x48], %f18
+	ldd	[%o0 + 0x50], %f20
+	ldd	[%o0 + 0x58], %f22
+	ldd	[%o0 + 0x60], %f24
+	ldd	[%o0 + 0x68], %f26
+	ldd	[%o0 + 0x70], %f28
+	ldd	[%o0 + 0x78], %f30
+	DES_IP(32, 32)
+	DES_ROUND(0, 2, 32, 32)
+	ldd	[%o0 + 0x80], %f0
+	ldd	[%o0 + 0x88], %f2
+	DES_ROUND(4, 6, 32, 32)
+	ldd	[%o0 + 0x90], %f4
+	ldd	[%o0 + 0x98], %f6
+	DES_ROUND(8, 10, 32, 32)
+	ldd	[%o0 + 0xa0], %f8
+	ldd	[%o0 + 0xa8], %f10
+	DES_ROUND(12, 14, 32, 32)
+	ldd	[%o0 + 0xb0], %f12
+	ldd	[%o0 + 0xb8], %f14
+	DES_ROUND(16, 18, 32, 32)
+	ldd	[%o0 + 0xc0], %f16
+	ldd	[%o0 + 0xc8], %f18
+	DES_ROUND(20, 22, 32, 32)
+	ldd	[%o0 + 0xd0], %f20
+	ldd	[%o0 + 0xd8], %f22
+	DES_ROUND(24, 26, 32, 32)
+	ldd	[%o0 + 0xe0], %f24
+	ldd	[%o0 + 0xe8], %f26
+	DES_ROUND(28, 30, 32, 32)
+	ldd	[%o0 + 0xf0], %f28
+	ldd	[%o0 + 0xf8], %f30
+	DES_IIP(32, 32)
+	DES_IP(32, 32)
+	DES_ROUND(0, 2, 32, 32)
+	ldd	[%o0 + 0x100], %f0
+	ldd	[%o0 + 0x108], %f2
+	DES_ROUND(4, 6, 32, 32)
+	ldd	[%o0 + 0x110], %f4
+	ldd	[%o0 + 0x118], %f6
+	DES_ROUND(8, 10, 32, 32)
+	ldd	[%o0 + 0x120], %f8
+	ldd	[%o0 + 0x128], %f10
+	DES_ROUND(12, 14, 32, 32)
+	ldd	[%o0 + 0x130], %f12
+	ldd	[%o0 + 0x138], %f14
+	DES_ROUND(16, 18, 32, 32)
+	ldd	[%o0 + 0x140], %f16
+	ldd	[%o0 + 0x148], %f18
+	DES_ROUND(20, 22, 32, 32)
+	ldd	[%o0 + 0x150], %f20
+	ldd	[%o0 + 0x158], %f22
+	DES_ROUND(24, 26, 32, 32)
+	ldd	[%o0 + 0x160], %f24
+	ldd	[%o0 + 0x168], %f26
+	DES_ROUND(28, 30, 32, 32)
+	ldd	[%o0 + 0x170], %f28
+	ldd	[%o0 + 0x178], %f30
+	DES_IIP(32, 32)
+	DES_IP(32, 32)
+	DES_ROUND(0, 2, 32, 32)
+	DES_ROUND(4, 6, 32, 32)
+	DES_ROUND(8, 10, 32, 32)
+	DES_ROUND(12, 14, 32, 32)
+	DES_ROUND(16, 18, 32, 32)
+	DES_ROUND(20, 22, 32, 32)
+	DES_ROUND(24, 26, 32, 32)
+	DES_ROUND(28, 30, 32, 32)
+	DES_IIP(32, 32)
+
+	std	%f32, [%o2 + 0x00]
+	retl
+	 VISExit
+ENDPROC(des3_ede_sparc64_crypt)
+
+	.align	32
+ENTRY(des3_ede_sparc64_load_keys)
+	/* %o0=key */
+	VISEntry
+	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	ldd	[%o0 + 0x10], %f4
+	ldd	[%o0 + 0x18], %f6
+	ldd	[%o0 + 0x20], %f8
+	ldd	[%o0 + 0x28], %f10
+	ldd	[%o0 + 0x30], %f12
+	ldd	[%o0 + 0x38], %f14
+	ldd	[%o0 + 0x40], %f16
+	ldd	[%o0 + 0x48], %f18
+	ldd	[%o0 + 0x50], %f20
+	ldd	[%o0 + 0x58], %f22
+	ldd	[%o0 + 0x60], %f24
+	ldd	[%o0 + 0x68], %f26
+	ldd	[%o0 + 0x70], %f28
+	ldd	[%o0 + 0x78], %f30
+	ldd	[%o0 + 0x80], %f32
+	ldd	[%o0 + 0x88], %f34
+	ldd	[%o0 + 0x90], %f36
+	ldd	[%o0 + 0x98], %f38
+	ldd	[%o0 + 0xa0], %f40
+	ldd	[%o0 + 0xa8], %f42
+	ldd	[%o0 + 0xb0], %f44
+	ldd	[%o0 + 0xb8], %f46
+	ldd	[%o0 + 0xc0], %f48
+	ldd	[%o0 + 0xc8], %f50
+	ldd	[%o0 + 0xd0], %f52
+	ldd	[%o0 + 0xd8], %f54
+	ldd	[%o0 + 0xe0], %f56
+	retl
+	 ldd	[%o0 + 0xe8], %f58
+ENDPROC(des3_ede_sparc64_load_keys)
+
+#define DES3_LOOP_BODY(X) \
+	DES_IP(X, X) \
+	DES_ROUND(0, 2, X, X) \
+	DES_ROUND(4, 6, X, X) \
+	DES_ROUND(8, 10, X, X) \
+	DES_ROUND(12, 14, X, X) \
+	DES_ROUND(16, 18, X, X) \
+	ldd	[%o0 + 0xf0], %f16; \
+	ldd	[%o0 + 0xf8], %f18; \
+	DES_ROUND(20, 22, X, X) \
+	ldd	[%o0 + 0x100], %f20; \
+	ldd	[%o0 + 0x108], %f22; \
+	DES_ROUND(24, 26, X, X) \
+	ldd	[%o0 + 0x110], %f24; \
+	ldd	[%o0 + 0x118], %f26; \
+	DES_ROUND(28, 30, X, X) \
+	ldd	[%o0 + 0x120], %f28; \
+	ldd	[%o0 + 0x128], %f30; \
+	DES_IIP(X, X) \
+	DES_IP(X, X) \
+	DES_ROUND(32, 34, X, X) \
+	ldd	[%o0 + 0x130], %f0; \
+	ldd	[%o0 + 0x138], %f2; \
+	DES_ROUND(36, 38, X, X) \
+	ldd	[%o0 + 0x140], %f4; \
+	ldd	[%o0 + 0x148], %f6; \
+	DES_ROUND(40, 42, X, X) \
+	ldd	[%o0 + 0x150], %f8; \
+	ldd	[%o0 + 0x158], %f10; \
+	DES_ROUND(44, 46, X, X) \
+	ldd	[%o0 + 0x160], %f12; \
+	ldd	[%o0 + 0x168], %f14; \
+	DES_ROUND(48, 50, X, X) \
+	DES_ROUND(52, 54, X, X) \
+	DES_ROUND(56, 58, X, X) \
+	DES_ROUND(16, 18, X, X) \
+	ldd	[%o0 + 0x170], %f16; \
+	ldd	[%o0 + 0x178], %f18; \
+	DES_IIP(X, X) \
+	DES_IP(X, X) \
+	DES_ROUND(20, 22, X, X) \
+	ldd	[%o0 + 0x50], %f20; \
+	ldd	[%o0 + 0x58], %f22; \
+	DES_ROUND(24, 26, X, X) \
+	ldd	[%o0 + 0x60], %f24; \
+	ldd	[%o0 + 0x68], %f26; \
+	DES_ROUND(28, 30, X, X) \
+	ldd	[%o0 + 0x70], %f28; \
+	ldd	[%o0 + 0x78], %f30; \
+	DES_ROUND(0, 2, X, X) \
+	ldd	[%o0 + 0x00], %f0; \
+	ldd	[%o0 + 0x08], %f2; \
+	DES_ROUND(4, 6, X, X) \
+	ldd	[%o0 + 0x10], %f4; \
+	ldd	[%o0 + 0x18], %f6; \
+	DES_ROUND(8, 10, X, X) \
+	ldd	[%o0 + 0x20], %f8; \
+	ldd	[%o0 + 0x28], %f10; \
+	DES_ROUND(12, 14, X, X) \
+	ldd	[%o0 + 0x30], %f12; \
+	ldd	[%o0 + 0x38], %f14; \
+	DES_ROUND(16, 18, X, X) \
+	ldd	[%o0 + 0x40], %f16; \
+	ldd	[%o0 + 0x48], %f18; \
+	DES_IIP(X, X)
+
+	.align	32
+ENTRY(des3_ede_sparc64_ecb_crypt)
+	/* %o0=key, %o1=input, %o2=output, %o3=len */
+1:	ldd	[%o1 + 0x00], %f60
+	DES3_LOOP_BODY(60)
+	std	%f60, [%o2 + 0x00]
+	add	%o1, 0x08, %o1
+	subcc	%o3, 0x08, %o3
+	bne,pt	%icc, 1b
+	 add	%o2, 0x08, %o2
+	retl
+	 nop
+ENDPROC(des3_ede_sparc64_ecb_crypt)
+
+	.align	32
+ENTRY(des3_ede_sparc64_cbc_encrypt)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd	[%o4 + 0x00], %f60
+1:	ldd	[%o1 + 0x00], %f62
+	fxor	%f60, %f62, %f60
+	DES3_LOOP_BODY(60)
+	std	%f60, [%o2 + 0x00]
+	add	%o1, 0x08, %o1
+	subcc	%o3, 0x08, %o3
+	bne,pt	%icc, 1b
+	 add	%o2, 0x08, %o2
+	retl
+	 std	%f60, [%o4 + 0x00]
+ENDPROC(des3_ede_sparc64_cbc_encrypt)
+
+	.align	32
+ENTRY(des3_ede_sparc64_cbc_decrypt)
+	/* %o0=key, %o1=input, %o2=output, %o3=len, %o4=IV */
+	ldd	[%o4 + 0x00], %f62
+1:	ldx	[%o1 + 0x00], %g1
+	MOVXTOD_G1_F60
+	DES3_LOOP_BODY(60)
+	fxor	%f62, %f60, %f60
+	MOVXTOD_G1_F62
+	std	%f60, [%o2 + 0x00]
+	add	%o1, 0x08, %o1
+	subcc	%o3, 0x08, %o3
+	bne,pt	%icc, 1b
+	 add	%o2, 0x08, %o2
+	retl
+	 stx	%g1, [%o4 + 0x00]
+ENDPROC(des3_ede_sparc64_cbc_decrypt)
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
new file mode 100644
index 0000000..61af794
--- /dev/null
+++ b/arch/sparc/crypto/des_glue.c
@@ -0,0 +1,540 @@
+/* Glue code for DES encryption optimized for sparc64 crypto opcodes.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <crypto/algapi.h>
+#include <crypto/des.h>
+
+#include <asm/fpumacro.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+struct des_sparc64_ctx {
+	u64 encrypt_expkey[DES_EXPKEY_WORDS / 2];
+	u64 decrypt_expkey[DES_EXPKEY_WORDS / 2];
+};
+
+struct des3_ede_sparc64_ctx {
+	u64 encrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
+	u64 decrypt_expkey[DES3_EDE_EXPKEY_WORDS / 2];
+};
+
+static void encrypt_to_decrypt(u64 *d, const u64 *e)
+{
+	const u64 *s = e + (DES_EXPKEY_WORDS / 2) - 1;
+	int i;
+
+	for (i = 0; i < DES_EXPKEY_WORDS / 2; i++)
+		*d++ = *s--;
+}
+
+extern void des_sparc64_key_expand(const u32 *input_key, u64 *key);
+
+static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
+		       unsigned int keylen)
+{
+	struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	u32 tmp[DES_EXPKEY_WORDS];
+	int ret;
+
+	/* Even though we have special instructions for key expansion,
+	 * we call des_ekey() so that we don't have to write our own
+	 * weak key detection code.
+	 */
+	ret = des_ekey(tmp, key);
+	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
+	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);
+
+	return 0;
+}
+
+extern void des_sparc64_crypt(const u64 *key, const u64 *input,
+			      u64 *output);
+
+static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u64 *K = ctx->encrypt_expkey;
+
+	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u64 *K = ctx->decrypt_expkey;
+
+	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+extern void des_sparc64_load_keys(const u64 *key);
+
+extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
+				  unsigned int len);
+
+#define DES_BLOCK_MASK	(~(DES_BLOCK_SIZE - 1))
+
+static int __ecb_crypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes, bool encrypt)
+{
+	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	if (encrypt)
+		des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+	else
+		des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
+					      (u64 *) walk.dst.virt.addr,
+					      block_len);
+		}
+		nbytes &= DES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	return __ecb_crypt(desc, dst, src, nbytes, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	return __ecb_crypt(desc, dst, src, nbytes, false);
+}
+
+extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
+				    unsigned int len, u64 *iv);
+
+static int cbc_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
+						(u64 *) walk.dst.virt.addr,
+						block_len, (u64 *) walk.iv);
+		}
+		nbytes &= DES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
+				    unsigned int len, u64 *iv);
+
+static int cbc_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
+						(u64 *) walk.dst.virt.addr,
+						block_len, (u64 *) walk.iv);
+		}
+		nbytes &= DES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
+	const u32 *K = (const u32 *)key;
+	u32 *flags = &tfm->crt_flags;
+	u64 k1[DES_EXPKEY_WORDS / 2];
+	u64 k2[DES_EXPKEY_WORDS / 2];
+	u64 k3[DES_EXPKEY_WORDS / 2];
+
+	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
+		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
+		     (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	des_sparc64_key_expand((const u32 *)key, k1);
+	key += DES_KEY_SIZE;
+	des_sparc64_key_expand((const u32 *)key, k2);
+	key += DES_KEY_SIZE;
+	des_sparc64_key_expand((const u32 *)key, k3);
+
+	memcpy(&dctx->encrypt_expkey[0], &k1[0], sizeof(k1));
+	encrypt_to_decrypt(&dctx->encrypt_expkey[DES_EXPKEY_WORDS / 2], &k2[0]);
+	memcpy(&dctx->encrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
+	       &k3[0], sizeof(k3));
+
+	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &k3[0]);
+	memcpy(&dctx->decrypt_expkey[DES_EXPKEY_WORDS / 2],
+	       &k2[0], sizeof(k2));
+	encrypt_to_decrypt(&dctx->decrypt_expkey[(DES_EXPKEY_WORDS / 2) * 2],
+			   &k1[0]);
+
+	return 0;
+}
+
+extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
+				   u64 *output);
+
+static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u64 *K = ctx->encrypt_expkey;
+
+	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
+	const u64 *K = ctx->decrypt_expkey;
+
+	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
+}
+
+extern void des3_ede_sparc64_load_keys(const u64 *key);
+
+extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
+				       u64 *output, unsigned int len);
+
+static int __ecb3_crypt(struct blkcipher_desc *desc,
+			struct scatterlist *dst, struct scatterlist *src,
+			unsigned int nbytes, bool encrypt)
+{
+	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	const u64 *K;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	if (encrypt)
+		K = &ctx->encrypt_expkey[0];
+	else
+		K = &ctx->decrypt_expkey[0];
+	des3_ede_sparc64_load_keys(K);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			const u64 *src64 = (const u64 *)walk.src.virt.addr;
+			des3_ede_sparc64_ecb_crypt(K, src64,
+						   (u64 *) walk.dst.virt.addr,
+						   block_len);
+		}
+		nbytes &= DES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static int ecb3_encrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	return __ecb3_crypt(desc, dst, src, nbytes, true);
+}
+
+static int ecb3_decrypt(struct blkcipher_desc *desc,
+		       struct scatterlist *dst, struct scatterlist *src,
+		       unsigned int nbytes)
+{
+	return __ecb3_crypt(desc, dst, src, nbytes, false);
+}
+
+extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
+					 u64 *output, unsigned int len,
+					 u64 *iv);
+
+static int cbc3_encrypt(struct blkcipher_desc *desc,
+			struct scatterlist *dst, struct scatterlist *src,
+			unsigned int nbytes)
+{
+	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	const u64 *K;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	K = &ctx->encrypt_expkey[0];
+	des3_ede_sparc64_load_keys(K);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			const u64 *src64 = (const u64 *)walk.src.virt.addr;
+			des3_ede_sparc64_cbc_encrypt(K, src64,
+						     (u64 *) walk.dst.virt.addr,
+						     block_len,
+						     (u64 *) walk.iv);
+		}
+		nbytes &= DES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
+					 u64 *output, unsigned int len,
+					 u64 *iv);
+
+static int cbc3_decrypt(struct blkcipher_desc *desc,
+			struct scatterlist *dst, struct scatterlist *src,
+			unsigned int nbytes)
+{
+	struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	struct blkcipher_walk walk;
+	const u64 *K;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	K = &ctx->decrypt_expkey[0];
+	des3_ede_sparc64_load_keys(K);
+	while ((nbytes = walk.nbytes)) {
+		unsigned int block_len = nbytes & DES_BLOCK_MASK;
+
+		if (likely(block_len)) {
+			const u64 *src64 = (const u64 *)walk.src.virt.addr;
+			des3_ede_sparc64_cbc_decrypt(K, src64,
+						     (u64 *) walk.dst.virt.addr,
+						     block_len,
+						     (u64 *) walk.iv);
+		}
+		nbytes &= DES_BLOCK_SIZE - 1;
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+	fprs_write(0);
+	return err;
+}
+
+static struct crypto_alg algs[] = { {
+	.cra_name		= "des",
+	.cra_driver_name	= "des-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_module		= THIS_MODULE,
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= DES_KEY_SIZE,
+			.cia_max_keysize	= DES_KEY_SIZE,
+			.cia_setkey		= des_set_key,
+			.cia_encrypt		= des_encrypt,
+			.cia_decrypt		= des_decrypt
+		}
+	}
+}, {
+	.cra_name		= "ecb(des)",
+	.cra_driver_name	= "ecb-des-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= DES_KEY_SIZE,
+			.max_keysize	= DES_KEY_SIZE,
+			.setkey		= des_set_key,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(des)",
+	.cra_driver_name	= "cbc-des-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct des_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= DES_KEY_SIZE,
+			.max_keysize	= DES_KEY_SIZE,
+			.ivsize		= DES_BLOCK_SIZE,
+			.setkey		= des_set_key,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "des3_ede",
+	.cra_driver_name	= "des3_ede-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_module		= THIS_MODULE,
+	.cra_u	= {
+		.cipher	= {
+			.cia_min_keysize	= DES3_EDE_KEY_SIZE,
+			.cia_max_keysize	= DES3_EDE_KEY_SIZE,
+			.cia_setkey		= des3_ede_set_key,
+			.cia_encrypt		= des3_ede_encrypt,
+			.cia_decrypt		= des3_ede_decrypt
+		}
+	}
+}, {
+	.cra_name		= "ecb(des3_ede)",
+	.cra_driver_name	= "ecb-des3_ede-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= DES3_EDE_KEY_SIZE,
+			.max_keysize	= DES3_EDE_KEY_SIZE,
+			.setkey		= des3_ede_set_key,
+			.encrypt	= ecb3_encrypt,
+			.decrypt	= ecb3_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(des3_ede)",
+	.cra_driver_name	= "cbc-des3_ede-sparc64",
+	.cra_priority		= SPARC_CR_OPCODE_PRIORITY,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct des3_ede_sparc64_ctx),
+	.cra_alignmask		= 7,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= DES3_EDE_KEY_SIZE,
+			.max_keysize	= DES3_EDE_KEY_SIZE,
+			.ivsize		= DES3_EDE_BLOCK_SIZE,
+			.setkey		= des3_ede_set_key,
+			.encrypt	= cbc3_encrypt,
+			.decrypt	= cbc3_decrypt,
+		},
+	},
+} };
+
+static bool __init sparc64_has_des_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_DES))
+		return false;
+
+	return true;
+}
+
+static int __init des_sparc64_mod_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(algs); i++)
+		INIT_LIST_HEAD(&algs[i].cra_list);
+
+	if (sparc64_has_des_opcode()) {
+		pr_info("Using sparc64 des opcodes optimized DES implementation\n");
+		return crypto_register_algs(algs, ARRAY_SIZE(algs));
+	}
+	pr_info("sparc64 des opcodes not available.\n");
+	return -ENODEV;
+}
+
+static void __exit des_sparc64_mod_fini(void)
+{
+	crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+}
+
+module_init(des_sparc64_mod_init);
+module_exit(des_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("des");
+MODULE_ALIAS_CRYPTO("des3_ede");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/md5_asm.S b/arch/sparc/crypto/md5_asm.S
new file mode 100644
index 0000000..7a66374
--- /dev/null
+++ b/arch/sparc/crypto/md5_asm.S
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(md5_sparc64_transform)
+	/* %o0 = digest, %o1 = data, %o2 = rounds */
+	VISEntryHalf
+	ld	[%o0 + 0x00], %f0
+	ld	[%o0 + 0x04], %f1
+	andcc	%o1, 0x7, %g0
+	ld	[%o0 + 0x08], %f2
+	bne,pn	%xcc, 10f
+	 ld	[%o0 + 0x0c], %f3
+
+1:
+	ldd	[%o1 + 0x00], %f8
+	ldd	[%o1 + 0x08], %f10
+	ldd	[%o1 + 0x10], %f12
+	ldd	[%o1 + 0x18], %f14
+	ldd	[%o1 + 0x20], %f16
+	ldd	[%o1 + 0x28], %f18
+	ldd	[%o1 + 0x30], %f20
+	ldd	[%o1 + 0x38], %f22
+
+	MD5
+
+	subcc	%o2, 1, %o2
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+5:
+	st	%f0, [%o0 + 0x00]
+	st	%f1, [%o0 + 0x04]
+	st	%f2, [%o0 + 0x08]
+	st	%f3, [%o0 + 0x0c]
+	retl
+	 VISExitHalf
+10:
+	alignaddr %o1, %g0, %o1
+
+	ldd	[%o1 + 0x00], %f10
+1:
+	ldd	[%o1 + 0x08], %f12
+	ldd	[%o1 + 0x10], %f14
+	ldd	[%o1 + 0x18], %f16
+	ldd	[%o1 + 0x20], %f18
+	ldd	[%o1 + 0x28], %f20
+	ldd	[%o1 + 0x30], %f22
+	ldd	[%o1 + 0x38], %f24
+	ldd	[%o1 + 0x40], %f26
+
+	faligndata %f10, %f12, %f8
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	MD5
+
+	subcc	%o2, 1, %o2
+	fsrc2	%f26, %f10
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+	ba,a,pt	%xcc, 5b
+ENDPROC(md5_sparc64_transform)
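The "10:" path above handles input that is not 8-byte aligned: alignaddr rounds %o1 down to an aligned address and latches the byte offset in %gsr, and each faligndata then extracts one aligned doubleword from a pair of adjacent loaded ones. The SHA-1 and SHA-256 transforms later in this series reuse the same pattern. A byte-level model (sketch) of what each realigned word ends up holding on big-endian sparc64:

#include <linux/types.h>
#include <linux/string.h>

static u64 realigned_word(const u8 *unaligned_base, unsigned int i)
{
	u64 w;

	/* faligndata over loaded words f[2i]/f[2i+2] yields the 8 bytes
	 * that start at the original, unaligned address plus 8*i. */
	memcpy(&w, unaligned_base + 8 * i, sizeof(w));
	return w;
}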
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
new file mode 100644
index 0000000..bc9cc26
--- /dev/null
+++ b/arch/sparc/crypto/md5_glue.c
@@ -0,0 +1,189 @@
+/* Glue code for MD5 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
+ * and crypto/md5.c which are:
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ * Copyright (c) Cryptoapi developers.
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/md5.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void md5_sparc64_transform(u32 *digest, const char *data,
+				      unsigned int rounds);
+
+static int md5_sparc64_init(struct shash_desc *desc)
+{
+	struct md5_state *mctx = shash_desc_ctx(desc);
+
+	mctx->hash[0] = cpu_to_le32(MD5_H0);
+	mctx->hash[1] = cpu_to_le32(MD5_H1);
+	mctx->hash[2] = cpu_to_le32(MD5_H2);
+	mctx->hash[3] = cpu_to_le32(MD5_H3);
+	mctx->byte_count = 0;
+
+	return 0;
+}
+
+static void __md5_sparc64_update(struct md5_state *sctx, const u8 *data,
+				 unsigned int len, unsigned int partial)
+{
+	unsigned int done = 0;
+
+	sctx->byte_count += len;
+	if (partial) {
+		done = MD5_HMAC_BLOCK_SIZE - partial;
+		memcpy((u8 *)sctx->block + partial, data, done);
+		md5_sparc64_transform(sctx->hash, (u8 *)sctx->block, 1);
+	}
+	if (len - done >= MD5_HMAC_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / MD5_HMAC_BLOCK_SIZE;
+
+		md5_sparc64_transform(sctx->hash, data + done, rounds);
+		done += rounds * MD5_HMAC_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->block, data + done, len - done);
+}
+
+static int md5_sparc64_update(struct shash_desc *desc, const u8 *data,
+			      unsigned int len)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
+
+	/* Handle the fast case right here */
+	if (partial + len < MD5_HMAC_BLOCK_SIZE) {
+		sctx->byte_count += len;
+		memcpy((u8 *)sctx->block + partial, data, len);
+	} else
+		__md5_sparc64_update(sctx, data, len, partial);
+
+	return 0;
+}
+
+/* Add padding and return the message digest. */
+static int md5_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	u32 *dst = (u32 *)out;
+	__le64 bits;
+	static const u8 padding[MD5_HMAC_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_le64(sctx->byte_count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->byte_count % MD5_HMAC_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((MD5_HMAC_BLOCK_SIZE+56) - index);
+
+	/* We need to fill a whole block for __md5_sparc64_update() */
+	if (padlen <= 56) {
+		sctx->byte_count += padlen;
+		memcpy((u8 *)sctx->block + index, padding, padlen);
+	} else {
+		__md5_sparc64_update(sctx, padding, padlen, index);
+	}
+	__md5_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+	/* Store state in digest */
+	for (i = 0; i < MD5_HASH_WORDS; i++)
+		dst[i] = sctx->hash[i];
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int md5_sparc64_export(struct shash_desc *desc, void *out)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+
+	return 0;
+}
+
+static int md5_sparc64_import(struct shash_desc *desc, const void *in)
+{
+	struct md5_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	MD5_DIGEST_SIZE,
+	.init		=	md5_sparc64_init,
+	.update		=	md5_sparc64_update,
+	.final		=	md5_sparc64_final,
+	.export		=	md5_sparc64_export,
+	.import		=	md5_sparc64_import,
+	.descsize	=	sizeof(struct md5_state),
+	.statesize	=	sizeof(struct md5_state),
+	.base		=	{
+		.cra_name	=	"md5",
+		.cra_driver_name=	"md5-sparc64",
+		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_blocksize	=	MD5_HMAC_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool __init sparc64_has_md5_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_MD5))
+		return false;
+
+	return true;
+}
+
+static int __init md5_sparc64_mod_init(void)
+{
+	if (sparc64_has_md5_opcode()) {
+		pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n");
+		return crypto_register_shash(&alg);
+	}
+	pr_info("sparc64 md5 opcode not available.\n");
+	return -ENODEV;
+}
+
+static void __exit md5_sparc64_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(md5_sparc64_mod_init);
+module_exit(md5_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("md5");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/opcodes.h b/arch/sparc/crypto/opcodes.h
new file mode 100644
index 0000000..417b6a1
--- /dev/null
+++ b/arch/sparc/crypto/opcodes.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _OPCODES_H
+#define _OPCODES_H
+
+#define SPARC_CR_OPCODE_PRIORITY	300
+
+#define F3F(x,y,z)	(((x)<<30)|((y)<<19)|((z)<<5))
+
+#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~(0x20)))
+
+#define RS1(x)		(FPD_ENCODE(x) << 14)
+#define RS2(x)		(FPD_ENCODE(x) <<  0)
+#define RS3(x)		(FPD_ENCODE(x) <<  9)
+#define RD(x)		(FPD_ENCODE(x) << 25)
+#define IMM5_0(x)	((x)           <<  0)
+#define IMM5_9(x)	((x)           <<  9)
+
+#define CRC32C(a,b,c)	\
+	.word		(F3F(2,0x36,0x147)|RS1(a)|RS2(b)|RD(c));
+
+#define MD5		\
+	.word	0x81b02800;
+#define SHA1		\
+	.word	0x81b02820;
+#define SHA256		\
+	.word	0x81b02840;
+#define SHA512		\
+	.word	0x81b02860;
+
+#define AES_EROUND01(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 0)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_EROUND23(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 1)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND01(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 2)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND23(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 3)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_EROUND01_L(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 4)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_EROUND23_L(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 5)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND01_L(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 6)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_DROUND23_L(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 7)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define AES_KEXPAND1(a,b,c,d)	\
+	.word	(F3F(2, 0x19, 8)|RS1(a)|RS2(b)|IMM5_9(c)|RD(d));
+#define AES_KEXPAND0(a,b,c)	\
+	.word	(F3F(2, 0x36, 0x130)|RS1(a)|RS2(b)|RD(c));
+#define AES_KEXPAND2(a,b,c)	\
+	.word	(F3F(2, 0x36, 0x131)|RS1(a)|RS2(b)|RD(c));
+
+#define DES_IP(a,b)		\
+	.word		(F3F(2, 0x36, 0x134)|RS1(a)|RD(b));
+#define DES_IIP(a,b)		\
+	.word		(F3F(2, 0x36, 0x135)|RS1(a)|RD(b));
+#define DES_KEXPAND(a,b,c)	\
+	.word		(F3F(2, 0x36, 0x136)|RS1(a)|IMM5_0(b)|RD(c));
+#define DES_ROUND(a,b,c,d)	\
+	.word		(F3F(2, 0x19, 0x009)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+
+#define CAMELLIA_F(a,b,c,d)		\
+	.word		(F3F(2, 0x19, 0x00c)|RS1(a)|RS2(b)|RS3(c)|RD(d));
+#define CAMELLIA_FL(a,b,c)		\
+	.word		(F3F(2, 0x36, 0x13c)|RS1(a)|RS2(b)|RD(c));
+#define CAMELLIA_FLI(a,b,c)		\
+	.word		(F3F(2, 0x36, 0x13d)|RS1(a)|RS2(b)|RD(c));
+
+#define MOVDTOX_F0_O4		\
+	.word	0x99b02200
+#define MOVDTOX_F2_O5		\
+	.word	0x9bb02202
+#define MOVXTOD_G1_F60 		\
+	.word	0xbbb02301
+#define MOVXTOD_G1_F62 		\
+	.word	0xbfb02301
+#define MOVXTOD_G3_F4		\
+	.word	0x89b02303;
+#define MOVXTOD_G7_F6		\
+	.word	0x8db02307;
+#define MOVXTOD_G3_F0		\
+	.word	0x81b02303;
+#define MOVXTOD_G7_F2		\
+	.word	0x85b02307;
+#define MOVXTOD_O0_F0		\
+	.word	0x81b02308;
+#define MOVXTOD_O5_F0		\
+	.word	0x81b0230d;
+#define MOVXTOD_O5_F2		\
+	.word	0x85b0230d;
+#define MOVXTOD_O5_F4		\
+	.word	0x89b0230d;
+#define MOVXTOD_O5_F6		\
+	.word	0x8db0230d;
+#define MOVXTOD_G3_F60		\
+	.word	0xbbb02303;
+#define MOVXTOD_G7_F62		\
+	.word	0xbfb02307;
+
+#endif /* _OPCODES_H */
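These macros hand-assemble the crypto instruction words as .word directives rather than relying on assembler mnemonics. FPD_ENCODE folds a double-precision register number into the 5-bit field SPARC uses (%f0 -> 0, %f32 -> 1, %f62 -> 0x1f). A worked expansion, mirroring the macros above in plain C:

#include <linux/types.h>

#define F3F(x, y, z)	(((u32)(x) << 30) | ((u32)(y) << 19) | ((u32)(z) << 5))
#define FPD_ENCODE(x)	(((x) >> 5) | ((x) & ~0x20u))
#define RS1(x)		(FPD_ENCODE(x) << 14)
#define RS2(x)		(FPD_ENCODE(x) << 0)
#define RD(x)		(FPD_ENCODE(x) << 25)

/* "crc32c %f0, %f2, %f0" as emitted by CRC32C(0, 2, 0): */
static const u32 crc32c_f0_f2_f0 =
	F3F(2, 0x36, 0x147) | RS1(0) | RS2(2) | RD(0);
/* == 0x81b028e2.  Cross-check: MD5's fixed word 0x81b02800 is
 * F3F(2, 0x36, 0x140) with no register fields. */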
diff --git a/arch/sparc/crypto/sha1_asm.S b/arch/sparc/crypto/sha1_asm.S
new file mode 100644
index 0000000..7d8bf35
--- /dev/null
+++ b/arch/sparc/crypto/sha1_asm.S
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(sha1_sparc64_transform)
+	/* %o0 = digest, %o1 = data, %o2 = rounds */
+	VISEntryHalf
+	ld	[%o0 + 0x00], %f0
+	ld	[%o0 + 0x04], %f1
+	ld	[%o0 + 0x08], %f2
+	andcc	%o1, 0x7, %g0
+	ld	[%o0 + 0x0c], %f3
+	bne,pn	%xcc, 10f
+	 ld	[%o0 + 0x10], %f4
+
+1:
+	ldd	[%o1 + 0x00], %f8
+	ldd	[%o1 + 0x08], %f10
+	ldd	[%o1 + 0x10], %f12
+	ldd	[%o1 + 0x18], %f14
+	ldd	[%o1 + 0x20], %f16
+	ldd	[%o1 + 0x28], %f18
+	ldd	[%o1 + 0x30], %f20
+	ldd	[%o1 + 0x38], %f22
+
+	SHA1
+
+	subcc	%o2, 1, %o2
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+5:
+	st	%f0, [%o0 + 0x00]
+	st	%f1, [%o0 + 0x04]
+	st	%f2, [%o0 + 0x08]
+	st	%f3, [%o0 + 0x0c]
+	st	%f4, [%o0 + 0x10]
+	retl
+	 VISExitHalf
+10:
+	alignaddr %o1, %g0, %o1
+
+	ldd	[%o1 + 0x00], %f10
+1:
+	ldd	[%o1 + 0x08], %f12
+	ldd	[%o1 + 0x10], %f14
+	ldd	[%o1 + 0x18], %f16
+	ldd	[%o1 + 0x20], %f18
+	ldd	[%o1 + 0x28], %f20
+	ldd	[%o1 + 0x30], %f22
+	ldd	[%o1 + 0x38], %f24
+	ldd	[%o1 + 0x40], %f26
+
+	faligndata %f10, %f12, %f8
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	SHA1
+
+	subcc	%o2, 1, %o2
+	fsrc2	%f26, %f10
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+	ba,a,pt	%xcc, 5b
+ENDPROC(sha1_sparc64_transform)
diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c
new file mode 100644
index 0000000..4d6d7fa
--- /dev/null
+++ b/arch/sparc/crypto/sha1_glue.c
@@ -0,0 +1,184 @@
+/* Glue code for SHA1 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon arch/x86/crypto/sha1_ssse3_glue.c
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ * Copyright (c) Mathias Krause <minipli@googlemail.com>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void sha1_sparc64_transform(u32 *digest, const char *data,
+				       unsigned int rounds);
+
+static int sha1_sparc64_init(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
+	return 0;
+}
+
+static void __sha1_sparc64_update(struct sha1_state *sctx, const u8 *data,
+				  unsigned int len, unsigned int partial)
+{
+	unsigned int done = 0;
+
+	sctx->count += len;
+	if (partial) {
+		done = SHA1_BLOCK_SIZE - partial;
+		memcpy(sctx->buffer + partial, data, done);
+		sha1_sparc64_transform(sctx->state, sctx->buffer, 1);
+	}
+	if (len - done >= SHA1_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
+
+		sha1_sparc64_transform(sctx->state, data + done, rounds);
+		done += rounds * SHA1_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buffer, data + done, len - done);
+}
+
+static int sha1_sparc64_update(struct shash_desc *desc, const u8 *data,
+			       unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA1_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buffer + partial, data, len);
+	} else
+		__sha1_sparc64_update(sctx, data, len, partial);
+
+	return 0;
+}
+
+/* Add padding and return the message digest. */
+static int sha1_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA1_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
+
+	/* We need to fill a whole block for __sha1_sparc64_update() */
+	if (padlen <= 56) {
+		sctx->count += padlen;
+		memcpy(sctx->buffer + index, padding, padlen);
+	} else {
+		__sha1_sparc64_update(sctx, padding, padlen, index);
+	}
+	__sha1_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+	/* Store state in digest */
+	for (i = 0; i < 5; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha1_sparc64_export(struct shash_desc *desc, void *out)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha1_sparc64_import(struct shash_desc *desc, const void *in)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.digestsize	=	SHA1_DIGEST_SIZE,
+	.init		=	sha1_sparc64_init,
+	.update		=	sha1_sparc64_update,
+	.final		=	sha1_sparc64_final,
+	.export		=	sha1_sparc64_export,
+	.import		=	sha1_sparc64_import,
+	.descsize	=	sizeof(struct sha1_state),
+	.statesize	=	sizeof(struct sha1_state),
+	.base		=	{
+		.cra_name	=	"sha1",
+		.cra_driver_name=	"sha1-sparc64",
+		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_blocksize	=	SHA1_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool __init sparc64_has_sha1_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_SHA1))
+		return false;
+
+	return true;
+}
+
+static int __init sha1_sparc64_mod_init(void)
+{
+	if (sparc64_has_sha1_opcode()) {
+		pr_info("Using sparc64 sha1 opcode optimized SHA-1 implementation\n");
+		return crypto_register_shash(&alg);
+	}
+	pr_info("sparc64 sha1 opcode not available.\n");
+	return -ENODEV;
+}
+
+static void __exit sha1_sparc64_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_sparc64_mod_init);
+module_exit(sha1_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, sparc64 sha1 opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("sha1");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/sha256_asm.S b/arch/sparc/crypto/sha256_asm.S
new file mode 100644
index 0000000..0b39ec7
--- /dev/null
+++ b/arch/sparc/crypto/sha256_asm.S
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(sha256_sparc64_transform)
+	/* %o0 = digest, %o1 = data, %o2 = rounds */
+	VISEntryHalf
+	ld	[%o0 + 0x00], %f0
+	ld	[%o0 + 0x04], %f1
+	ld	[%o0 + 0x08], %f2
+	ld	[%o0 + 0x0c], %f3
+	ld	[%o0 + 0x10], %f4
+	ld	[%o0 + 0x14], %f5
+	andcc	%o1, 0x7, %g0
+	ld	[%o0 + 0x18], %f6
+	bne,pn	%xcc, 10f
+	 ld	[%o0 + 0x1c], %f7
+
+1:
+	ldd	[%o1 + 0x00], %f8
+	ldd	[%o1 + 0x08], %f10
+	ldd	[%o1 + 0x10], %f12
+	ldd	[%o1 + 0x18], %f14
+	ldd	[%o1 + 0x20], %f16
+	ldd	[%o1 + 0x28], %f18
+	ldd	[%o1 + 0x30], %f20
+	ldd	[%o1 + 0x38], %f22
+
+	SHA256
+
+	subcc	%o2, 1, %o2
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+5:
+	st	%f0, [%o0 + 0x00]
+	st	%f1, [%o0 + 0x04]
+	st	%f2, [%o0 + 0x08]
+	st	%f3, [%o0 + 0x0c]
+	st	%f4, [%o0 + 0x10]
+	st	%f5, [%o0 + 0x14]
+	st	%f6, [%o0 + 0x18]
+	st	%f7, [%o0 + 0x1c]
+	retl
+	 VISExitHalf
+10:
+	alignaddr %o1, %g0, %o1
+
+	ldd	[%o1 + 0x00], %f10
+1:
+	ldd	[%o1 + 0x08], %f12
+	ldd	[%o1 + 0x10], %f14
+	ldd	[%o1 + 0x18], %f16
+	ldd	[%o1 + 0x20], %f18
+	ldd	[%o1 + 0x28], %f20
+	ldd	[%o1 + 0x30], %f22
+	ldd	[%o1 + 0x38], %f24
+	ldd	[%o1 + 0x40], %f26
+
+	faligndata %f10, %f12, %f8
+	faligndata %f12, %f14, %f10
+	faligndata %f14, %f16, %f12
+	faligndata %f16, %f18, %f14
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+
+	SHA256
+
+	subcc	%o2, 1, %o2
+	fsrc2	%f26, %f10
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x40, %o1
+
+	ba,a,pt	%xcc, 5b
+ENDPROC(sha256_sparc64_transform)
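
Note: the 10: path above handles input buffers that are not 8-byte aligned --
alignaddr rounds %o1 down and records the offset, one extra double word is
loaded per block, and each faligndata merges two adjacent aligned loads into
one realigned output word. A rough C analogue of the technique (sketch only;
assumes a big-endian host as on sparc64, and like the asm it may read up to 7
bytes past the logical end of the buffer):

    #include <stdint.h>

    /* Read an unaligned 8-byte big-endian word using only aligned loads,
     * merging two neighbours with shifts -- the job faligndata does in
     * the FP register file. */
    static uint64_t load_be64_unaligned(const uint8_t *p)
    {
            uintptr_t off = (uintptr_t)p & 7;
            const uint64_t *q = (const uint64_t *)((uintptr_t)p & ~(uintptr_t)7);

            if (off == 0)
                    return q[0];    /* aligned fast path, as at label 1: */
            return (q[0] << (8 * off)) | (q[1] >> (64 - 8 * off));
    }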
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
new file mode 100644
index 0000000..54c4de2
--- /dev/null
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -0,0 +1,241 @@
+/* Glue code for SHA256 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon crypto/sha256_generic.c
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void sha256_sparc64_transform(u32 *digest, const char *data,
+					 unsigned int rounds);
+
+static int sha224_sparc64_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static int sha256_sparc64_init(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+static void __sha256_sparc64_update(struct sha256_state *sctx, const u8 *data,
+				    unsigned int len, unsigned int partial)
+{
+	unsigned int done = 0;
+
+	sctx->count += len;
+	if (partial) {
+		done = SHA256_BLOCK_SIZE - partial;
+		memcpy(sctx->buf + partial, data, done);
+		sha256_sparc64_transform(sctx->state, sctx->buf, 1);
+	}
+	if (len - done >= SHA256_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
+
+		sha256_sparc64_transform(sctx->state, data + done, rounds);
+		done += rounds * SHA256_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buf, data + done, len - done);
+}
+
+static int sha256_sparc64_update(struct shash_desc *desc, const u8 *data,
+				 unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA256_BLOCK_SIZE) {
+		sctx->count += len;
+		memcpy(sctx->buf + partial, data, len);
+	} else
+		__sha256_sparc64_update(sctx, data, len, partial);
+
+	return 0;
+}
+
+static int sha256_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
+
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64 and append length */
+	index = sctx->count % SHA256_BLOCK_SIZE;
+	padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56) - index);
+
+	/* We need to fill a whole block for __sha256_sparc64_update() */
+	if (padlen <= 56) {
+		sctx->count += padlen;
+		memcpy(sctx->buf + index, padding, padlen);
+	} else {
+		__sha256_sparc64_update(sctx, padding, padlen, index);
+	}
+	__sha256_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
+
+	/* Store state in digest */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha224_sparc64_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[SHA256_DIGEST_SIZE];
+
+	sha256_sparc64_final(desc, D);
+
+	memcpy(hash, D, SHA224_DIGEST_SIZE);
+	memzero_explicit(D, SHA256_DIGEST_SIZE);
+
+	return 0;
+}
+
+static int sha256_sparc64_export(struct shash_desc *desc, void *out)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, sizeof(*sctx));
+	return 0;
+}
+
+static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, sizeof(*sctx));
+	return 0;
+}
+
+static struct shash_alg sha256 = {
+	.digestsize	=	SHA256_DIGEST_SIZE,
+	.init		=	sha256_sparc64_init,
+	.update		=	sha256_sparc64_update,
+	.final		=	sha256_sparc64_final,
+	.export		=	sha256_sparc64_export,
+	.import		=	sha256_sparc64_import,
+	.descsize	=	sizeof(struct sha256_state),
+	.statesize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha256",
+		.cra_driver_name=	"sha256-sparc64",
+		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_blocksize	=	SHA256_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static struct shash_alg sha224 = {
+	.digestsize	=	SHA224_DIGEST_SIZE,
+	.init		=	sha224_sparc64_init,
+	.update		=	sha256_sparc64_update,
+	.final		=	sha224_sparc64_final,
+	.descsize	=	sizeof(struct sha256_state),
+	.base		=	{
+		.cra_name	=	"sha224",
+		.cra_driver_name=	"sha224-sparc64",
+		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_blocksize	=	SHA224_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool __init sparc64_has_sha256_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_SHA256))
+		return false;
+
+	return true;
+}
+
+static int __init sha256_sparc64_mod_init(void)
+{
+	if (sparc64_has_sha256_opcode()) {
+		int ret = crypto_register_shash(&sha224);
+		if (ret < 0)
+			return ret;
+
+		ret = crypto_register_shash(&sha256);
+		if (ret < 0) {
+			crypto_unregister_shash(&sha224);
+			return ret;
+		}
+
+		pr_info("Using sparc64 sha256 opcode optimized SHA-256/SHA-224 implementation\n");
+		return 0;
+	}
+	pr_info("sparc64 sha256 opcode not available.\n");
+	return -ENODEV;
+}
+
+static void __exit sha256_sparc64_mod_fini(void)
+{
+	crypto_unregister_shash(&sha224);
+	crypto_unregister_shash(&sha256);
+}
+
+module_init(sha256_sparc64_mod_init);
+module_exit(sha256_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, sparc64 sha256 opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha256");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/crypto/sha512_asm.S b/arch/sparc/crypto/sha512_asm.S
new file mode 100644
index 0000000..b2f6e67
--- /dev/null
+++ b/arch/sparc/crypto/sha512_asm.S
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+ENTRY(sha512_sparc64_transform)
+	/* %o0 = digest, %o1 = data, %o2 = rounds */
+	VISEntry
+	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	ldd	[%o0 + 0x10], %f4
+	ldd	[%o0 + 0x18], %f6
+	ldd	[%o0 + 0x20], %f8
+	ldd	[%o0 + 0x28], %f10
+	andcc	%o1, 0x7, %g0
+	ldd	[%o0 + 0x30], %f12
+	bne,pn	%xcc, 10f
+	 ldd	[%o0 + 0x38], %f14
+
+1:
+	ldd	[%o1 + 0x00], %f16
+	ldd	[%o1 + 0x08], %f18
+	ldd	[%o1 + 0x10], %f20
+	ldd	[%o1 + 0x18], %f22
+	ldd	[%o1 + 0x20], %f24
+	ldd	[%o1 + 0x28], %f26
+	ldd	[%o1 + 0x30], %f28
+	ldd	[%o1 + 0x38], %f30
+	ldd	[%o1 + 0x40], %f32
+	ldd	[%o1 + 0x48], %f34
+	ldd	[%o1 + 0x50], %f36
+	ldd	[%o1 + 0x58], %f38
+	ldd	[%o1 + 0x60], %f40
+	ldd	[%o1 + 0x68], %f42
+	ldd	[%o1 + 0x70], %f44
+	ldd	[%o1 + 0x78], %f46
+
+	SHA512
+
+	subcc	%o2, 1, %o2
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x80, %o1
+
+5:
+	std	%f0, [%o0 + 0x00]
+	std	%f2, [%o0 + 0x08]
+	std	%f4, [%o0 + 0x10]
+	std	%f6, [%o0 + 0x18]
+	std	%f8, [%o0 + 0x20]
+	std	%f10, [%o0 + 0x28]
+	std	%f12, [%o0 + 0x30]
+	std	%f14, [%o0 + 0x38]
+	retl
+	 VISExit
+10:
+	alignaddr %o1, %g0, %o1
+
+	ldd	[%o1 + 0x00], %f18
+1:
+	ldd	[%o1 + 0x08], %f20
+	ldd	[%o1 + 0x10], %f22
+	ldd	[%o1 + 0x18], %f24
+	ldd	[%o1 + 0x20], %f26
+	ldd	[%o1 + 0x28], %f28
+	ldd	[%o1 + 0x30], %f30
+	ldd	[%o1 + 0x38], %f32
+	ldd	[%o1 + 0x40], %f34
+	ldd	[%o1 + 0x48], %f36
+	ldd	[%o1 + 0x50], %f38
+	ldd	[%o1 + 0x58], %f40
+	ldd	[%o1 + 0x60], %f42
+	ldd	[%o1 + 0x68], %f44
+	ldd	[%o1 + 0x70], %f46
+	ldd	[%o1 + 0x78], %f48
+	ldd	[%o1 + 0x80], %f50
+
+	faligndata %f18, %f20, %f16
+	faligndata %f20, %f22, %f18
+	faligndata %f22, %f24, %f20
+	faligndata %f24, %f26, %f22
+	faligndata %f26, %f28, %f24
+	faligndata %f28, %f30, %f26
+	faligndata %f30, %f32, %f28
+	faligndata %f32, %f34, %f30
+	faligndata %f34, %f36, %f32
+	faligndata %f36, %f38, %f34
+	faligndata %f38, %f40, %f36
+	faligndata %f40, %f42, %f38
+	faligndata %f42, %f44, %f40
+	faligndata %f44, %f46, %f42
+	faligndata %f46, %f48, %f44
+	faligndata %f48, %f50, %f46
+
+	SHA512
+
+	subcc	%o2, 1, %o2
+	fsrc2	%f50, %f18
+	bne,pt	%xcc, 1b
+	 add	%o1, 0x80, %o1
+
+	ba,a,pt	%xcc, 5b
+ENDPROC(sha512_sparc64_transform)
diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c
new file mode 100644
index 0000000..4c55e97
--- /dev/null
+++ b/arch/sparc/crypto/sha512_glue.c
@@ -0,0 +1,226 @@
+/* Glue code for SHA512 hashing optimized for sparc64 crypto opcodes.
+ *
+ * This is based largely upon crypto/sha512_generic.c
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+#include <asm/pstate.h>
+#include <asm/elf.h>
+
+#include "opcodes.h"
+
+asmlinkage void sha512_sparc64_transform(u64 *digest, const char *data,
+					 unsigned int rounds);
+
+static int sha512_sparc64_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	sctx->state[0] = SHA512_H0;
+	sctx->state[1] = SHA512_H1;
+	sctx->state[2] = SHA512_H2;
+	sctx->state[3] = SHA512_H3;
+	sctx->state[4] = SHA512_H4;
+	sctx->state[5] = SHA512_H5;
+	sctx->state[6] = SHA512_H6;
+	sctx->state[7] = SHA512_H7;
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static int sha384_sparc64_init(struct shash_desc *desc)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	sctx->state[0] = SHA384_H0;
+	sctx->state[1] = SHA384_H1;
+	sctx->state[2] = SHA384_H2;
+	sctx->state[3] = SHA384_H3;
+	sctx->state[4] = SHA384_H4;
+	sctx->state[5] = SHA384_H5;
+	sctx->state[6] = SHA384_H6;
+	sctx->state[7] = SHA384_H7;
+	sctx->count[0] = sctx->count[1] = 0;
+
+	return 0;
+}
+
+static void __sha512_sparc64_update(struct sha512_state *sctx, const u8 *data,
+				    unsigned int len, unsigned int partial)
+{
+	unsigned int done = 0;
+
+	if ((sctx->count[0] += len) < len)
+		sctx->count[1]++;
+	if (partial) {
+		done = SHA512_BLOCK_SIZE - partial;
+		memcpy(sctx->buf + partial, data, done);
+		sha512_sparc64_transform(sctx->state, sctx->buf, 1);
+	}
+	if (len - done >= SHA512_BLOCK_SIZE) {
+		const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+
+		sha512_sparc64_transform(sctx->state, data + done, rounds);
+		done += rounds * SHA512_BLOCK_SIZE;
+	}
+
+	memcpy(sctx->buf, data + done, len - done);
+}
+
+static int sha512_sparc64_update(struct shash_desc *desc, const u8 *data,
+				 unsigned int len)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
+
+	/* Handle the fast case right here */
+	if (partial + len < SHA512_BLOCK_SIZE) {
+		if ((sctx->count[0] += len) < len)
+			sctx->count[1]++;
+		memcpy(sctx->buf + partial, data, len);
+	} else
+		__sha512_sparc64_update(sctx, data, len, partial);
+
+	return 0;
+}
+
+static int sha512_sparc64_final(struct shash_desc *desc, u8 *out)
+{
+	struct sha512_state *sctx = shash_desc_ctx(desc);
+	unsigned int i, index, padlen;
+	__be64 *dst = (__be64 *)out;
+	__be64 bits[2];
+	static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
+
+	/* Save number of bits */
+	bits[1] = cpu_to_be64(sctx->count[0] << 3);
+	bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+	/* Pad out to 112 mod 128 and append length */
+	index = sctx->count[0] % SHA512_BLOCK_SIZE;
+	padlen = (index < 112) ? (112 - index) : ((SHA512_BLOCK_SIZE+112) - index);
+
+	/* We need to fill a whole block for __sha512_sparc64_update() */
+	if (padlen <= 112) {
+		if ((sctx->count[0] += padlen) < padlen)
+			sctx->count[1]++;
+		memcpy(sctx->buf + index, padding, padlen);
+	} else {
+		__sha512_sparc64_update(sctx, padding, padlen, index);
+	}
+	__sha512_sparc64_update(sctx, (const u8 *)&bits, sizeof(bits), 112);
+
+	/* Store state in digest */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be64(sctx->state[i]);
+
+	/* Wipe context */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
+
+static int sha384_sparc64_final(struct shash_desc *desc, u8 *hash)
+{
+	u8 D[64];
+
+	sha512_sparc64_final(desc, D);
+
+	memcpy(hash, D, 48);
+	memzero_explicit(D, 64);
+
+	return 0;
+}
+
+static struct shash_alg sha512 = {
+	.digestsize	=	SHA512_DIGEST_SIZE,
+	.init		=	sha512_sparc64_init,
+	.update		=	sha512_sparc64_update,
+	.final		=	sha512_sparc64_final,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha512",
+		.cra_driver_name=	"sha512-sparc64",
+		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_blocksize	=	SHA512_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static struct shash_alg sha384 = {
+	.digestsize	=	SHA384_DIGEST_SIZE,
+	.init		=	sha384_sparc64_init,
+	.update		=	sha512_sparc64_update,
+	.final		=	sha384_sparc64_final,
+	.descsize	=	sizeof(struct sha512_state),
+	.base		=	{
+		.cra_name	=	"sha384",
+		.cra_driver_name=	"sha384-sparc64",
+		.cra_priority	=	SPARC_CR_OPCODE_PRIORITY,
+		.cra_blocksize	=	SHA384_BLOCK_SIZE,
+		.cra_module	=	THIS_MODULE,
+	}
+};
+
+static bool __init sparc64_has_sha512_opcode(void)
+{
+	unsigned long cfr;
+
+	if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO))
+		return false;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+	if (!(cfr & CFR_SHA512))
+		return false;
+
+	return true;
+}
+
+static int __init sha512_sparc64_mod_init(void)
+{
+	if (sparc64_has_sha512_opcode()) {
+		int ret = crypto_register_shash(&sha384);
+		if (ret < 0)
+			return ret;
+
+		ret = crypto_register_shash(&sha512);
+		if (ret < 0) {
+			crypto_unregister_shash(&sha384);
+			return ret;
+		}
+
+		pr_info("Using sparc64 sha512 opcode optimized SHA-512/SHA-384 implementation\n");
+		return 0;
+	}
+	pr_info("sparc64 sha512 opcode not available.\n");
+	return -ENODEV;
+}
+
+static void __exit sha512_sparc64_mod_fini(void)
+{
+	crypto_unregister_shash(&sha384);
+	crypto_unregister_shash(&sha512);
+}
+
+module_init(sha512_sparc64_mod_init);
+module_exit(sha512_sparc64_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-384 and SHA-512 Secure Hash Algorithm, sparc64 sha512 opcode accelerated");
+
+MODULE_ALIAS_CRYPTO("sha384");
+MODULE_ALIAS_CRYPTO("sha512");
+
+#include "crop_devid.c"
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
new file mode 100644
index 0000000..410b263
--- /dev/null
+++ b/arch/sparc/include/asm/Kbuild
@@ -0,0 +1,21 @@
+# User exported sparc header files
+
+
+generic-y += div64.h
+generic-y += emergency-restart.h
+generic-y += exec.h
+generic-y += export.h
+generic-y += irq_regs.h
+generic-y += irq_work.h
+generic-y += linkage.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += mcs_spinlock.h
+generic-y += mm-arch-hooks.h
+generic-y += module.h
+generic-y += msi.h
+generic-y += preempt.h
+generic-y += rwsem.h
+generic-y += serial.h
+generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/adi.h b/arch/sparc/include/asm/adi.h
new file mode 100644
index 0000000..acad0d0
--- /dev/null
+++ b/arch/sparc/include/asm/adi.h
@@ -0,0 +1,6 @@
+#ifndef ___ASM_SPARC_ADI_H
+#define ___ASM_SPARC_ADI_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/adi_64.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/adi_64.h b/arch/sparc/include/asm/adi_64.h
new file mode 100644
index 0000000..85f7a76
--- /dev/null
+++ b/arch/sparc/include/asm/adi_64.h
@@ -0,0 +1,47 @@
+/* adi_64.h: ADI related data structures
+ *
+ * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
+ * Author: Khalid Aziz (khalid.aziz@oracle.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#ifndef __ASM_SPARC64_ADI_H
+#define __ASM_SPARC64_ADI_H
+
+#include <linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+struct adi_caps {
+	__u64 blksz;
+	__u64 nbits;
+	__u64 ue_on_adi;
+};
+
+struct adi_config {
+	bool enabled;
+	struct adi_caps caps;
+};
+
+extern struct adi_config adi_state;
+
+extern void mdesc_adi_init(void);
+
+static inline bool adi_capable(void)
+{
+	return adi_state.enabled;
+}
+
+static inline unsigned long adi_blksize(void)
+{
+	return adi_state.caps.blksz;
+}
+
+static inline unsigned long adi_nbits(void)
+{
+	return adi_state.caps.nbits;
+}
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* !(__ASM_SPARC64_ADI_H) */
diff --git a/arch/sparc/include/asm/agp.h b/arch/sparc/include/asm/agp.h
new file mode 100644
index 0000000..efe0d6a
--- /dev/null
+++ b/arch/sparc/include/asm/agp.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef AGP_H
+#define AGP_H 1
+
+/* dummy for now */
+
+#define map_page_into_agp(page)
+#define unmap_page_from_agp(page)
+#define flush_agp_cache() mb()
+
+/* GATT allocation. Returns/accepts GATT kernel virtual address. */
+#define alloc_gatt_pages(order)		\
+	((char *)__get_free_pages(GFP_KERNEL, (order)))
+#define free_gatt_pages(table, order)	\
+	free_pages((unsigned long)(table), (order))
+
+#endif
diff --git a/arch/sparc/include/asm/apb.h b/arch/sparc/include/asm/apb.h
new file mode 100644
index 0000000..b1dfb1a
--- /dev/null
+++ b/arch/sparc/include/asm/apb.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * apb.h: Advanced PCI Bridge Configuration Registers and Bits
+ *
+ * Copyright (C) 1998  Eddie C. Dost  (ecd@skynet.be)
+ */
+
+#ifndef _SPARC64_APB_H
+#define _SPARC64_APB_H
+
+#define APB_TICK_REGISTER			0xb0
+#define APB_INT_ACK				0xb8
+#define APB_PRIMARY_MASTER_RETRY_LIMIT		0xc0
+#define APB_DMA_ASFR				0xc8
+#define APB_DMA_AFAR				0xd0
+#define APB_PIO_TARGET_RETRY_LIMIT		0xd8
+#define APB_PIO_TARGET_LATENCY_TIMER		0xd9
+#define APB_DMA_TARGET_RETRY_LIMIT		0xda
+#define APB_DMA_TARGET_LATENCY_TIMER		0xdb
+#define APB_SECONDARY_MASTER_RETRY_LIMIT	0xdc
+#define APB_SECONDARY_CONTROL			0xdd
+#define APB_IO_ADDRESS_MAP			0xde
+#define APB_MEM_ADDRESS_MAP			0xdf
+
+#define APB_PCI_CONTROL_LOW			0xe0
+#  define APB_PCI_CTL_LOW_ARB_PARK			(1 << 21)
+#  define APB_PCI_CTL_LOW_ERRINT_EN			(1 << 8)
+
+#define APB_PCI_CONTROL_HIGH			0xe4
+#  define APB_PCI_CTL_HIGH_SERR				(1 << 2)
+#  define APB_PCI_CTL_HIGH_ARBITER_EN			(1 << 0)
+
+#define APB_PIO_ASFR				0xe8
+#define APB_PIO_AFAR				0xf0
+#define APB_DIAG_REGISTER			0xf8
+
+#endif /* !(_SPARC64_APB_H) */
diff --git a/arch/sparc/include/asm/asm-offsets.h b/arch/sparc/include/asm/asm-offsets.h
new file mode 100644
index 0000000..d370ee3
--- /dev/null
+++ b/arch/sparc/include/asm/asm-offsets.h
@@ -0,0 +1 @@
+#include <generated/asm-offsets.h>
diff --git a/arch/sparc/include/asm/asm-prototypes.h b/arch/sparc/include/asm/asm-prototypes.h
new file mode 100644
index 0000000..4987c73
--- /dev/null
+++ b/arch/sparc/include/asm/asm-prototypes.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <asm/xor.h>
+#include <asm/checksum.h>
+#include <asm/trap_block.h>
+#include <linux/uaccess.h>
+#include <asm/atomic.h>
+#include <asm/ftrace.h>
+#include <asm/cacheflush.h>
+#include <asm/oplib.h>
+#include <linux/atomic.h>
+
+void *__memscan_zero(void *, size_t);
+void *__memscan_generic(void *, int, size_t);
+void *__bzero(void *, size_t);
+void VISenter(void); /* Dummy prototype to suppress warning */

+#undef memcpy
+#undef memset
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+typedef int TItype __attribute__((mode(TI)));
+TItype __multi3(TItype a, TItype b);
diff --git a/arch/sparc/include/asm/asm.h b/arch/sparc/include/asm/asm.h
new file mode 100644
index 0000000..eaed011
--- /dev/null
+++ b/arch/sparc/include/asm/asm.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_ASM_H
+#define _SPARC_ASM_H
+
+/* Macros to assist the sharing of assembler code between 32-bit and
+ * 64-bit sparc.
+ */
+
+#ifdef CONFIG_SPARC64
+#define BRANCH32(TYPE, PREDICT, DEST) \
+	TYPE,PREDICT	%icc, DEST
+#define BRANCH32_ANNUL(TYPE, PREDICT, DEST) \
+	TYPE,a,PREDICT	%icc, DEST
+#define BRANCH_REG_ZERO(PREDICT, REG, DEST) \
+	brz,PREDICT	REG, DEST
+#define BRANCH_REG_ZERO_ANNUL(PREDICT, REG, DEST) \
+	brz,a,PREDICT	REG, DEST
+#define BRANCH_REG_NOT_ZERO(PREDICT, REG, DEST) \
+	brnz,PREDICT	REG, DEST
+#define BRANCH_REG_NOT_ZERO_ANNUL(PREDICT, REG, DEST) \
+	brnz,a,PREDICT	REG, DEST
+#else
+#define BRANCH32(TYPE, PREDICT, DEST) \
+	TYPE		DEST
+#define BRANCH32_ANNUL(TYPE, PREDICT, DEST) \
+	TYPE,a		DEST
+#define BRANCH_REG_ZERO(PREDICT, REG, DEST) \
+	cmp		REG, 0; \
+	be		DEST
+#define BRANCH_REG_ZERO_ANNUL(PREDICT, REG, DEST) \
+	cmp		REG, 0; \
+	be,a		DEST
+#define BRANCH_REG_NOT_ZERO(PREDICT, REG, DEST) \
+	cmp		REG, 0; \
+	bne		DEST
+#define BRANCH_REG_NOT_ZERO_ANNUL(PREDICT, REG, DEST) \
+	cmp		REG, 0; \
+	bne,a		DEST
+#endif
+
+#endif /* _SPARC_ASM_H */
diff --git a/arch/sparc/include/asm/asmmacro.h b/arch/sparc/include/asm/asmmacro.h
new file mode 100644
index 0000000..49aaf6f
--- /dev/null
+++ b/arch/sparc/include/asm/asmmacro.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* asmmacro.h: Assembler macros.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caipfs.rutgers.edu)
+ */
+
+#ifndef _SPARC_ASMMACRO_H
+#define _SPARC_ASMMACRO_H
+
+/* All trap entry points _must_ begin with this macro or else you
+ * lose.  It makes sure the kernel has a proper window so that
+ * c-code can be called.
+ */
+#define SAVE_ALL_HEAD \
+	sethi	%hi(trap_setup), %l4; \
+	jmpl	%l4 + %lo(trap_setup), %l6;
+#define SAVE_ALL \
+	SAVE_ALL_HEAD \
+	 nop;
+
+/* All traps low-level code here must end with this macro. */
+#define RESTORE_ALL b ret_trap_entry; clr %l6;
+
+/* Support for run-time patching of single instructions.
+ * This is used to handle the differences in the ASI for
+ * MMUREGS for LEON and SUN.
+ *
+ * Sample:
+ * LEON_PI(lda [%g0] ASI_LEON_MMUREGS, %o0)
+ * SUN_PI_(lda [%g0] ASI_M_MMUREGS, %o0)
+ * PI == Patch Instruction
+ *
+ * For LEON we will use the first variant,
+ * and for all others we will use the SUN variant.
+ * The order is important.
+ */
+#define LEON_PI(...)				\
+662:	__VA_ARGS__
+
+#define SUN_PI_(...)				\
+	.section .leon_1insn_patch, "ax";	\
+	.word 662b;				\
+	__VA_ARGS__;				\
+	.previous
+
+#endif /* !(_SPARC_ASMMACRO_H) */
diff --git a/arch/sparc/include/asm/atomic.h b/arch/sparc/include/asm/atomic.h
new file mode 100644
index 0000000..425151c
--- /dev/null
+++ b/arch/sparc/include/asm/atomic.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_ATOMIC_H
+#define ___ASM_SPARC_ATOMIC_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/atomic_64.h>
+#else
+#include <asm/atomic_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
new file mode 100644
index 0000000..94c930f
--- /dev/null
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* atomic.h: These still suck, but the I-cache hit rate is higher.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com.au)
+ * Copyright (C) 2007 Kyle McMartin (kyle@parisc-linux.org)
+ *
+ * Additions by Keith M Wesolowski (wesolows@foobazco.org) based
+ * on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>.
+ */
+
+#ifndef __ARCH_SPARC_ATOMIC__
+#define __ARCH_SPARC_ATOMIC__
+
+#include <linux/types.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+#include <asm-generic/atomic64.h>
+
+#define ATOMIC_INIT(i)  { (i) }
+
+int atomic_add_return(int, atomic_t *);
+int atomic_fetch_add(int, atomic_t *);
+int atomic_fetch_and(int, atomic_t *);
+int atomic_fetch_or(int, atomic_t *);
+int atomic_fetch_xor(int, atomic_t *);
+int atomic_cmpxchg(atomic_t *, int, int);
+int atomic_xchg(atomic_t *, int);
+int atomic_fetch_add_unless(atomic_t *, int, int);
+void atomic_set(atomic_t *, int);
+
+#define atomic_fetch_add_unless	atomic_fetch_add_unless
+
+#define atomic_set_release(v, i)	atomic_set((v), (i))
+
+#define atomic_read(v)          READ_ONCE((v)->counter)
+
+#define atomic_add(i, v)	((void)atomic_add_return( (int)(i), (v)))
+#define atomic_sub(i, v)	((void)atomic_add_return(-(int)(i), (v)))
+
+#define atomic_and(i, v)	((void)atomic_fetch_and((i), (v)))
+#define atomic_or(i, v)		((void)atomic_fetch_or((i), (v)))
+#define atomic_xor(i, v)	((void)atomic_fetch_xor((i), (v)))
+
+#define atomic_sub_return(i, v)	(atomic_add_return(-(int)(i), (v)))
+#define atomic_fetch_sub(i, v)  (atomic_fetch_add (-(int)(i), (v)))
+
+#endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
new file mode 100644
index 0000000..6963482
--- /dev/null
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* atomic.h: Thankfully the V9 is at least reasonable for this
+ *           stuff.
+ *
+ * Copyright (C) 1996, 1997, 2000, 2012 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef __ARCH_SPARC64_ATOMIC__
+#define __ARCH_SPARC64_ATOMIC__
+
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+
+#define ATOMIC_INIT(i)		{ (i) }
+#define ATOMIC64_INIT(i)	{ (i) }
+
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
+
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+
+#define ATOMIC_OP(op)							\
+void atomic_##op(int, atomic_t *);					\
+void atomic64_##op(long, atomic64_t *);
+
+#define ATOMIC_OP_RETURN(op)						\
+int atomic_##op##_return(int, atomic_t *);				\
+long atomic64_##op##_return(long, atomic64_t *);
+
+#define ATOMIC_FETCH_OP(op)						\
+int atomic_fetch_##op(int, atomic_t *);					\
+long atomic64_fetch_##op(long, atomic64_t *);
+
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(add)
+ATOMIC_OPS(sub)
+
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+	return xchg(&v->counter, new);
+}
+
+#define atomic64_cmpxchg(v, o, n) \
+	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
+#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+
+long atomic64_dec_if_positive(atomic64_t *v);
+#define atomic64_dec_if_positive atomic64_dec_if_positive
+
+#endif /* !(__ARCH_SPARC64_ATOMIC__) */
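
Note: the ATOMIC_OP/ATOMIC_OP_RETURN/ATOMIC_FETCH_OP generators above emit
prototypes only; the bodies live out of line under arch/sparc/lib. Expanded
by hand, ATOMIC_OPS(add) produces these six declarations:

    void atomic_add(int, atomic_t *);
    void atomic64_add(long, atomic64_t *);
    int atomic_add_return(int, atomic_t *);
    long atomic64_add_return(long, atomic64_t *);
    int atomic_fetch_add(int, atomic_t *);
    long atomic64_fetch_add(long, atomic64_t *);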
diff --git a/arch/sparc/include/asm/auxio.h b/arch/sparc/include/asm/auxio.h
new file mode 100644
index 0000000..a268105
--- /dev/null
+++ b/arch/sparc/include/asm/auxio.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_AUXIO_H
+#define ___ASM_SPARC_AUXIO_H
+
+#ifndef __ASSEMBLY__
+
+extern void __iomem *auxio_register;
+
+#endif /* ifndef __ASSEMBLY__ */
+
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/auxio_64.h>
+#else
+#include <asm/auxio_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/auxio_32.h b/arch/sparc/include/asm/auxio_32.h
new file mode 100644
index 0000000..852457c
--- /dev/null
+++ b/arch/sparc/include/asm/auxio_32.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * auxio.h:  Definitions and code for the Auxiliary I/O register.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_AUXIO_H
+#define _SPARC_AUXIO_H
+
+#include <asm/vaddrs.h>
+
+/* This register is an unsigned char in IO space.  It does two things.
+ * First, it is used to control the front panel LED light on machines
+ * that have it (good for testing entry points to trap handlers and irq's).
+ * Secondly, it controls various floppy drive parameters.
+ */
+#define AUXIO_ORMEIN      0xf0    /* All writes must set these bits. */
+#define AUXIO_ORMEIN4M    0xc0    /* sun4m - All writes must set these bits. */
+#define AUXIO_FLPY_DENS   0x20    /* Floppy density, high if set. Read only. */
+#define AUXIO_FLPY_DCHG   0x10    /* A disk change occurred.  Read only. */
+#define AUXIO_EDGE_ON     0x10    /* sun4m - On means Jumper block is in. */
+#define AUXIO_FLPY_DSEL   0x08    /* Drive select/start-motor. Write only. */
+#define AUXIO_LINK_TEST   0x08    /* sun4m - On means TPE Carrier detect. */
+
+/* Set the following to one, then zero, after doing a pseudo DMA transfer. */
+#define AUXIO_FLPY_TCNT   0x04    /* Floppy terminal count. Write only. */
+
+/* Set the following to zero to eject the floppy. */
+#define AUXIO_FLPY_EJCT   0x02    /* Eject floppy disk.  Write only. */
+#define AUXIO_LED         0x01    /* On if set, off if unset. Read/Write */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * NOTE: these routines are implementation dependent--
+ * understand the hardware you are querying!
+ */
+void set_auxio(unsigned char bits_on, unsigned char bits_off);
+unsigned char get_auxio(void); /* .../asm/floppy.h */
+
+/*
+ * The following routines are provided for driver-compatibility
+ * with sparc64 (primarily sunlance.c)
+ */
+
+#define AUXIO_LTE_ON    1
+#define AUXIO_LTE_OFF   0
+
+/* auxio_set_lte - Set Link Test Enable (TPE Link Detect)
+ *
+ * on - AUXIO_LTE_ON or AUXIO_LTE_OFF
+ */
+#define auxio_set_lte(on) \
+do { \
+	if(on) { \
+		set_auxio(AUXIO_LINK_TEST, 0); \
+	} else { \
+		set_auxio(0, AUXIO_LINK_TEST); \
+	} \
+} while (0)
+
+#define AUXIO_LED_ON    1
+#define AUXIO_LED_OFF   0
+
+/* auxio_set_led - Set system front panel LED
+ *
+ * on - AUXIO_LED_ON or AUXIO_LED_OFF
+ */
+#define auxio_set_led(on) \
+do { \
+	if(on) { \
+		set_auxio(AUXIO_LED, 0); \
+	} else { \
+		set_auxio(0, AUXIO_LED); \
+	} \
+} while (0)
+
+#endif /* !(__ASSEMBLY__) */
+
+
+/* AUXIO2 (Power Off Control) */
+extern volatile u8 __iomem *auxio_power_register;
+
+#define	AUXIO_POWER_DETECT_FAILURE	32
+#define	AUXIO_POWER_CLEAR_FAILURE	2
+#define	AUXIO_POWER_OFF			1
+
+
+#endif /* !(_SPARC_AUXIO_H) */
diff --git a/arch/sparc/include/asm/auxio_64.h b/arch/sparc/include/asm/auxio_64.h
new file mode 100644
index 0000000..ae1ed41
--- /dev/null
+++ b/arch/sparc/include/asm/auxio_64.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * auxio.h:  Definitions and code for the Auxiliary I/O registers.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Refactoring for unified NCR/PCIO support 2002 Eric Brower (ebrower@usa.net)
+ */
+#ifndef _SPARC64_AUXIO_H
+#define _SPARC64_AUXIO_H
+
+/* AUXIO implementations:
+ * sbus-based NCR89C105 "Slavio"
+ *	LED/Floppy (AUX1) register
+ *	Power (AUX2) register
+ *
+ * ebus-based auxio on PCIO
+ *	LED Auxio Register
+ *	Power Auxio Register
+ *
+ * Register definitions from NCR _NCR89C105 Chip Specification_
+ *
+ * SLAVIO AUX1 @ 0x1900000
+ * -------------------------------------------------
+ * | (R) | (R) |  D  | (R) |  E  |  M  |  T  |  L  |
+ * -------------------------------------------------
+ * (R) - bit 7:6,4 are reserved and should be masked in s/w
+ *  D  - Floppy Density Sense (1=high density) R/O
+ *  E  - Link Test Enable, directly reflected on AT&T 7213 LTE pin
+ *  M  - Monitor/Mouse Mux, directly reflected on MON_MSE_MUX pin
+ *  T  - Terminal Count: sends TC pulse to 82077 floppy controller
+ *  L  - System LED on front panel (0=off, 1=on)
+ */
+#define AUXIO_AUX1_MASK		0xc0 /* Mask bits 		*/
+#define AUXIO_AUX1_FDENS	0x20 /* Floppy Density Sense	*/
+#define AUXIO_AUX1_LTE 		0x08 /* Link Test Enable 	*/
+#define AUXIO_AUX1_MMUX		0x04 /* Monitor/Mouse Mux	*/
+#define AUXIO_AUX1_FTCNT	0x02 /* Terminal Count		*/
+#define AUXIO_AUX1_LED		0x01 /* System LED		*/
+
+/* SLAVIO AUX2 @ 0x1910000
+ * -------------------------------------------------
+ * | (R) | (R) |  D  | (R) | (R) | (R) |  C  |  F  |
+ * -------------------------------------------------
+ * (R) - bits 7:6,4:2 are reserved and should be masked in s/w
+ *  D  - Power Failure Detect (1=power fail)
+ *  C  - Clear Power Failure Detect Int (1=clear)
+ *  F  - Power Off (1=power off)
+ */
+#define AUXIO_AUX2_MASK		0xdc /* Mask Bits		*/
+#define AUXIO_AUX2_PFAILDET	0x20 /* Power Fail Detect	*/
+#define AUXIO_AUX2_PFAILCLR 	0x02 /* Clear Pwr Fail Det Intr	*/
+#define AUXIO_AUX2_PWR_OFF	0x01 /* Power Off		*/
+
+/* Register definitions from Sun Microsystems _PCIO_ p/n 802-7837
+ *
+ * PCIO LED Auxio @ 0x726000
+ * -------------------------------------------------
+ * |             31:1 Unused                 | LED |
+ * -------------------------------------------------
+ * Bits 31:1 unused
+ * LED - System LED on front panel (0=off, 1=on)
+ */
+#define AUXIO_PCIO_LED		0x01 /* System LED 		*/
+
+/* PCIO Power Auxio @ 0x724000
+ * -------------------------------------------------
+ * |             31:2 Unused           | CPO | SPO |
+ * -------------------------------------------------
+ * Bits 31:2 unused
+ * CPO - Courtesy Power Off (1=off)
+ * SPO - System Power Off   (1=off)
+ */
+#define AUXIO_PCIO_CPWR_OFF	0x02 /* Courtesy Power Off	*/
+#define AUXIO_PCIO_SPWR_OFF	0x01 /* System Power Off	*/
+
+#ifndef __ASSEMBLY__
+
+#define AUXIO_LTE_ON	1
+#define AUXIO_LTE_OFF	0
+
+/* auxio_set_lte - Set Link Test Enable (TPE Link Detect)
+ *
+ * on - AUXIO_LTE_ON or AUXIO_LTE_OFF
+ */
+void auxio_set_lte(int on);
+
+#define AUXIO_LED_ON	1
+#define AUXIO_LED_OFF	0
+
+/* auxio_set_led - Set system front panel LED
+ *
+ * on - AUXIO_LED_ON or AUXIO_LED_OFF
+ */
+void auxio_set_led(int on);
+
+#endif /* ifndef __ASSEMBLY__ */
+
+#endif /* !(_SPARC64_AUXIO_H) */
diff --git a/arch/sparc/include/asm/backoff.h b/arch/sparc/include/asm/backoff.h
new file mode 100644
index 0000000..8625946
--- /dev/null
+++ b/arch/sparc/include/asm/backoff.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_BACKOFF_H
+#define _SPARC64_BACKOFF_H
+
+/* The macros in this file implement an exponential backoff facility
+ * for atomic operations.
+ *
+ * When multiple threads compete on an atomic operation, it is
+ * possible for one thread to be continually denied a successful
+ * completion of the compare-and-swap instruction.  Heavily
+ * threaded cpu implementations like Niagara can compound this
+ * problem even further.
+ *
+ * When an atomic operation fails and needs to be retried, we spin a
+ * certain number of times.  At each subsequent failure of the same
+ * operation we double the spin count, realizing an exponential
+ * backoff.
+ *
+ * When we spin, we try to use an operation that will cause the
+ * current cpu strand to block, and therefore make the core fully
+ * available to any other runnable strands.  There are two
+ * options, based upon cpu capabilities.
+ *
+ * On all cpus prior to SPARC-T4 we do three dummy reads of the
+ * condition code register.  Each read blocks the strand for something
+ * between 40 and 50 cpu cycles.
+ *
+ * For SPARC-T4 and later we have a special "pause" instruction
+ * available.  This is implemented using writes to register %asr27.
+ * The cpu will block the number of cycles written into the register,
+ * unless a disrupting trap happens first.  SPARC-T4 specifically
+ * implements pause with a granularity of 8 cycles.  Each strand has
+ * an internal pause counter which decrements every 8 cycles.  So the
+ * chip shifts the %asr27 value down by 3 bits, and writes the result
+ * into the pause counter.  If a value smaller than 8 is written, the
+ * chip blocks for 1 cycle.
+ *
+ * To achieve the same amount of backoff as the three %ccr reads give
+ * on earlier chips, we shift the backoff value up by 7 bits.  (Three
+ * %ccr reads block for about 128 cycles, 1 << 7 == 128) We write the
+ * whole amount we want to block into the pause register, rather than
+ * loop writing 128 each time.
+ */
+
+#define BACKOFF_LIMIT	(4 * 1024)
+
+#ifdef CONFIG_SMP
+
+#define BACKOFF_SETUP(reg)	\
+	mov	1, reg
+
+#define BACKOFF_LABEL(spin_label, continue_label) \
+	spin_label
+
+#define BACKOFF_SPIN(reg, tmp, label)		\
+	mov		reg, tmp;		\
+88:	rd		%ccr, %g0;		\
+	rd		%ccr, %g0;		\
+	rd		%ccr, %g0;		\
+	.section	.pause_3insn_patch,"ax";\
+	.word		88b;			\
+	sllx		tmp, 7, tmp;		\
+	wr		tmp, 0, %asr27;		\
+	clr		tmp;			\
+	.previous;				\
+	brnz,pt		tmp, 88b;		\
+	 sub		tmp, 1, tmp;		\
+	set		BACKOFF_LIMIT, tmp;	\
+	cmp		reg, tmp;		\
+	bg,pn		%xcc, label;		\
+	 nop;					\
+	ba,pt		%xcc, label;		\
+	 sllx		reg, 1, reg;
+
+#else
+
+#define BACKOFF_SETUP(reg)
+
+#define BACKOFF_LABEL(spin_label, continue_label) \
+	continue_label
+
+#define BACKOFF_SPIN(reg, tmp, label)
+
+#endif
+
+#endif /* _SPARC64_BACKOFF_H */
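
Note: in C, the BACKOFF_SETUP/BACKOFF_SPIN pair amounts to "spin the current
budget, then double it until BACKOFF_LIMIT".  A rough sketch of the policy
(cpu_relax() stands in for the %ccr reads or the %asr27 pause write; the
function name is illustrative):

    /* reg starts at 1 (BACKOFF_SETUP); on each cmpxchg failure the caller
     * branches to the spin label, waits, and retries with a doubled budget. */
    static inline void backoff_spin(unsigned long *backoff)
    {
            unsigned long n = *backoff;

            while (n--)
                    cpu_relax();
            if (*backoff <= 4 * 1024)       /* BACKOFF_LIMIT */
                    *backoff <<= 1;
    }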
diff --git a/arch/sparc/include/asm/barrier.h b/arch/sparc/include/asm/barrier.h
new file mode 100644
index 0000000..c10b416
--- /dev/null
+++ b/arch/sparc/include/asm/barrier.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_BARRIER_H
+#define ___ASM_SPARC_BARRIER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/barrier_64.h>
+#else
+#include <asm/barrier_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/barrier_32.h b/arch/sparc/include/asm/barrier_32.h
new file mode 100644
index 0000000..304f9c3
--- /dev/null
+++ b/arch/sparc/include/asm/barrier_32.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_BARRIER_H
+#define __SPARC_BARRIER_H
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__SPARC_BARRIER_H) */
diff --git a/arch/sparc/include/asm/barrier_64.h b/arch/sparc/include/asm/barrier_64.h
new file mode 100644
index 0000000..9fb148b
--- /dev/null
+++ b/arch/sparc/include/asm/barrier_64.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC64_BARRIER_H
+#define __SPARC64_BARRIER_H
+
+/* These are here in an effort to more fully work around Spitfire Errata
+ * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *	call	sym_ccb_from_dsa, 0
+ *	 movge	%icc, 0, %l0
+ *	brz,pn	%o0, .LL1303
+ *	 mov	%o0, %l2
+ *	membar	#LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
+			     " membar	" type "\n" \
+			     "1:\n" \
+			     : : : "memory"); \
+} while (0)
+
+/* The kernel always executes in TSO memory model these days,
+ * and furthermore most sparc64 chips implement more stringent
+ * memory ordering than required by the specifications.
+ */
+#define mb()	membar_safe("#StoreLoad")
+#define rmb()	__asm__ __volatile__("":::"memory")
+#define wmb()	__asm__ __volatile__("":::"memory")
+
+#define __smp_store_release(p, v)						\
+do {									\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	WRITE_ONCE(*p, v);						\
+} while (0)
+
+#define __smp_load_acquire(p)						\
+({									\
+	typeof(*p) ___p1 = READ_ONCE(*p);				\
+	compiletime_assert_atomic_type(*p);				\
+	barrier();							\
+	___p1;								\
+})
+
+#define __smp_mb__before_atomic()	barrier()
+#define __smp_mb__after_atomic()	barrier()
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__SPARC64_BARRIER_H) */
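
Note: because the kernel runs sparc64 in TSO, the __smp_store_release() /
__smp_load_acquire() pair above reduces to compiler barriers, but callers
still rely on the ordering contract.  A hedged producer/consumer sketch using
the generic smp_store_release()/smp_load_acquire() wrappers these definitions
back (variable names are illustrative):

    static int payload;
    static int ready;

    static void producer(void)
    {
            payload = 42;                   /* write the data first...   */
            smp_store_release(&ready, 1);   /* ...then publish the flag  */
    }

    static int consumer(void)
    {
            if (smp_load_acquire(&ready))   /* flag read ordered before  */
                    return payload;         /* ...the payload read       */
            return -1;                      /* not published yet         */
    }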
diff --git a/arch/sparc/include/asm/bbc.h b/arch/sparc/include/asm/bbc.h
new file mode 100644
index 0000000..00a4f11
--- /dev/null
+++ b/arch/sparc/include/asm/bbc.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * bbc.h: Defines for BootBus Controller found on UltraSPARC-III
+ *        systems.
+ *
+ * Copyright (C) 2000 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef _SPARC64_BBC_H
+#define _SPARC64_BBC_H
+
+/* Register sizes are indicated by "B" (Byte, 1-byte),
+ * "H" (Half-word, 2 bytes), "W" (Word, 4 bytes) or
+ * "Q" (Quad, 8 bytes) inside brackets.
+ */
+
+#define BBC_AID		0x00	/* [B] Agent ID			*/
+#define BBC_DEVP	0x01	/* [B] Device Present		*/
+#define BBC_ARB		0x02	/* [B] Arbitration		*/
+#define BBC_QUIESCE	0x03	/* [B] Quiesce			*/
+#define BBC_WDACTION	0x04	/* [B] Watchdog Action		*/
+#define BBC_SPG		0x06	/* [B] Soft POR Gen		*/
+#define BBC_SXG		0x07	/* [B] Soft XIR Gen		*/
+#define BBC_PSRC	0x08	/* [W] POR Source		*/
+#define BBC_XSRC	0x0c	/* [B] XIR Source		*/
+#define BBC_CSC		0x0d	/* [B] Clock Synthesizers Control*/
+#define BBC_ES_CTRL	0x0e	/* [H] Energy Star Control	*/
+#define BBC_ES_ACT	0x10	/* [W] E* Assert Change Time	*/
+#define BBC_ES_DACT	0x14	/* [B] E* De-Assert Change Time	*/
+#define BBC_ES_DABT	0x15	/* [B] E* De-Assert Bypass Time	*/
+#define BBC_ES_ABT	0x16	/* [H] E* Assert Bypass Time	*/
+#define BBC_ES_PST	0x18	/* [W] E* PLL Settle Time	*/
+#define BBC_ES_FSL	0x1c	/* [W] E* Frequency Switch Latency*/
+#define BBC_EBUST	0x20	/* [Q] EBUS Timing		*/
+#define BBC_JTAG_CMD	0x28	/* [W] JTAG+ Command		*/
+#define BBC_JTAG_CTRL	0x2c	/* [B] JTAG+ Control		*/
+#define BBC_I2C_SEL	0x2d	/* [B] I2C Selection		*/
+#define BBC_I2C_0_S1	0x2e	/* [B] I2C ctrlr-0 reg S1	*/
+#define BBC_I2C_0_S0	0x2f	/* [B] I2C ctrlr-0 regs S0,S0',S2,S3*/
+#define BBC_I2C_1_S1	0x30	/* [B] I2C ctrlr-1 reg S1	*/
+#define BBC_I2C_1_S0	0x31	/* [B] I2C ctrlr-1 regs S0,S0',S2,S3*/
+#define BBC_KBD_BEEP	0x32	/* [B] Keyboard Beep		*/
+#define BBC_KBD_BCNT	0x34	/* [W] Keyboard Beep Counter	*/
+
+#define BBC_REGS_SIZE	0x40
+
+/* There is a 2K scratch ram area at offset 0x80000 but I doubt
+ * we will use it for anything.
+ */
+
+/* Agent ID register.  This register shows the Safari Agent ID
+ * for the processors.  The value returned depends upon which
+ * cpu is reading the register.
+ */
+#define BBC_AID_ID	0x07	/* Safari ID		*/
+#define BBC_AID_RESV	0xf8	/* Reserved		*/
+
+/* Device Present register.  One can determine which cpus are actually
+ * present in the machine by interrogating this register.
+ */
+#define BBC_DEVP_CPU0	0x01	/* Processor 0 present	*/
+#define BBC_DEVP_CPU1	0x02	/* Processor 1 present	*/
+#define BBC_DEVP_CPU2	0x04	/* Processor 2 present	*/
+#define BBC_DEVP_CPU3	0x08	/* Processor 3 present	*/
+#define BBC_DEVP_RESV	0xf0	/* Reserved		*/
+
+/* Arbitration register.  This register is used to block access to
+ * the BBC from a particular cpu.
+ */
+#define BBC_ARB_CPU0	0x01	/* Enable cpu 0 BBC arbitration */
+#define BBC_ARB_CPU1	0x02	/* Enable cpu 1 BBC arbitration */
+#define BBC_ARB_CPU2	0x04	/* Enable cpu 2 BBC arbitration */
+#define BBC_ARB_CPU3	0x08	/* Enable cpu 3 BBC arbitration */
+#define BBC_ARB_RESV	0xf0	/* Reserved			 */
+
+/* Quiesce register.  Bus and BBC segments for cpus can be disabled
+ * with this register, i.e. for hot plugging.
+ */
+#define BBC_QUIESCE_S02	0x01	/* Quiesce Safari segment for cpu 0 and 2 */
+#define BBC_QUIESCE_S13	0x02	/* Quiesce Safari segment for cpu 1 and 3 */
+#define BBC_QUIESCE_B02	0x04	/* Quiesce BBC segment for cpu 0 and 2    */
+#define BBC_QUIESCE_B13	0x08	/* Quiesce BBC segment for cpu 1 and 3    */
+#define BBC_QUIESCE_FD0 0x10	/* Disable Fatal_Error[0] reporting	  */
+#define BBC_QUIESCE_FD1 0x20	/* Disable Fatal_Error[1] reporting	  */
+#define BBC_QUIESCE_FD2 0x40	/* Disable Fatal_Error[2] reporting	  */
+#define BBC_QUIESCE_FD3 0x80	/* Disable Fatal_Error[3] reporting	  */
+
+/* Watchdog Action register.  When the watchdog device timer expires
+ * a line is enabled to the BBC.  The action the BBC takes when this line
+ * is asserted can be controlled by this register.
+ */
+#define BBC_WDACTION_RST  0x01	/* When set, watchdog causes system reset.
+				 * When clear, BBC ignores watchdog signal.
+				 */
+#define BBC_WDACTION_RESV 0xfe	/* Reserved */
+
+/* Soft_POR_GEN register.  The POR (Power On Reset) signal may be asserted
+ * for specific processors or all processors via this register.
+ */
+#define BBC_SPG_CPU0	0x01 /* Assert POR for processor 0	*/
+#define BBC_SPG_CPU1	0x02 /* Assert POR for processor 1	*/
+#define BBC_SPG_CPU2	0x04 /* Assert POR for processor 2	*/
+#define BBC_SPG_CPU3	0x08 /* Assert POR for processor 3	*/
+#define BBC_SPG_CPUALL	0x10 /* Reset all processors and reset
+			      * the entire system.
+			      */
+#define BBC_SPG_RESV	0xe0 /* Reserved			*/
+
+/* Soft_XIR_GEN register.  The XIR (eXternally Initiated Reset) signal
+ * may be asserted to specific processors via this register.
+ */
+#define BBC_SXG_CPU0	0x01 /* Assert XIR for processor 0	*/
+#define BBC_SXG_CPU1	0x02 /* Assert XIR for processor 1	*/
+#define BBC_SXG_CPU2	0x04 /* Assert XIR for processor 2	*/
+#define BBC_SXG_CPU3	0x08 /* Assert XIR for processor 3	*/
+#define BBC_SXG_RESV	0xf0 /* Reserved			*/
+
+/* POR Source register.  One may identify the cause of the most recent
+ * reset by reading this register.
+ */
+#define BBC_PSRC_SPG0	0x0001 /* CPU 0 reset via BBC_SPG register	*/
+#define BBC_PSRC_SPG1	0x0002 /* CPU 1 reset via BBC_SPG register	*/
+#define BBC_PSRC_SPG2	0x0004 /* CPU 2 reset via BBC_SPG register	*/
+#define BBC_PSRC_SPG3	0x0008 /* CPU 3 reset via BBC_SPG register	*/
+#define BBC_PSRC_SPGSYS	0x0010 /* System reset via BBC_SPG register	*/
+#define BBC_PSRC_JTAG	0x0020 /* System reset via JTAG+		*/
+#define BBC_PSRC_BUTTON	0x0040 /* System reset via push-button dongle	*/
+#define BBC_PSRC_PWRUP	0x0080 /* System reset via power-up		*/
+#define BBC_PSRC_FE0	0x0100 /* CPU 0 reported Fatal_Error		*/
+#define BBC_PSRC_FE1	0x0200 /* CPU 1 reported Fatal_Error		*/
+#define BBC_PSRC_FE2	0x0400 /* CPU 2 reported Fatal_Error		*/
+#define BBC_PSRC_FE3	0x0800 /* CPU 3 reported Fatal_Error		*/
+#define BBC_PSRC_FE4	0x1000 /* Schizo reported Fatal_Error		*/
+#define BBC_PSRC_FE5	0x2000 /* Safari device 5 reported Fatal_Error	*/
+#define BBC_PSRC_FE6	0x4000 /* CPMS reported Fatal_Error		*/
+#define BBC_PSRC_SYNTH	0x8000 /* System reset when on-board clock synthesizers
+				* were updated.
+				*/
+#define BBC_PSRC_WDT   0x10000 /* System reset via Super I/O watchdog	*/
+#define BBC_PSRC_RSC   0x20000 /* System reset via RSC remote monitoring
+				* device
+				*/
+
+/* XIR Source register.  The source of an XIR event sent to a processor may
+ * be determined via this register.
+ */
+#define BBC_XSRC_SXG0	0x01	/* CPU 0 received XIR via Soft_XIR_GEN reg */
+#define BBC_XSRC_SXG1	0x02	/* CPU 1 received XIR via Soft_XIR_GEN reg */
+#define BBC_XSRC_SXG2	0x04	/* CPU 2 received XIR via Soft_XIR_GEN reg */
+#define BBC_XSRC_SXG3	0x08	/* CPU 3 received XIR via Soft_XIR_GEN reg */
+#define BBC_XSRC_JTAG	0x10	/* All CPUs received XIR via JTAG+         */
+#define BBC_XSRC_W_OR_B	0x20	/* All CPUs received XIR either because:
+				 * a) Super I/O watchdog fired, or
+				 * b) XIR push button was activated
+				 */
+#define BBC_XSRC_RESV	0xc0	/* Reserved				   */
+
+/* Clock Synthesizers Control register.  This register provides the bit-bang
+ * programming interface to the two clock synthesizers of the machine.
+ */
+#define BBC_CSC_SLOAD	0x01	/* Directly connected to S_LOAD pins	*/
+#define BBC_CSC_SDATA	0x02	/* Directly connected to S_DATA pins	*/
+#define BBC_CSC_SCLOCK	0x04	/* Directly connected to S_CLOCK pins	*/
+#define BBC_CSC_RESV	0x78	/* Reserved				*/
+#define BBC_CSC_RST	0x80	/* Generate system reset when S_LOAD==1	*/
+
+/* Energy Star Control register.  This register is used to generate the
+ * clock frequency change trigger to the main system devices (Schizo and
+ * the processors).  The transition occurs when bits in this register
+ * go from 0 to 1; only one bit may be set at a time, else no action
+ * occurs.  Basically the sequence of events is:
+ * a) Choose new frequency: full, 1/2 or 1/32
+ * b) Program this desired frequency into the cpus and Schizo.
+ * c) Set the same value in this register.
+ * d) 16 system clocks later, clear this register.
+ */
+#define BBC_ES_CTRL_1_1		0x01	/* Full frequency	*/
+#define BBC_ES_CTRL_1_2		0x02	/* 1/2 frequency	*/
+#define BBC_ES_CTRL_1_32	0x20	/* 1/32 frequency	*/
+#define BBC_ES_RESV		0xdc	/* Reserved		*/
+
+/* Energy Star Assert Change Time register.  This determines the number
+ * of BBC clock cycles (which is half the system frequency) between
+ * the detection of FREEZE_ACK being asserted and the assertion of
+ * the CLK_CHANGE_L[2:0] signals.
+ */
+#define BBC_ES_ACT_VAL	0xff
+
+/* Energy Star Assert Bypass Time register.  This determines the number
+ * of BBC clock cycles (which is half the system frequency) between
+ * the assertion of the CLK_CHANGE_L[2:0] signals and the assertion of
+ * the ESTAR_PLL_BYPASS signal.
+ */
+#define BBC_ES_ABT_VAL	0xffff
+
+/* Energy Star PLL Settle Time register.  This determines the number of
+ * BBC clock cycles (which is half the system frequency) between the
+ * de-assertion of CLK_CHANGE_L[2:0] and the de-assertion of the FREEZE_L
+ * signal.
+ */
+#define BBC_ES_PST_VAL	0xffffffff
+
+/* Energy Star Frequency Switch Latency register.  This is the number of
+ * BBC clocks between the de-assertion of CLK_CHANGE_L[2:0] and the first
+ * edge of the Safari clock at the new frequency.
+ */
+#define BBC_ES_FSL_VAL	0xffffffff
+
+/* Keyboard Beep control register.  This is a simple enabler for the audio
+ * beep sound.
+ */
+#define BBC_KBD_BEEP_ENABLE	0x01 /* Enable beep	*/
+#define BBC_KBD_BEEP_RESV	0xfe /* Reserved	*/
+
+/* Keyboard Beep Counter register.  There is a free-running counter inside
+ * the BBC which runs at half the system clock.  The bit set in this register
+ * determines when the audio sound is generated.  So for example if bit
+ * 10 is set, the audio beep will oscillate at 1/(2**12).  The keyboard beep
+ * generator automatically selects a different bit to use if the system clock
+ * is changed via Energy Star.
+ */
+#define BBC_KBD_BCNT_BITS	0x0007fc00
+#define BBC_KBC_BCNT_RESV	0xfff803ff
+
+#endif /* _SPARC64_BBC_H */
+
diff --git a/arch/sparc/include/asm/bitext.h b/arch/sparc/include/asm/bitext.h
new file mode 100644
index 0000000..2c2a2d8
--- /dev/null
+++ b/arch/sparc/include/asm/bitext.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * bitext.h: Bit string operations on the sparc, specific to architecture.
+ *
+ * Copyright 2002 Pete Zaitcev <zaitcev@yahoo.com>
+ */
+
+#ifndef _SPARC_BITEXT_H
+#define _SPARC_BITEXT_H
+
+#include <linux/spinlock.h>
+
+struct bit_map {
+	spinlock_t lock;
+	unsigned long *map;
+	int size;
+	int used;
+	int last_off;
+	int last_size;
+	int first_free;
+	int num_colors;
+};
+
+int bit_map_string_get(struct bit_map *t, int len, int align);
+void bit_map_clear(struct bit_map *t, int offset, int len);
+void bit_map_init(struct bit_map *t, unsigned long *map, int size);
+
+#endif /* defined(_SPARC_BITEXT_H) */
diff --git a/arch/sparc/include/asm/bitops.h b/arch/sparc/include/asm/bitops.h
new file mode 100644
index 0000000..4c431d2
--- /dev/null
+++ b/arch/sparc/include/asm/bitops.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_BITOPS_H
+#define ___ASM_SPARC_BITOPS_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/bitops_64.h>
+#else
+#include <asm/bitops_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h
new file mode 100644
index 0000000..0ceff3b
--- /dev/null
+++ b/arch/sparc/include/asm/bitops_32.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * bitops.h: Bit string operations on the Sparc.
+ *
+ * Copyright 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright 1996 Eddie C. Dost   (ecd@skynet.be)
+ * Copyright 2001 Anton Blanchard (anton@samba.org)
+ */
+
+#ifndef _SPARC_BITOPS_H
+#define _SPARC_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+#ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+unsigned long ___set_bit(unsigned long *addr, unsigned long mask);
+unsigned long ___clear_bit(unsigned long *addr, unsigned long mask);
+unsigned long ___change_bit(unsigned long *addr, unsigned long mask);
+
+/*
+ * Set bit 'nr' in 32-bit quantity at address 'addr' where bit '0'
+ * is in the highest of the four bytes and bit '31' is the high bit
+ * within the first byte. Sparc is BIG-Endian. Unless noted otherwise
+ * all bit-ops return 0 if bit was previously clear and != 0 otherwise.
+ */
+static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *ADDR, mask;
+
+	ADDR = ((unsigned long *) addr) + (nr >> 5);
+	mask = 1 << (nr & 31);
+
+	return ___set_bit(ADDR, mask) != 0;
+}
+
+static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *ADDR, mask;
+
+	ADDR = ((unsigned long *) addr) + (nr >> 5);
+	mask = 1 << (nr & 31);
+
+	(void) ___set_bit(ADDR, mask);
+}
+
+static inline int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *ADDR, mask;
+
+	ADDR = ((unsigned long *) addr) + (nr >> 5);
+	mask = 1 << (nr & 31);
+
+	return ___clear_bit(ADDR, mask) != 0;
+}
+
+static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *ADDR, mask;
+
+	ADDR = ((unsigned long *) addr) + (nr >> 5);
+	mask = 1 << (nr & 31);
+
+	(void) ___clear_bit(ADDR, mask);
+}
+
+static inline int test_and_change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *ADDR, mask;
+
+	ADDR = ((unsigned long *) addr) + (nr >> 5);
+	mask = 1 << (nr & 31);
+
+	return ___change_bit(ADDR, mask) != 0;
+}
+
+static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
+{
+	unsigned long *ADDR, mask;
+
+	ADDR = ((unsigned long *) addr) + (nr >> 5);
+	mask = 1 << (nr & 31);
+
+	(void) ___change_bit(ADDR, mask);
+}
+
+#include <asm-generic/bitops/non-atomic.h>
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* defined(_SPARC_BITOPS_H) */
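
Note: every helper above splits the bit number the same way -- word index
nr >> 5, in-word mask 1 << (nr & 31) -- since sparc32 bitmaps are arrays of
32-bit words.  A quick userspace check of the indexing (illustrative):

    #include <assert.h>

    int main(void)
    {
            unsigned long nr = 71;                  /* bit 71 of a bitmap */
            unsigned long word = nr >> 5;           /* word index: 2      */
            unsigned long mask = 1UL << (nr & 31);  /* bit 7 in that word */

            assert(word == 2 && mask == (1UL << 7));
            return 0;
    }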
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
new file mode 100644
index 0000000..ca7ea59
--- /dev/null
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * bitops.h: Bit string operations on the V9.
+ *
+ * Copyright 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC64_BITOPS_H
+#define _SPARC64_BITOPS_H
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+#include <asm/barrier.h>
+
+int test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
+int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
+int test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
+void set_bit(unsigned long nr, volatile unsigned long *addr);
+void clear_bit(unsigned long nr, volatile unsigned long *addr);
+void change_bit(unsigned long nr, volatile unsigned long *addr);
+
+int fls(unsigned int word);
+int __fls(unsigned long word);
+
+#include <asm-generic/bitops/non-atomic.h>
+
+#include <asm-generic/bitops/fls64.h>
+
+#ifdef __KERNEL__
+
+int ffs(int x);
+unsigned long __ffs(unsigned long);
+
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/sched.h>
+
+/*
+ * hweightN: returns the hamming weight (i.e. the number
+ * of bits set) of a N-bit word
+ */
+
+unsigned long __arch_hweight64(__u64 w);
+unsigned int __arch_hweight32(unsigned int w);
+unsigned int __arch_hweight16(unsigned int w);
+unsigned int __arch_hweight8(unsigned int w);
+
+#include <asm-generic/bitops/const_hweight.h>
+#include <asm-generic/bitops/lock.h>
+#endif /* __KERNEL__ */
+
+#include <asm-generic/bitops/find.h>
+
+#ifdef __KERNEL__
+
+#include <asm-generic/bitops/le.h>
+
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* defined(_SPARC64_BITOPS_H) */
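
As a stand-in for the hweight interface declared above, here is a userspace sketch built on the compiler builtin; the kernel's sparc64 versions are out-of-line assembly that can be patched at boot to use the POPC instruction on CPUs that have it:

#include <stdio.h>
#include <stdint.h>

/* Userspace stand-in for __arch_hweight32(): count the set bits. */
static unsigned int model_hweight32(uint32_t w)
{
	return __builtin_popcount(w);
}

int main(void)
{
	printf("%u\n", model_hweight32(0x000000ffu));	/* 8 */
	printf("%u\n", model_hweight32(0x80000001u));	/* 2 */
	return 0;
}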
diff --git a/arch/sparc/include/asm/btext.h b/arch/sparc/include/asm/btext.h
new file mode 100644
index 0000000..01cdc42
--- /dev/null
+++ b/arch/sparc/include/asm/btext.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_BTEXT_H
+#define _SPARC_BTEXT_H
+
+int btext_find_display(void);
+
+#endif /* _SPARC_BTEXT_H */
diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h
new file mode 100644
index 0000000..ea53e41
--- /dev/null
+++ b/arch/sparc/include/asm/bug.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_BUG_H
+#define _SPARC_BUG_H
+
+#ifdef CONFIG_BUG
+#include <linux/compiler.h>
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+void do_BUG(const char *file, int line);
+#define BUG() do {					\
+	do_BUG(__FILE__, __LINE__);			\
+	barrier_before_unreachable();			\
+	__builtin_trap();				\
+} while (0)
+#else
+#define BUG() do {					\
+	barrier_before_unreachable();			\
+	__builtin_trap();				\
+} while (0)
+#endif
+
+#define HAVE_ARCH_BUG
+#endif
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+void __noreturn die_if_kernel(char *str, struct pt_regs *regs);
+
+#endif
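
The CONFIG_DEBUG_BUGVERBOSE flavor above reports the location before trapping. A minimal userspace model of that flow, assuming only GCC's __builtin_trap() and with do_BUG() reduced to a printf:

#include <stdio.h>

/* Userspace model of the verbose BUG() path: report, then trap. */
static void do_BUG(const char *file, int line)
{
	printf("kernel BUG at %s:%d!\n", file, line);
}

#define BUG() do {				\
	do_BUG(__FILE__, __LINE__);		\
	__builtin_trap();			\
} while (0)

int main(void)
{
	int corrupted = 1;

	if (corrupted)
		BUG();	/* prints the location, then dies on a trap */
	return 0;
}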
diff --git a/arch/sparc/include/asm/bugs.h b/arch/sparc/include/asm/bugs.h
new file mode 100644
index 0000000..02fa369
--- /dev/null
+++ b/arch/sparc/include/asm/bugs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* include/asm/bugs.h:  Sparc probes for various bugs.
+ *
+ * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef CONFIG_SPARC32
+#include <asm/cpudata.h>
+#endif
+
+extern unsigned long loops_per_jiffy;
+
+static void __init check_bugs(void)
+{
+#if defined(CONFIG_SPARC32) && !defined(CONFIG_SMP)
+	cpu_data(0).udelay_val = loops_per_jiffy;
+#endif
+}
diff --git a/arch/sparc/include/asm/cache.h b/arch/sparc/include/asm/cache.h
new file mode 100644
index 0000000..dcfd581
--- /dev/null
+++ b/arch/sparc/include/asm/cache.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* cache.h:  Cache specific code for the Sparc.  These include flushing
+ *           and direct tag/data line access.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _SPARC_CACHE_H
+#define _SPARC_CACHE_H
+
+#define ARCH_SLAB_MINALIGN	__alignof__(unsigned long long)
+
+#define L1_CACHE_SHIFT 5
+#define L1_CACHE_BYTES 32
+
+#ifdef CONFIG_SPARC32
+#define SMP_CACHE_BYTES_SHIFT 5
+#else
+#define SMP_CACHE_BYTES_SHIFT 6
+#endif
+
+#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT)
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif /* !(_SPARC_CACHE_H) */
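
A common use of SMP_CACHE_BYTES is padding shared data to cache-line boundaries to avoid false sharing. A small standalone demonstration (the sparc64 value of the constant is copied in so this compiles on its own):

#include <stdio.h>

#define SMP_CACHE_BYTES_SHIFT	6
#define SMP_CACHE_BYTES		(1 << SMP_CACHE_BYTES_SHIFT)

/* Two counters padded to separate cache lines so concurrent updates
 * do not bounce a single line between CPUs. */
struct counter {
	unsigned long val;
} __attribute__((aligned(SMP_CACHE_BYTES)));

int main(void)
{
	struct counter c[2] = { { 0 }, { 0 } };

	printf("stride = %zu bytes\n",
	       (size_t)((char *)&c[1] - (char *)&c[0]));	/* 64 */
	return 0;
}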
diff --git a/arch/sparc/include/asm/cacheflush.h b/arch/sparc/include/asm/cacheflush.h
new file mode 100644
index 0000000..881ac76
--- /dev/null
+++ b/arch/sparc/include/asm/cacheflush.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_CACHEFLUSH_H
+#define ___ASM_SPARC_CACHEFLUSH_H
+
+/* flush addr - to allow use of self-modifying code */
+#define flushi(addr)	__asm__ __volatile__ ("flush %0" : : "r" (addr) : "memory")
+
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/cacheflush_64.h>
+#else
+#include <asm/cacheflush_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h
new file mode 100644
index 0000000..fb66094
--- /dev/null
+++ b/arch/sparc/include/asm/cacheflush_32.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_CACHEFLUSH_H
+#define _SPARC_CACHEFLUSH_H
+
+#include <asm/cachetlb_32.h>
+
+#define flush_cache_all() \
+	sparc32_cachetlb_ops->cache_all()
+#define flush_cache_mm(mm) \
+	sparc32_cachetlb_ops->cache_mm(mm)
+#define flush_cache_dup_mm(mm) \
+	sparc32_cachetlb_ops->cache_mm(mm)
+#define flush_cache_range(vma,start,end) \
+	sparc32_cachetlb_ops->cache_range(vma, start, end)
+#define flush_cache_page(vma,addr,pfn) \
+	sparc32_cachetlb_ops->cache_page(vma, addr)
+#define flush_icache_range(start, end)		do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+
+#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
+	do {							\
+		flush_cache_page(vma, vaddr, page_to_pfn(page));\
+		memcpy(dst, src, len);				\
+	} while (0)
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	do {							\
+		flush_cache_page(vma, vaddr, page_to_pfn(page));\
+		memcpy(dst, src, len);				\
+	} while (0)
+
+#define __flush_page_to_ram(addr) \
+	sparc32_cachetlb_ops->page_to_ram(addr)
+#define flush_sig_insns(mm,insn_addr) \
+	sparc32_cachetlb_ops->sig_insns(mm, insn_addr)
+#define flush_page_for_dma(addr) \
+	sparc32_cachetlb_ops->page_for_dma(addr)
+
+void sparc_flush_page_to_ram(struct page *page);
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+#define flush_dcache_page(page)			sparc_flush_page_to_ram(page)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+#define flush_cache_vmap(start, end)		flush_cache_all()
+#define flush_cache_vunmap(start, end)		flush_cache_all()
+
+/* When a context switch happens we must flush all user windows so that
+ * the windows of the current process are flushed onto its stack. This
+ * way the windows are all clean for the next process and the stack
+ * frames are up to date.
+ */
+void flush_user_windows(void);
+void kill_user_windows(void);
+void flushw_all(void);
+
+#endif /* _SPARC_CACHEFLUSH_H */
diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h
new file mode 100644
index 0000000..e751743
--- /dev/null
+++ b/arch/sparc/include/asm/cacheflush_64.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_CACHEFLUSH_H
+#define _SPARC64_CACHEFLUSH_H
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/mm.h>
+
+/* Cache flush operations. */
+#define flushw_all()	__asm__ __volatile__("flushw")
+
+void __flushw_user(void);
+#define flushw_user() __flushw_user()
+
+#define flush_user_windows flushw_user
+#define flush_register_windows flushw_all
+
+/* These are the same regardless of whether this is an SMP kernel or not. */
+#define flush_cache_mm(__mm) \
+	do { if ((__mm) == current->mm) flushw_user(); } while(0)
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+#define flush_cache_range(vma, start, end) \
+	flush_cache_mm((vma)->vm_mm)
+#define flush_cache_page(vma, page, pfn) \
+	flush_cache_mm((vma)->vm_mm)
+
+/*
+ * On spitfire, the icache doesn't snoop local stores and we don't
+ * use block commit stores (which invalidate icache lines) during
+ * module load, so we need this.
+ */
+void flush_icache_range(unsigned long start, unsigned long end);
+void __flush_icache_page(unsigned long);
+
+void __flush_dcache_page(void *addr, int flush_icache);
+void flush_dcache_page_impl(struct page *page);
+#ifdef CONFIG_SMP
+void smp_flush_dcache_page_impl(struct page *page, int cpu);
+void flush_dcache_page_all(struct mm_struct *mm, struct page *page);
+#else
+#define smp_flush_dcache_page_impl(page,cpu) flush_dcache_page_impl(page)
+#define flush_dcache_page_all(mm,page) flush_dcache_page_impl(page)
+#endif
+
+void __flush_dcache_range(unsigned long start, unsigned long end);
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+void flush_dcache_page(struct page *page);
+
+#define flush_icache_page(vma, pg)	do { } while(0)
+#define flush_icache_user_range(vma,pg,adr,len)	do { } while (0)
+
+void flush_ptrace_access(struct vm_area_struct *, struct page *,
+			 unsigned long uaddr, void *kaddr,
+			 unsigned long len, int write);
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
+	do {								\
+		flush_cache_page(vma, vaddr, page_to_pfn(page));	\
+		memcpy(dst, src, len);					\
+		flush_ptrace_access(vma, page, vaddr, src, len, 0);	\
+	} while (0)
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) 		\
+	do {								\
+		flush_cache_page(vma, vaddr, page_to_pfn(page));	\
+		memcpy(dst, src, len);					\
+		flush_ptrace_access(vma, page, vaddr, dst, len, 1);	\
+	} while (0)
+
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _SPARC64_CACHEFLUSH_H */
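
The icache note above dictates a store-then-flush discipline whenever the kernel writes instructions. A hedged sketch of the idiom in kernel context; install_insns() is a hypothetical helper, not a real kernel function:

#include <linux/string.h>
#include <asm/cacheflush.h>

/* Hypothetical helper: copy freshly generated instructions into place,
 * then invalidate stale icache lines on CPUs that do not snoop stores. */
static void install_insns(void *dst, const void *src, unsigned long len)
{
	memcpy(dst, src, len);
	flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
}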
diff --git a/arch/sparc/include/asm/cachetlb_32.h b/arch/sparc/include/asm/cachetlb_32.h
new file mode 100644
index 0000000..534da70
--- /dev/null
+++ b/arch/sparc/include/asm/cachetlb_32.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_CACHETLB_H
+#define _SPARC_CACHETLB_H
+
+struct mm_struct;
+struct vm_area_struct;
+
+struct sparc32_cachetlb_ops {
+	void (*cache_all)(void);
+	void (*cache_mm)(struct mm_struct *);
+	void (*cache_range)(struct vm_area_struct *, unsigned long,
+			    unsigned long);
+	void (*cache_page)(struct vm_area_struct *, unsigned long);
+
+	void (*tlb_all)(void);
+	void (*tlb_mm)(struct mm_struct *);
+	void (*tlb_range)(struct vm_area_struct *, unsigned long,
+			  unsigned long);
+	void (*tlb_page)(struct vm_area_struct *, unsigned long);
+
+	void (*page_to_ram)(unsigned long);
+	void (*sig_insns)(struct mm_struct *, unsigned long);
+	void (*page_for_dma)(unsigned long);
+};
+extern const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops;
+#ifdef CONFIG_SMP
+extern const struct sparc32_cachetlb_ops *local_ops;
+#endif
+
+#endif /* _SPARC_CACHETLB_H */
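
A backend fills this table once at boot, and the flush macros in cacheflush_32.h dispatch through the pointer. A hedged sketch of the shape, with hypothetical hyp_* functions and most members omitted; the real tables are populated by the MMU probing code under arch/sparc/mm/:

#include <asm/cachetlb_32.h>

/* Hypothetical backend implementations; names are illustrative. */
static void hyp_cache_all(void) { /* flush the whole cache */ }
static void hyp_tlb_all(void)   { /* flush the whole TLB */ }

static const struct sparc32_cachetlb_ops hyp_ops = {
	.cache_all = hyp_cache_all,
	.tlb_all   = hyp_tlb_all,
	/* ...the remaining members would be filled in the same way... */
};

/* Installed once during MMU probing; flush_cache_all() and the other
 * macros then indirect through this pointer. */
const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops = &hyp_ops;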
diff --git a/arch/sparc/include/asm/chafsr.h b/arch/sparc/include/asm/chafsr.h
new file mode 100644
index 0000000..01540ec
--- /dev/null
+++ b/arch/sparc/include/asm/chafsr.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_CHAFSR_H
+#define _SPARC64_CHAFSR_H
+
+/* Cheetah Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */
+
+/* Comments indicate which processor variants on which the bit definition
+ * is valid.  Codes are:
+ * ch	-->	cheetah
+ * ch+	-->	cheetah plus
+ * jp	-->	jalapeno
+ */
+
+/* All bits of this register except M_SYNDROME and E_SYNDROME are
+ * read, write 1 to clear.  M_SYNDROME and E_SYNDROME are read-only.
+ */
+
+/* Software bit set by linux trap handlers to indicate that the trap was
+ * signalled at %tl >= 1.
+ */
+#define CHAFSR_TL1		(1UL << 63UL) /* n/a */
+
+/* Unmapped error from system bus for prefetch queue or
+ * store queue read operation
+ */
+#define CHPAFSR_DTO		(1UL << 59UL) /* ch+ */
+
+/* Bus error from system bus for prefetch queue or store queue
+ * read operation
+ */
+#define CHPAFSR_DBERR		(1UL << 58UL) /* ch+ */
+
+/* Hardware corrected E-cache Tag ECC error */
+#define CHPAFSR_THCE		(1UL << 57UL) /* ch+ */
+/* System interface protocol error, hw timeout caused */
+#define JPAFSR_JETO		(1UL << 57UL) /* jp */
+
+/* SW handled correctable E-cache Tag ECC error */
+#define CHPAFSR_TSCE		(1UL << 56UL) /* ch+ */
+/* Parity error on system snoop results */
+#define JPAFSR_SCE		(1UL << 56UL) /* jp */
+
+/* Uncorrectable E-cache Tag ECC error */
+#define CHPAFSR_TUE		(1UL << 55UL) /* ch+ */
+/* System interface protocol error, illegal command detected */
+#define JPAFSR_JEIC		(1UL << 55UL) /* jp */
+
+/* Uncorrectable system bus data ECC error due to prefetch
+ * or store fill request
+ */
+#define CHPAFSR_DUE		(1UL << 54UL) /* ch+ */
+/* System interface protocol error, illegal ADTYPE detected */
+#define JPAFSR_JEIT		(1UL << 54UL) /* jp */
+
+/* Multiple errors of the same type have occurred.  This bit is set when
+ * an uncorrectable error or a SW correctable error occurs and the status
+ * bit to report that error is already set.  Multiple errors of
+ * different types are indicated by setting multiple status bits.
+ *
+ * This bit is not set if multiple HW corrected errors with the same
+ * status bit occur, only uncorrectable and SW correctable ones have
+ * this behavior.
+ *
+ * This bit is not set when multiple ECC errors happen within a single
+ * 64-byte system bus transaction.  Only the first ECC error in a 16-byte
+ * subunit will be logged.  All errors in subsequent 16-byte subunits
+ * from the same 64-byte transaction are ignored.
+ */
+#define CHAFSR_ME		(1UL << 53UL) /* ch,ch+,jp */
+
+/* Privileged state error has occurred.  This is a capture of PSTATE.PRIV
+ * at the time the error is detected.
+ */
+#define CHAFSR_PRIV		(1UL << 52UL) /* ch,ch+,jp */
+
+/* The following bits 51 (CHAFSR_PERR) to 33 (CHAFSR_CE) are sticky error
+ * bits and record the most recently detected errors.  Bits accumulate
+ * errors that have been detected since the last write to clear the bit.
+ */
+
+/* System interface protocol error.  The processor asserts its ERROR
+ * pin when this event occurs and it also logs a specific cause code
+ * into a JTAG scannable flop.
+ */
+#define CHAFSR_PERR		(1UL << 51UL) /* ch,ch+,jp */
+
+/* Internal processor error.  The processor asserts its ERROR
+ * pin when this event occurs and it also logs a specific cause code
+ * into a JTAG scannable flop.
+ */
+#define CHAFSR_IERR		(1UL << 50UL) /* ch,ch+,jp */
+
+/* System request parity error on incoming address */
+#define CHAFSR_ISAP		(1UL << 49UL) /* ch,ch+,jp */
+
+/* HW Corrected system bus MTAG ECC error */
+#define CHAFSR_EMC		(1UL << 48UL) /* ch,ch+ */
+/* Parity error on L2 cache tag SRAM */
+#define JPAFSR_ETP		(1UL << 48UL) /* jp */
+
+/* Uncorrectable system bus MTAG ECC error */
+#define CHAFSR_EMU		(1UL << 47UL) /* ch,ch+ */
+/* Out of range memory error has occurred */
+#define JPAFSR_OM		(1UL << 47UL) /* jp */
+
+/* HW Corrected system bus data ECC error for read of interrupt vector */
+#define CHAFSR_IVC		(1UL << 46UL) /* ch,ch+ */
+/* Error due to unsupported store */
+#define JPAFSR_UMS		(1UL << 46UL) /* jp */
+
+/* Uncorrectable system bus data ECC error for read of interrupt vector */
+#define CHAFSR_IVU		(1UL << 45UL) /* ch,ch+,jp */
+
+/* Unmapped error from system bus */
+#define CHAFSR_TO		(1UL << 44UL) /* ch,ch+,jp */
+
+/* Bus error response from system bus */
+#define CHAFSR_BERR		(1UL << 43UL) /* ch,ch+,jp */
+
+/* SW Correctable E-cache ECC error for instruction fetch or data access
+ * other than block load.
+ */
+#define CHAFSR_UCC		(1UL << 42UL) /* ch,ch+,jp */
+
+/* Uncorrectable E-cache ECC error for instruction fetch or data access
+ * other than block load.
+ */
+#define CHAFSR_UCU		(1UL << 41UL) /* ch,ch+,jp */
+
+/* Copyout HW Corrected ECC error */
+#define CHAFSR_CPC		(1UL << 40UL) /* ch,ch+,jp */
+
+/* Copyout Uncorrectable ECC error */
+#define CHAFSR_CPU		(1UL << 39UL) /* ch,ch+,jp */
+
+/* HW Corrected ECC error from E-cache for writeback */
+#define CHAFSR_WDC		(1UL << 38UL) /* ch,ch+,jp */
+
+/* Uncorrectable ECC error from E-cache for writeback */
+#define CHAFSR_WDU		(1UL << 37UL) /* ch,ch+,jp */
+
+/* HW Corrected ECC error from E-cache for store merge or block load */
+#define CHAFSR_EDC		(1UL << 36UL) /* ch,ch+,jp */
+
+/* Uncorrectable ECC error from E-cache for store merge or block load */
+#define CHAFSR_EDU		(1UL << 35UL) /* ch,ch+,jp */
+
+/* Uncorrectable system bus data ECC error for read of memory or I/O */
+#define CHAFSR_UE		(1UL << 34UL) /* ch,ch+,jp */
+
+/* HW Corrected system bus data ECC error for read of memory or I/O */
+#define CHAFSR_CE		(1UL << 33UL) /* ch,ch+,jp */
+
+/* Uncorrectable ECC error from remote cache/memory */
+#define JPAFSR_RUE		(1UL << 32UL) /* jp */
+
+/* Correctable ECC error from remote cache/memory */
+#define JPAFSR_RCE		(1UL << 31UL) /* jp */
+
+/* JBUS parity error on returned read data */
+#define JPAFSR_BP		(1UL << 30UL) /* jp */
+
+/* JBUS parity error on data for writeback or block store */
+#define JPAFSR_WBP		(1UL << 29UL) /* jp */
+
+/* Foreign read to DRAM incurring correctable ECC error */
+#define JPAFSR_FRC		(1UL << 28UL) /* jp */
+
+/* Foreign read to DRAM incurring uncorrectable ECC error */
+#define JPAFSR_FRU		(1UL << 27UL) /* jp */
+
+#define CHAFSR_ERRORS		(CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP | CHAFSR_EMC | \
+				 CHAFSR_EMU | CHAFSR_IVC | CHAFSR_IVU | CHAFSR_TO | \
+				 CHAFSR_BERR | CHAFSR_UCC | CHAFSR_UCU | CHAFSR_CPC | \
+				 CHAFSR_CPU | CHAFSR_WDC | CHAFSR_WDU | CHAFSR_EDC | \
+				 CHAFSR_EDU | CHAFSR_UE | CHAFSR_CE)
+#define CHPAFSR_ERRORS		(CHPAFSR_DTO | CHPAFSR_DBERR | CHPAFSR_THCE | \
+				 CHPAFSR_TSCE | CHPAFSR_TUE | CHPAFSR_DUE | \
+				 CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP | CHAFSR_EMC | \
+				 CHAFSR_EMU | CHAFSR_IVC | CHAFSR_IVU | CHAFSR_TO | \
+				 CHAFSR_BERR | CHAFSR_UCC | CHAFSR_UCU | CHAFSR_CPC | \
+				 CHAFSR_CPU | CHAFSR_WDC | CHAFSR_WDU | CHAFSR_EDC | \
+				 CHAFSR_EDU | CHAFSR_UE | CHAFSR_CE)
+#define JPAFSR_ERRORS		(JPAFSR_JETO | JPAFSR_SCE | JPAFSR_JEIC | \
+				 JPAFSR_JEIT | CHAFSR_PERR | CHAFSR_IERR | \
+				 CHAFSR_ISAP | JPAFSR_ETP | JPAFSR_OM | \
+				 JPAFSR_UMS | CHAFSR_IVU | CHAFSR_TO | \
+				 CHAFSR_BERR | CHAFSR_UCC | CHAFSR_UCU | \
+				 CHAFSR_CPC | CHAFSR_CPU | CHAFSR_WDC | \
+				 CHAFSR_WDU | CHAFSR_EDC | CHAFSR_EDU | \
+				 CHAFSR_UE | CHAFSR_CE | JPAFSR_RUE | \
+				 JPAFSR_RCE | JPAFSR_BP | JPAFSR_WBP | \
+				 JPAFSR_FRC | JPAFSR_FRU)
+
+/* Active JBUS request signal when error occurred */
+#define JPAFSR_JBREQ		(0x7UL << 24UL) /* jp */
+#define JPAFSR_JBREQ_SHIFT	24UL
+
+/* L2 cache way information */
+#define JPAFSR_ETW		(0x3UL << 22UL) /* jp */
+#define JPAFSR_ETW_SHIFT	22UL
+
+/* System bus MTAG ECC syndrome.  This field captures the status of the
+ * first occurrence of the highest-priority error according to the M_SYND
+ * overwrite policy.  After the AFSR sticky bit, corresponding to the error
+ * for which the M_SYND is reported, is cleared, the contents of the M_SYND
+ * field will be unchanged but will be unfrozen for further error capture.
+ */
+#define CHAFSR_M_SYNDROME	(0xfUL << 16UL) /* ch,ch+,jp */
+#define CHAFSR_M_SYNDROME_SHIFT	16UL
+
+/* Agent Id of the foreign device causing the UE/CE errors */
+#define JPAFSR_AID		(0x1fUL << 9UL) /* jp */
+#define JPAFSR_AID_SHIFT	9UL
+
+/* System bus or E-cache data ECC syndrome.  This field captures the status
+ * of the first occurrence of the highest-priority error according to the
+ * E_SYND overwrite policy.  After the AFSR sticky bit, corresponding to the
+ * error for which the E_SYND is reported, is cleared, the contents of the E_SYND
+ * field will be unchanged but will be unfrozen for further error capture.
+ */
+#define CHAFSR_E_SYNDROME	(0x1ffUL << 0UL) /* ch,ch+,jp */
+#define CHAFSR_E_SYNDROME_SHIFT	0UL
+
+/* The AFSR must be explicitly cleared by software, it is not cleared automatically
+ * by a read.  Writes to bits <51:33> with bits set will clear the corresponding
+ * bits in the AFSR.  Bits associated with disrupting traps must be cleared before
+ * interrupts are re-enabled to prevent multiple traps for the same error.  I.e.
+ * PSTATE.IE and AFSR bits control delivery of disrupting traps.
+ *
+ * Since there is only one AFAR, when multiple events have been logged by the
+ * bits in the AFSR, at most one of these events will have its status captured
+ * in the AFAR.  The highest priority of those event bits will get AFAR logging.
+ * The AFAR will be unlocked and available to capture the address of another event
+ * as soon as the one bit in AFSR that corresponds to the event logged in AFAR is
+ * cleared.  For example, if AFSR.CE and then AFSR.UE (which overwrites the
+ * AFAR) are detected, and AFSR.UE is cleared but not AFSR.CE, the AFAR will be unlocked
+ * and ready for another event, even though AFSR.CE is still set.  The same rules
+ * also apply to the M_SYNDROME and E_SYNDROME fields of the AFSR.
+ */
+
+#endif /* _SPARC64_CHAFSR_H */
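
Everything in this header is a mask or a mask/_SHIFT pair over one 64-bit register image, so decoding follows a single pattern. A standalone model, with the few masks it needs copied from above so it compiles alone:

#include <stdio.h>

#define CHAFSR_ME		(1UL << 53)
#define CHAFSR_PRIV		(1UL << 52)
#define CHAFSR_E_SYNDROME	(0x1ffUL << 0)
#define CHAFSR_E_SYNDROME_SHIFT	0

/* Test the status bits, then mask-and-shift out a syndrome field. */
static void decode_afsr(unsigned long afsr)
{
	if (afsr & CHAFSR_ME)
		printf("multiple errors of one type\n");
	if (afsr & CHAFSR_PRIV)
		printf("error taken in privileged state\n");
	printf("E_SYND = 0x%lx\n",
	       (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT);
}

int main(void)
{
	decode_afsr((1UL << 53) | (1UL << 52) | 0x1a5);
	return 0;
}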
diff --git a/arch/sparc/include/asm/checksum.h b/arch/sparc/include/asm/checksum.h
new file mode 100644
index 0000000..c3be56e
--- /dev/null
+++ b/arch/sparc/include/asm/checksum.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_CHECKSUM_H
+#define ___ASM_SPARC_CHECKSUM_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/checksum_64.h>
+#else
+#include <asm/checksum_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
new file mode 100644
index 0000000..d1e53d7
--- /dev/null
+++ b/arch/sparc/include/asm/checksum_32.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_CHECKSUM_H
+#define __SPARC_CHECKSUM_H
+
+/*  checksum.h:  IP/UDP/TCP checksum routines on the Sparc.
+ *
+ *  Copyright(C) 1995 Linus Torvalds
+ *  Copyright(C) 1995 Miguel de Icaza
+ *  Copyright(C) 1996 David S. Miller
+ *  Copyright(C) 1996 Eddie C. Dost
+ *  Copyright(C) 1997 Jakub Jelinek
+ *
+ * derived from:
+ *	Alpha checksum c-code
+ *      ix86 inline assembly
+ *      RFC1071 Computing the Internet Checksum
+ */
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+/* computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/* the same as csum_partial, but copies from fs:src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+
+unsigned int __csum_partial_copy_sparc_generic (const unsigned char *, unsigned char *);
+
+static inline __wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+{
+	register unsigned int ret asm("o0") = (unsigned int)src;
+	register char *d asm("o1") = dst;
+	register int l asm("g1") = len;
+
+	__asm__ __volatile__ (
+		"call __csum_partial_copy_sparc_generic\n\t"
+		" mov %6, %%g7\n"
+	: "=&r" (ret), "=&r" (d), "=&r" (l)
+	: "0" (ret), "1" (d), "2" (l), "r" (sum)
+	: "o2", "o3", "o4", "o5", "o7",
+	  "g2", "g3", "g4", "g5", "g7",
+	  "memory", "cc");
+	return (__force __wsum)ret;
+}
+
+static inline __wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+			    __wsum sum, int *err)
+  {
+	register unsigned long ret asm("o0") = (unsigned long)src;
+	register char *d asm("o1") = dst;
+	register int l asm("g1") = len;
+	register __wsum s asm("g7") = sum;
+
+	__asm__ __volatile__ (
+	".section __ex_table,#alloc\n\t"
+	".align 4\n\t"
+	".word 1f,2\n\t"
+	".previous\n"
+	"1:\n\t"
+	"call __csum_partial_copy_sparc_generic\n\t"
+	" st %8, [%%sp + 64]\n"
+	: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+	: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+	: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5",
+	  "cc", "memory");
+	return (__force __wsum)ret;
+}
+
+static inline __wsum
+csum_partial_copy_to_user(const void *src, void __user *dst, int len,
+			  __wsum sum, int *err)
+{
+	if (!access_ok (VERIFY_WRITE, dst, len)) {
+		*err = -EFAULT;
+		return sum;
+	} else {
+		register unsigned long ret asm("o0") = (unsigned long)src;
+		register char __user *d asm("o1") = dst;
+		register int l asm("g1") = len;
+		register __wsum s asm("g7") = sum;
+
+		__asm__ __volatile__ (
+		".section __ex_table,#alloc\n\t"
+		".align 4\n\t"
+		".word 1f,1\n\t"
+		".previous\n"
+		"1:\n\t"
+		"call __csum_partial_copy_sparc_generic\n\t"
+		" st %8, [%%sp + 64]\n"
+		: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+		: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+		: "o2", "o3", "o4", "o5", "o7",
+		  "g2", "g3", "g4", "g5",
+		  "cc", "memory");
+		return (__force __wsum)ret;
+	}
+}
+
+#define HAVE_CSUM_COPY_USER
+#define csum_and_copy_to_user csum_partial_copy_to_user
+
+/* ihl is always 5 or greater, almost always exactly 5, and iph is word aligned
+ * the majority of the time.
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	__sum16 sum;
+
+	/* Note: We must read %2 before we touch %0 for the first time,
+	 *       because GCC can legitimately use the same register for
+	 *       both operands.
+	 */
+	__asm__ __volatile__("sub\t%2, 4, %%g4\n\t"
+			     "ld\t[%1 + 0x00], %0\n\t"
+			     "ld\t[%1 + 0x04], %%g2\n\t"
+			     "ld\t[%1 + 0x08], %%g3\n\t"
+			     "addcc\t%%g2, %0, %0\n\t"
+			     "addxcc\t%%g3, %0, %0\n\t"
+			     "ld\t[%1 + 0x0c], %%g2\n\t"
+			     "ld\t[%1 + 0x10], %%g3\n\t"
+			     "addxcc\t%%g2, %0, %0\n\t"
+			     "addx\t%0, %%g0, %0\n"
+			     "1:\taddcc\t%%g3, %0, %0\n\t"
+			     "add\t%1, 4, %1\n\t"
+			     "addxcc\t%0, %%g0, %0\n\t"
+			     "subcc\t%%g4, 1, %%g4\n\t"
+			     "be,a\t2f\n\t"
+			     "sll\t%0, 16, %%g2\n\t"
+			     "b\t1b\n\t"
+			     "ld\t[%1 + 0x10], %%g3\n"
+			     "2:\taddcc\t%0, %%g2, %%g2\n\t"
+			     "srl\t%%g2, 16, %0\n\t"
+			     "addx\t%0, %%g0, %0\n\t"
+			     "xnor\t%%g0, %0, %0"
+			     : "=r" (sum), "=&r" (iph)
+			     : "r" (ihl), "1" (iph)
+			     : "g2", "g3", "g4", "cc", "memory");
+	return sum;
+}
+
+/* Fold a partial checksum without adding pseudo headers. */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	unsigned int tmp;
+
+	__asm__ __volatile__("addcc\t%0, %1, %1\n\t"
+			     "srl\t%1, 16, %1\n\t"
+			     "addx\t%1, %%g0, %1\n\t"
+			     "xnor\t%%g0, %1, %0"
+			     : "=&r" (sum), "=r" (tmp)
+			     : "0" (sum), "1" ((__force u32)sum<<16)
+			     : "cc");
+	return (__force __sum16)sum;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+					__u32 len, __u8 proto,
+					__wsum sum)
+{
+	__asm__ __volatile__("addcc\t%1, %0, %0\n\t"
+			     "addxcc\t%2, %0, %0\n\t"
+			     "addxcc\t%3, %0, %0\n\t"
+			     "addx\t%0, %%g0, %0\n\t"
+			     : "=r" (sum), "=r" (saddr)
+			     : "r" (daddr), "r" (proto + len), "0" (sum),
+			       "1" (saddr)
+			     : "cc");
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+					__u32 len, __u8 proto,
+					__wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+				      const struct in6_addr *daddr,
+				      __u32 len, __u8 proto, __wsum sum)
+{
+	__asm__ __volatile__ (
+		"addcc	%3, %4, %%g4\n\t"
+		"addxcc	%5, %%g4, %%g4\n\t"
+		"ld	[%2 + 0x0c], %%g2\n\t"
+		"ld	[%2 + 0x08], %%g3\n\t"
+		"addxcc	%%g2, %%g4, %%g4\n\t"
+		"ld	[%2 + 0x04], %%g2\n\t"
+		"addxcc	%%g3, %%g4, %%g4\n\t"
+		"ld	[%2 + 0x00], %%g3\n\t"
+		"addxcc	%%g2, %%g4, %%g4\n\t"
+		"ld	[%1 + 0x0c], %%g2\n\t"
+		"addxcc	%%g3, %%g4, %%g4\n\t"
+		"ld	[%1 + 0x08], %%g3\n\t"
+		"addxcc	%%g2, %%g4, %%g4\n\t"
+		"ld	[%1 + 0x04], %%g2\n\t"
+		"addxcc	%%g3, %%g4, %%g4\n\t"
+		"ld	[%1 + 0x00], %%g3\n\t"
+		"addxcc	%%g2, %%g4, %%g4\n\t"
+		"addxcc	%%g3, %%g4, %0\n\t"
+		"addx	0, %0, %0\n"
+		: "=&r" (sum)
+		: "r" (saddr), "r" (daddr),
+		  "r"(htonl(len)), "r"(htonl(proto)), "r"(sum)
+		: "g2", "g3", "g4", "cc");
+
+	return csum_fold(sum);
+}
+
+/* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+	return csum_fold(csum_partial(buff, len, 0));
+}
+
+#define HAVE_ARCH_CSUM_ADD
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+	__asm__ __volatile__(
+		"addcc   %0, %1, %0\n"
+		"addx    %0, %%g0, %0"
+		: "=r" (csum)
+		: "r" (addend), "0" (csum));
+
+	return csum;
+}
+
+#endif /* !(__SPARC_CHECKSUM_H) */
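
The assembly above computes the standard Internet checksum (RFC 1071). A portable model of the arithmetic follows; it reproduces the folded result for the big-endian layout sparc uses, not the word-at-a-time strategy of the assembly:

#include <stdio.h>
#include <stdint.h>

/* Accumulate 16-bit big-endian words with end-around carry, the sum
 * csum_partial() builds up. */
static uint32_t model_csum_partial(const uint8_t *p, int len, uint32_t sum)
{
	uint64_t acc = sum;

	for (; len > 1; len -= 2, p += 2)
		acc += (uint32_t)((p[0] << 8) | p[1]);
	if (len)
		acc += (uint32_t)(p[0] << 8);	/* odd trailing byte */
	while (acc >> 32)
		acc = (acc & 0xffffffffu) + (acc >> 32);
	return (uint32_t)acc;
}

/* Collapse to 16 bits with end-around carry and complement: csum_fold(). */
static uint16_t model_csum_fold(uint32_t sum)
{
	sum = (sum >> 16) + (sum & 0xffff);
	sum += sum >> 16;
	return (uint16_t)~sum;
}

int main(void)
{
	const uint8_t buf[4] = { 0x45, 0x00, 0x00, 0x54 };

	/* 0x4500 + 0x0054 = 0x4554; folded and inverted: prints 0xbaab */
	printf("0x%04x\n", model_csum_fold(model_csum_partial(buf, 4, 0)));
	return 0;
}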
diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h
new file mode 100644
index 0000000..e524509
--- /dev/null
+++ b/arch/sparc/include/asm/checksum_64.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC64_CHECKSUM_H
+#define __SPARC64_CHECKSUM_H
+
+/*  checksum.h:  IP/UDP/TCP checksum routines on the V9.
+ *
+ *  Copyright(C) 1995 Linus Torvalds
+ *  Copyright(C) 1995 Miguel de Icaza
+ *  Copyright(C) 1996 David S. Miller
+ *  Copyright(C) 1996 Eddie C. Dost
+ *  Copyright(C) 1997 Jakub Jelinek
+ *
+ * derived from:
+ *	Alpha checksum c-code
+ *      ix86 inline assembly
+ *      RFC1071 Computing the Internet Checksum
+ */
+
+#include <linux/in6.h>
+#include <linux/uaccess.h>
+
+/* computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void * buff, int len, __wsum sum);
+
+/* the same as csum_partial, but copies from user space while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+				 int len, __wsum sum);
+
+long __csum_partial_copy_from_user(const void __user *src,
+				   void *dst, int len,
+				   __wsum sum);
+
+static inline __wsum
+csum_partial_copy_from_user(const void __user *src,
+			    void *dst, int len,
+			    __wsum sum, int *err)
+{
+	long ret = __csum_partial_copy_from_user(src, dst, len, sum);
+	if (ret < 0)
+		*err = -EFAULT;
+	return (__force __wsum) ret;
+}
+
+/*
+ *	Copy and checksum to user
+ */
+#define HAVE_CSUM_COPY_USER
+long __csum_partial_copy_to_user(const void *src,
+				 void __user *dst, int len,
+				 __wsum sum);
+
+static inline __wsum
+csum_and_copy_to_user(const void *src,
+		      void __user *dst, int len,
+		      __wsum sum, int *err)
+{
+	long ret = __csum_partial_copy_to_user(src, dst, len, sum);
+	if (ret < 0)
+		*err = -EFAULT;
+	return (__force __wsum) ret;
+}
+
+/* ihl is always 5 or greater, almost always exactly 5, and iph is word aligned
+ * the majority of the time.
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/* Fold a partial checksum without adding pseudo headers. */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	unsigned int tmp;
+
+	__asm__ __volatile__(
+"	addcc		%0, %1, %1\n"
+"	srl		%1, 16, %1\n"
+"	addc		%1, %%g0, %1\n"
+"	xnor		%%g0, %1, %0\n"
+	: "=&r" (sum), "=r" (tmp)
+	: "0" (sum), "1" ((__force u32)sum<<16)
+	: "cc");
+	return (__force __sum16)sum;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+					__u32 len, __u8 proto,
+					__wsum sum)
+{
+	__asm__ __volatile__(
+"	addcc		%1, %0, %0\n"
+"	addccc		%2, %0, %0\n"
+"	addccc		%3, %0, %0\n"
+"	addc		%0, %%g0, %0\n"
+	: "=r" (sum), "=r" (saddr)
+	: "r" (daddr), "r" (proto + len), "0" (sum), "1" (saddr)
+	: "cc");
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+					__u32 len, __u8 proto,
+					__wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+				      const struct in6_addr *daddr,
+				      __u32 len, __u8 proto, __wsum sum)
+{
+	__asm__ __volatile__ (
+"	addcc		%3, %4, %%g7\n"
+"	addccc		%5, %%g7, %%g7\n"
+"	lduw		[%2 + 0x0c], %%g2\n"
+"	lduw		[%2 + 0x08], %%g3\n"
+"	addccc		%%g2, %%g7, %%g7\n"
+"	lduw		[%2 + 0x04], %%g2\n"
+"	addccc		%%g3, %%g7, %%g7\n"
+"	lduw		[%2 + 0x00], %%g3\n"
+"	addccc		%%g2, %%g7, %%g7\n"
+"	lduw		[%1 + 0x0c], %%g2\n"
+"	addccc		%%g3, %%g7, %%g7\n"
+"	lduw		[%1 + 0x08], %%g3\n"
+"	addccc		%%g2, %%g7, %%g7\n"
+"	lduw		[%1 + 0x04], %%g2\n"
+"	addccc		%%g3, %%g7, %%g7\n"
+"	lduw		[%1 + 0x00], %%g3\n"
+"	addccc		%%g2, %%g7, %%g7\n"
+"	addccc		%%g3, %%g7, %0\n"
+"	addc		0, %0, %0\n"
+	: "=&r" (sum)
+	: "r" (saddr), "r" (daddr), "r"(htonl(len)),
+	  "r"(htonl(proto)), "r"(sum)
+	: "g2", "g3", "g7", "cc");
+
+	return csum_fold(sum);
+}
+
+/* this routine is used for miscellaneous IP-like checksums, mainly in icmp.c */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+	return csum_fold(csum_partial(buff, len, 0));
+}
+
+#define HAVE_ARCH_CSUM_ADD
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+	__asm__ __volatile__(
+		"addcc   %0, %1, %0\n"
+		"addx    %0, %%g0, %0"
+		: "=r" (csum)
+		: "r" (addend), "0" (csum));
+
+	return csum;
+}
+
+#endif /* !(__SPARC64_CHECKSUM_H) */
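
A hedged usage sketch in kernel context, with illustrative buffer names: the running __wsum threads through successive csum_partial() calls (per the comment above, every fragment but the last must have even length) and is folded exactly once at the end:

#include <net/checksum.h>

/* Hypothetical caller: checksum a header and a payload in one pass. */
static __sum16 checksum_two_parts(const void *hdr, int hdr_len,
				  const void *payload, int payload_len)
{
	__wsum sum;

	sum = csum_partial(hdr, hdr_len, 0);		/* hdr_len must be even */
	sum = csum_partial(payload, payload_len, sum);	/* last fragment may be odd */
	return csum_fold(sum);
}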
diff --git a/arch/sparc/include/asm/chmctrl.h b/arch/sparc/include/asm/chmctrl.h
new file mode 100644
index 0000000..96f043c
--- /dev/null
+++ b/arch/sparc/include/asm/chmctrl.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_CHMCTRL_H
+#define _SPARC64_CHMCTRL_H
+
+/* Cheetah memory controller programmable registers. */
+#define CHMCTRL_TCTRL1		0x00 /* Memory Timing Control I		*/
+#define CHMCTRL_TCTRL2		0x08 /* Memory Timing Control II	*/
+#define CHMCTRL_TCTRL3		0x38 /* Memory Timing Control III	*/
+#define CHMCTRL_TCTRL4		0x40 /* Memory Timing Control IV	*/
+#define CHMCTRL_DECODE1		0x10 /* Memory Address Decode I		*/
+#define CHMCTRL_DECODE2		0x18 /* Memory Address Decode II	*/
+#define CHMCTRL_DECODE3		0x20 /* Memory Address Decode III	*/
+#define CHMCTRL_DECODE4		0x28 /* Memory Address Decode IV	*/
+#define CHMCTRL_MACTRL		0x30 /* Memory Address Control		*/
+
+/* Memory Timing Control I */
+#define TCTRL1_SDRAMCTL_DLY	0xf000000000000000UL
+#define TCTRL1_SDRAMCTL_DLY_SHIFT     60
+#define TCTRL1_SDRAMCLK_DLY	0x0e00000000000000UL
+#define TCTRL1_SDRAMCLK_DLY_SHIFT     57
+#define TCTRL1_R		0x0100000000000000UL
+#define TCTRL1_R_SHIFT 		      56
+#define TCTRL1_AUTORFR_CYCLE	0x00fe000000000000UL
+#define TCTRL1_AUTORFR_CYCLE_SHIFT    49
+#define TCTRL1_RD_WAIT		0x0001f00000000000UL
+#define TCTRL1_RD_WAIT_SHIFT	      44
+#define TCTRL1_PC_CYCLE		0x00000fc000000000UL
+#define TCTRL1_PC_CYCLE_SHIFT	      38
+#define TCTRL1_WR_MORE_RAS_PW	0x0000003f00000000UL
+#define TCTRL1_WR_MORE_RAS_PW_SHIFT   32
+#define TCTRL1_RD_MORE_RAW_PW	0x00000000fc000000UL
+#define TCTRL1_RD_MORE_RAS_PW_SHIFT   26
+#define TCTRL1_ACT_WR_DLY	0x0000000003f00000UL
+#define TCTRL1_ACT_WR_DLY_SHIFT	      20
+#define TCTRL1_ACT_RD_DLY	0x00000000000fc000UL
+#define TCTRL1_ACT_RD_DLY_SHIFT	      14
+#define TCTRL1_BANK_PRESENT	0x0000000000003000UL
+#define TCTRL1_BANK_PRESENT_SHIFT     12
+#define TCTRL1_RFR_INT		0x0000000000000ff8UL
+#define TCTRL1_RFR_INT_SHIFT	      3
+#define TCTRL1_SET_MODE_REG	0x0000000000000004UL
+#define TCTRL1_SET_MODE_REG_SHIFT     2
+#define TCTRL1_RFR_ENABLE	0x0000000000000002UL
+#define TCTRL1_RFR_ENABLE_SHIFT	      1
+#define TCTRL1_PRECHG_ALL	0x0000000000000001UL
+#define TCTRL1_PRECHG_ALL_SHIFT	      0
+
+/* Memory Timing Control II */
+#define TCTRL2_WR_MSEL_DLY	0xfc00000000000000UL
+#define TCTRL2_WR_MSEL_DLY_SHIFT      58
+#define TCTRL2_RD_MSEL_DLY	0x03f0000000000000UL
+#define TCTRL2_RD_MSEL_DLY_SHIFT      52
+#define TCTRL2_WRDATA_THLD	0x000c000000000000UL
+#define TCTRL2_WRDATA_THLD_SHIFT      50
+#define TCTRL2_RDWR_RD_TI_DLY	0x0003f00000000000UL
+#define TCTRL2_RDWR_RD_TI_DLY_SHIFT   44
+#define TCTRL2_AUTOPRECHG_ENBL	0x0000080000000000UL
+#define TCTRL2_AUTOPRECHG_ENBL_SHIFT  43
+#define TCTRL2_RDWR_PI_MORE_DLY	0x000007c000000000UL
+#define TCTRL2_RDWR_PI_MORE_DLY_SHIFT 38
+#define TCTRL2_RDWR_1_DLY	0x0000003f00000000UL
+#define TCTRL2_RDWR_1_DLY_SHIFT       32
+#define TCTRL2_WRWR_PI_MORE_DLY	0x00000000f8000000UL
+#define TCTRL2_WRWR_PI_MORE_DLY_SHIFT 27
+#define TCTRL2_WRWR_1_DLY	0x0000000007e00000UL
+#define TCTRL2_WRWR_1_DLY_SHIFT       21
+#define TCTRL2_RDWR_RD_PI_MORE_DLY 0x00000000001f0000UL
+#define TCTRL2_RDWR_RD_PI_MORE_DLY_SHIFT 16
+#define TCTRL2_R		0x0000000000008000UL
+#define TCTRL2_R_SHIFT		      15
+#define TCTRL2_SDRAM_MODE_REG_DATA 0x0000000000007fffUL
+#define TCTRL2_SDRAM_MODE_REG_DATA_SHIFT 0
+
+/* Memory Timing Control III */
+#define TCTRL3_SDRAM_CTL_DLY	0xf000000000000000UL
+#define TCTRL3_SDRAM_CTL_DLY_SHIFT    60
+#define TCTRL3_SDRAM_CLK_DLY	0x0e00000000000000UL
+#define TCTRL3_SDRAM_CLK_DLY_SHIFT    57
+#define TCTRL3_R		0x0100000000000000UL
+#define TCTRL3_R_SHIFT		      56
+#define TCTRL3_AUTO_RFR_CYCLE	0x00fe000000000000UL
+#define TCTRL3_AUTO_RFR_CYCLE_SHIFT   49
+#define TCTRL3_RD_WAIT		0x0001f00000000000UL
+#define TCTRL3_RD_WAIT_SHIFT	      44
+#define TCTRL3_PC_CYCLE		0x00000fc000000000UL
+#define TCTRL3_PC_CYCLE_SHIFT	      38
+#define TCTRL3_WR_MORE_RAW_PW	0x0000003f00000000UL
+#define TCTRL3_WR_MORE_RAW_PW_SHIFT   32
+#define TCTRL3_RD_MORE_RAW_PW	0x00000000fc000000UL
+#define TCTRL3_RD_MORE_RAW_PW_SHIFT   26
+#define TCTRL3_ACT_WR_DLY	0x0000000003f00000UL
+#define TCTRL3_ACT_WR_DLY_SHIFT       20
+#define TCTRL3_ACT_RD_DLY	0x00000000000fc000UL
+#define TCTRL3_ACT_RD_DLY_SHIFT       14
+#define TCTRL3_BANK_PRESENT	0x0000000000003000UL
+#define TCTRL3_BANK_PRESENT_SHIFT     12
+#define TCTRL3_RFR_INT		0x0000000000000ff8UL
+#define TCTRL3_RFR_INT_SHIFT	      3
+#define TCTRL3_SET_MODE_REG	0x0000000000000004UL
+#define TCTRL3_SET_MODE_REG_SHIFT     2
+#define TCTRL3_RFR_ENABLE	0x0000000000000002UL
+#define TCTRL3_RFR_ENABLE_SHIFT       1
+#define TCTRL3_PRECHG_ALL	0x0000000000000001UL
+#define TCTRL3_PRECHG_ALL_SHIFT	      0
+
+/* Memory Timing Control IV */
+#define TCTRL4_WR_MSEL_DLY	0xfc00000000000000UL
+#define TCTRL4_WR_MSEL_DLY_SHIFT      58
+#define TCTRL4_RD_MSEL_DLY	0x03f0000000000000UL
+#define TCTRL4_RD_MSEL_DLY_SHIFT      52
+#define TCTRL4_WRDATA_THLD	0x000c000000000000UL
+#define TCTRL4_WRDATA_THLD_SHIFT      50
+#define TCTRL4_RDWR_RD_RI_DLY	0x0003f00000000000UL
+#define TCTRL4_RDWR_RD_RI_DLY_SHIFT   44
+#define TCTRL4_AUTO_PRECHG_ENBL	0x0000080000000000UL
+#define TCTRL4_AUTO_PRECHG_ENBL_SHIFT 43
+#define TCTRL4_RD_WR_PI_MORE_DLY 0x000007c000000000UL
+#define TCTRL4_RD_WR_PI_MORE_DLY_SHIFT 38
+#define TCTRL4_RD_WR_TI_DLY	0x0000003f00000000UL
+#define TCTRL4_RD_WR_TI_DLY_SHIFT     32
+#define TCTRL4_WR_WR_PI_MORE_DLY 0x00000000f8000000UL
+#define TCTRL4_WR_WR_PI_MORE_DLY_SHIFT 27
+#define TCTRL4_WR_WR_TI_DLY	0x0000000007e00000UL
+#define TCTRL4_WR_WR_TI_DLY_SHIFT     21
+#define TCTRL4_RDWR_RD_PI_MORE_DLY 0x00000000001f0000UL
+#define TCTRL4_RDWR_RD_PI_MORE_DLY_SHIFT 16
+#define TCTRL4_R		0x0000000000008000UL
+#define TCTRL4_R_SHIFT		      15
+#define TCTRL4_SDRAM_MODE_REG_DATA 0x0000000000007fffUL
+#define TCTRL4_SDRAM_MODE_REG_DATA_SHIFT 0
+
+/* All 4 memory address decoding registers have the
+ * same layout.
+ */
+#define MEM_DECODE_VALID	0x8000000000000000UL /* Valid */
+#define MEM_DECODE_VALID_SHIFT	      63
+#define MEM_DECODE_UK		0x001ffe0000000000UL /* Upper mask */
+#define MEM_DECODE_UK_SHIFT	      41
+#define MEM_DECODE_UM		0x0000001ffff00000UL /* Upper match */
+#define MEM_DECODE_UM_SHIFT	      20
+#define MEM_DECODE_LK		0x000000000003c000UL /* Lower mask */
+#define MEM_DECODE_LK_SHIFT	      14
+#define MEM_DECODE_LM		0x0000000000000f00UL /* Lower match */
+#define MEM_DECODE_LM_SHIFT           8
+
+#define PA_UPPER_BITS		0x000007fffc000000UL
+#define PA_UPPER_BITS_SHIFT	26
+#define PA_LOWER_BITS		0x00000000000003c0UL
+#define PA_LOWER_BITS_SHIFT	6
+
+#define MACTRL_R0		         0x8000000000000000UL
+#define MACTRL_R0_SHIFT		         63
+#define MACTRL_ADDR_LE_PW                0x7000000000000000UL
+#define MACTRL_ADDR_LE_PW_SHIFT		 60
+#define MACTRL_CMD_PW                    0x0f00000000000000UL
+#define MACTRL_CMD_PW_SHIFT		 56
+#define MACTRL_HALF_MODE_WR_MSEL_DLY     0x00fc000000000000UL
+#define MACTRL_HALF_MODE_WR_MSEL_DLY_SHIFT 50
+#define MACTRL_HALF_MODE_RD_MSEL_DLY     0x0003f00000000000UL
+#define MACTRL_HALF_MODE_RD_MSEL_DLY_SHIFT 44
+#define MACTRL_HALF_MODE_SDRAM_CTL_DLY   0x00000f0000000000UL
+#define MACTRL_HALF_MODE_SDRAM_CTL_DLY_SHIFT 40
+#define MACTRL_HALF_MODE_SDRAM_CLK_DLY   0x000000e000000000UL
+#define MACTRL_HALF_MODE_SDRAM_CLK_DLY_SHIFT 37
+#define MACTRL_R1                        0x0000001000000000UL
+#define MACTRL_R1_SHIFT                      36
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B3 0x0000000f00000000UL
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B3_SHIFT 32
+#define MACTRL_ENC_INTLV_B3              0x00000000f8000000UL
+#define MACTRL_ENC_INTLV_B3_SHIFT              27
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B2 0x0000000007800000UL
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B2_SHIFT 23
+#define MACTRL_ENC_INTLV_B2              0x00000000007c0000UL
+#define MACTRL_ENC_INTLV_B2_SHIFT              18
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B1 0x000000000003c000UL
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B1_SHIFT 14
+#define MACTRL_ENC_INTLV_B1              0x0000000000003e00UL
+#define MACTRL_ENC_INTLV_B1_SHIFT               9
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B0 0x00000000000001e0UL
+#define MACTRL_BANKSEL_N_ROWADDR_SIZE_B0_SHIFT  5
+#define MACTRL_ENC_INTLV_B0              0x000000000000001fUL
+#define MACTRL_ENC_INTLV_B0_SHIFT               0
+
+#endif /* _SPARC64_CHMCTRL_H */
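
Every field here follows the same mask/_SHIFT convention, so extraction is a single and-and-shift. A standalone model with the one pair it uses copied in:

#include <stdio.h>

#define TCTRL1_RFR_INT		0x0000000000000ff8UL
#define TCTRL1_RFR_INT_SHIFT	3

int main(void)
{
	unsigned long tctrl1 = 0x123456789abcdef0UL;	/* sample register image */

	/* Mask out the field, then shift it down to bit 0. */
	printf("refresh interval = 0x%lx\n",
	       (tctrl1 & TCTRL1_RFR_INT) >> TCTRL1_RFR_INT_SHIFT);
	return 0;
}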
diff --git a/arch/sparc/include/asm/clock.h b/arch/sparc/include/asm/clock.h
new file mode 100644
index 0000000..2cf99da
--- /dev/null
+++ b/arch/sparc/include/asm/clock.h
@@ -0,0 +1,11 @@
+/*
+ * clock.h:  Definitions for clock operations on the Sparc.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_CLOCK_H
+#define _SPARC_CLOCK_H
+
+/* Foo for now. */
+
+#endif /* !(_SPARC_CLOCK_H) */
diff --git a/arch/sparc/include/asm/clocksource.h b/arch/sparc/include/asm/clocksource.h
new file mode 100644
index 0000000..d63ef22
--- /dev/null
+++ b/arch/sparc/include/asm/clocksource.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ASM_SPARC_CLOCKSOURCE_H
+#define _ASM_SPARC_CLOCKSOURCE_H
+
+/* VDSO clocksources */
+#define VCLOCK_NONE   0  /* Nothing userspace can do. */
+#define VCLOCK_TICK   1  /* Use %tick.  */
+#define VCLOCK_STICK  2  /* Use %stick. */
+
+struct arch_clocksource_data {
+	int vclock_mode;
+};
+
+#endif /* _ASM_SPARC_CLOCKSOURCE_H */
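
A hedged sketch, in kernel context with a hypothetical my_cs, of where vclock_mode gets set: whichever code registers a %tick- or %stick-based clocksource tags its archdata so the vDSO knows it may read the counter directly.

#include <linux/init.h>
#include <linux/clocksource.h>

static struct clocksource my_cs;	/* illustrative only */

static void __init my_cs_setup(void)
{
	/* archdata exists because sparc64 selects ARCH_CLOCKSOURCE_DATA. */
	my_cs.archdata.vclock_mode = VCLOCK_TICK;
	/* ...followed by clocksource registration... */
}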
diff --git a/arch/sparc/include/asm/cmpxchg.h b/arch/sparc/include/asm/cmpxchg.h
new file mode 100644
index 0000000..b7955c6
--- /dev/null
+++ b/arch/sparc/include/asm/cmpxchg.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_CMPXCHG_H
+#define ___ASM_SPARC_CMPXCHG_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/cmpxchg_64.h>
+#else
+#include <asm/cmpxchg_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
new file mode 100644
index 0000000..c73b5a3
--- /dev/null
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* 32-bit atomic xchg() and cmpxchg() definitions.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com.au)
+ * Copyright (C) 2007 Kyle McMartin (kyle@parisc-linux.org)
+ *
+ * Additions by Keith M Wesolowski (wesolows@foobazco.org) based
+ * on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>.
+ */
+
+#ifndef __ARCH_SPARC_CMPXCHG__
+#define __ARCH_SPARC_CMPXCHG__
+
+unsigned long __xchg_u32(volatile u32 *m, u32 new);
+void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int size)
+{
+	switch (size) {
+	case 4:
+		return __xchg_u32(ptr, x);
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+
+/* Emulate cmpxchg() the same way we emulate atomics,
+ * by hashing the object address and indexing into an array
+ * of spinlocks to get a bit of performance...
+ *
+ * See arch/sparc/lib/atomic32.c for implementation.
+ *
+ * Cribbed from <asm-parisc/atomic.h>
+ */
+
+/* bug catcher for when unsupported size is used - won't link */
+void __cmpxchg_called_with_bad_pointer(void);
+/* we only need to support cmpxchg of a u32 on sparc */
+unsigned long __cmpxchg_u32(volatile u32 *m, u32 old, u32 new_);
+
+/* don't worry...optimizer will get rid of most of this */
+static inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32((u32 *)ptr, (u32)old, (u32)new_);
+	default:
+		__cmpxchg_called_with_bad_pointer();
+		break;
+	}
+	return old;
+}
+
+#define cmpxchg(ptr, o, n)						\
+({									\
+	__typeof__(*(ptr)) _o_ = (o);					\
+	__typeof__(*(ptr)) _n_ = (n);					\
+	(__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,	\
+			(unsigned long)_n_, sizeof(*(ptr)));		\
+})
+
+u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new);
+#define cmpxchg64(ptr, old, new)	__cmpxchg_u64(ptr, old, new)
+
+#include <asm-generic/cmpxchg-local.h>
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+#define cmpxchg_local(ptr, o, n)				  	       \
+	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
+			(unsigned long)(n), sizeof(*(ptr))))
+#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+
+#endif /* __ARCH_SPARC_CMPXCHG__ */
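
The emulation strategy the comment above describes (hash the object address into an array of locks, then compare-and-swap under the lock) can be modeled in userspace with pthreads. A sketch, not the actual arch/sparc/lib/atomic32.c code:

#include <pthread.h>
#include <stdio.h>

#define HASH_SIZE 4
static pthread_mutex_t locks[HASH_SIZE] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
/* Hash the address into the lock array; drop the low bits that are
 * constant for a 4-byte-aligned object. */
#define LOCK_FOR(addr) (&locks[((unsigned long)(addr) >> 2) & (HASH_SIZE - 1)])

static unsigned int model_cmpxchg_u32(volatile unsigned int *m,
				      unsigned int old, unsigned int new_)
{
	pthread_mutex_t *lock = LOCK_FOR(m);
	unsigned int prev;

	pthread_mutex_lock(lock);
	prev = *m;
	if (prev == old)
		*m = new_;
	pthread_mutex_unlock(lock);
	return prev;	/* success iff the return value equals 'old' */
}

int main(void)
{
	unsigned int v = 5;
	unsigned int prev = model_cmpxchg_u32(&v, 5, 7);

	printf("prev=%u v=%u\n", prev, v);	/* prev=5 v=7 */
	prev = model_cmpxchg_u32(&v, 5, 9);
	printf("prev=%u v=%u\n", prev, v);	/* prev=7 v=7: compare failed */
	return 0;
}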
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
new file mode 100644
index 0000000..f71ef37
--- /dev/null
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* 64-bit atomic xchg() and cmpxchg() definitions.
+ *
+ * Copyright (C) 1996, 1997, 2000 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef __ARCH_SPARC64_CMPXCHG__
+#define __ARCH_SPARC64_CMPXCHG__
+
+static inline unsigned long
+__cmpxchg_u32(volatile int *m, int old, int new)
+{
+	__asm__ __volatile__("cas [%2], %3, %0"
+			     : "=&r" (new)
+			     : "0" (new), "r" (m), "r" (old)
+			     : "memory");
+
+	return new;
+}
+
+static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
+{
+	unsigned long tmp1, tmp2;
+
+	__asm__ __volatile__(
+"	mov		%0, %1\n"
+"1:	lduw		[%4], %2\n"
+"	cas		[%4], %2, %0\n"
+"	cmp		%2, %0\n"
+"	bne,a,pn	%%icc, 1b\n"
+"	 mov		%1, %0\n"
+	: "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
+	: "0" (val), "r" (m)
+	: "cc", "memory");
+	return val;
+}
+
+static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long val)
+{
+	unsigned long tmp1, tmp2;
+
+	__asm__ __volatile__(
+"	mov		%0, %1\n"
+"1:	ldx		[%4], %2\n"
+"	casx		[%4], %2, %0\n"
+"	cmp		%2, %0\n"
+"	bne,a,pn	%%xcc, 1b\n"
+"	 mov		%1, %0\n"
+	: "=&r" (val), "=&r" (tmp1), "=&r" (tmp2)
+	: "0" (val), "r" (m)
+	: "cc", "memory");
+	return val;
+}
+
+#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+
+void __xchg_called_with_bad_pointer(void);
+
+/*
+ * Use a 4-byte cas instruction to achieve a 2-byte xchg. The main logic
+ * here is to get the bit shift of the halfword we are interested in.
+ * The XOR flips the halfword index to account for big-endian byte order.
+ */
+static inline unsigned long
+xchg16(__volatile__ unsigned short *m, unsigned short val)
+{
+	unsigned long maddr = (unsigned long)m;
+	int bit_shift = (((unsigned long)m & 2) ^ 2) << 3;
+	unsigned int mask = 0xffff << bit_shift;
+	unsigned int *ptr = (unsigned int  *) (maddr & ~2);
+	unsigned int old32, new32, load32;
+
+	/* Read the old value */
+	load32 = *ptr;
+
+	do {
+		old32 = load32;
+		new32 = (load32 & (~mask)) | val << bit_shift;
+		load32 = __cmpxchg_u32(ptr, old32, new32);
+	} while (load32 != old32);
+
+	return (load32 & mask) >> bit_shift;
+}
+
+static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr,
+				       int size)
+{
+	switch (size) {
+	case 2:
+		return xchg16(ptr, x);
+	case 4:
+		return xchg32(ptr, x);
+	case 8:
+		return xchg64(ptr, x);
+	}
+	__xchg_called_with_bad_pointer();
+	return x;
+}
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#include <asm-generic/cmpxchg-local.h>
+
+
+static inline unsigned long
+__cmpxchg_u64(volatile long *m, unsigned long old, unsigned long new)
+{
+	__asm__ __volatile__("casx [%2], %3, %0"
+			     : "=&r" (new)
+			     : "0" (new), "r" (m), "r" (old)
+			     : "memory");
+
+	return new;
+}
+
+/*
+ * Use a 4-byte cas instruction to achieve a 1-byte cmpxchg. The main logic
+ * here is to get the bit shift of the byte we are interested in.
+ * The XOR flips the byte index to account for big-endian byte order
+ */
+static inline unsigned long
+__cmpxchg_u8(volatile unsigned char *m, unsigned char old, unsigned char new)
+{
+	unsigned long maddr = (unsigned long)m;
+	int bit_shift = (((unsigned long)m & 3) ^ 3) << 3;
+	unsigned int mask = 0xff << bit_shift;
+	unsigned int *ptr = (unsigned int *) (maddr & ~3);
+	unsigned int old32, new32, load;
+	unsigned int load32 = *ptr;
+
+	do {
+		new32 = (load32 & ~mask) | (new << bit_shift);
+		old32 = (load32 & ~mask) | (old << bit_shift);
+		load32 = __cmpxchg_u32(ptr, old32, new32);
+		if (load32 == old32)
+			return old;
+		load = (load32 & mask) >> bit_shift;
+	} while (load == old);
+
+	return load;
+}
+
+/* This function doesn't exist, so you'll get a linker error
+   if something tries to do an invalid cmpxchg().  */
+void __cmpxchg_called_with_bad_pointer(void);
+
+static inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
+{
+	switch (size) {
+		case 1:
+			return __cmpxchg_u8(ptr, old, new);
+		case 4:
+			return __cmpxchg_u32(ptr, old, new);
+		case 8:
+			return __cmpxchg_u64(ptr, old, new);
+	}
+	__cmpxchg_called_with_bad_pointer();
+	return old;
+}
+
+#define cmpxchg(ptr,o,n)						 \
+  ({									 \
+     __typeof__(*(ptr)) _o_ = (o);					 \
+     __typeof__(*(ptr)) _n_ = (n);					 \
+     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
+				    (unsigned long)_n_, sizeof(*(ptr))); \
+  })
+
+/*
+ * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
+ * them available.
+ */
+
+static inline unsigned long __cmpxchg_local(volatile void *ptr,
+				      unsigned long old,
+				      unsigned long new, int size)
+{
+	switch (size) {
+	case 4:
+	case 8:	return __cmpxchg(ptr, old, new, size);
+	default:
+		return __cmpxchg_local_generic(ptr, old, new, size);
+	}
+
+	return old;
+}
+
+#define cmpxchg_local(ptr, o, n)				  	\
+	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
+			(unsigned long)(n), sizeof(*(ptr))))
+#define cmpxchg64_local(ptr, o, n)					\
+  ({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
+	cmpxchg_local((ptr), (o), (n));					\
+  })
+#define cmpxchg64(ptr, o, n)	cmpxchg64_local((ptr), (o), (n))
+
+#endif /* __ARCH_SPARC64_CMPXCHG__ */
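
The halfword trick above can be exercised in userspace with GCC's __atomic builtins standing in for cas. A sketch that keeps the big-endian '^ 2' index flip, so a little-endian test host would select the other halfword than sparc64 does:

#include <stdio.h>
#include <stdint.h>

/* Userspace model of xchg16() above; the builtin refills old32 with
 * the observed value on failure, so the loop recomputes new32 from it. */
static uint16_t model_xchg16(volatile uint16_t *m, uint16_t val)
{
	uintptr_t maddr = (uintptr_t)m;
	int bit_shift = (int)((maddr & 2) ^ 2) << 3;
	uint32_t mask = 0xffffu << bit_shift;
	volatile uint32_t *ptr = (volatile uint32_t *)(maddr & ~(uintptr_t)2);
	uint32_t old32 = *ptr, new32;

	do {
		new32 = (old32 & ~mask) | ((uint32_t)val << bit_shift);
	} while (!__atomic_compare_exchange_n(ptr, &old32, new32, 0,
					      __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST));
	return (uint16_t)((old32 & mask) >> bit_shift);
}

int main(void)
{
	static uint32_t word = 0x11112222;
	volatile uint16_t *h = (volatile uint16_t *)&word;
	uint16_t old = model_xchg16(&h[0], 0xbeef);

	/* On a big-endian host h[0] is the high half:
	 * prints old=0x1111 word=0xbeef2222 */
	printf("old=0x%04x word=0x%08x\n", old, (unsigned int)word);
	return 0;
}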
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
new file mode 100644
index 0000000..4eb51d2
--- /dev/null
+++ b/arch/sparc/include/asm/compat.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC64_COMPAT_H
+#define _ASM_SPARC64_COMPAT_H
+/*
+ * Architecture specific compatibility types
+ */
+#include <linux/types.h>
+
+#define COMPAT_USER_HZ		100
+#define COMPAT_UTS_MACHINE	"sparc\0\0"
+
+typedef u32		compat_size_t;
+typedef s32		compat_ssize_t;
+typedef s32		compat_clock_t;
+typedef s32		compat_pid_t;
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
+typedef u32		__compat_uid32_t;
+typedef u32		__compat_gid32_t;
+typedef u16		compat_mode_t;
+typedef u32		compat_ino_t;
+typedef u16		compat_dev_t;
+typedef s32		compat_off_t;
+typedef s64		compat_loff_t;
+typedef s16		compat_nlink_t;
+typedef u16		compat_ipc_pid_t;
+typedef s32		compat_daddr_t;
+typedef u32		compat_caddr_t;
+typedef __kernel_fsid_t	compat_fsid_t;
+typedef s32		compat_key_t;
+typedef s32		compat_timer_t;
+
+typedef s32		compat_int_t;
+typedef s32		compat_long_t;
+typedef s64		compat_s64;
+typedef u32		compat_uint_t;
+typedef u32		compat_ulong_t;
+typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
+
+struct compat_stat {
+	compat_dev_t	st_dev;
+	compat_ino_t	st_ino;
+	compat_mode_t	st_mode;
+	compat_nlink_t	st_nlink;
+	__compat_uid_t	st_uid;
+	__compat_gid_t	st_gid;
+	compat_dev_t	st_rdev;
+	compat_off_t	st_size;
+	compat_time_t	st_atime;
+	compat_ulong_t	st_atime_nsec;
+	compat_time_t	st_mtime;
+	compat_ulong_t	st_mtime_nsec;
+	compat_time_t	st_ctime;
+	compat_ulong_t	st_ctime_nsec;
+	compat_off_t	st_blksize;
+	compat_off_t	st_blocks;
+	u32		__unused4[2];
+};
+
+struct compat_stat64 {
+	unsigned long long	st_dev;
+
+	unsigned long long	st_ino;
+
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+
+	unsigned long long	st_rdev;
+
+	unsigned char	__pad3[8];
+
+	long long	st_size;
+	unsigned int	st_blksize;
+
+	unsigned char	__pad4[8];
+	unsigned int	st_blocks;
+
+	unsigned int	st_atime;
+	unsigned int	st_atime_nsec;
+
+	unsigned int	st_mtime;
+	unsigned int	st_mtime_nsec;
+
+	unsigned int	st_ctime;
+	unsigned int	st_ctime_nsec;
+
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+
+struct compat_flock {
+	short		l_type;
+	short		l_whence;
+	compat_off_t	l_start;
+	compat_off_t	l_len;
+	compat_pid_t	l_pid;
+	short		__unused;
+};
+
+#define F_GETLK64	12
+#define F_SETLK64	13
+#define F_SETLKW64	14
+
+struct compat_flock64 {
+	short		l_type;
+	short		l_whence;
+	compat_loff_t	l_start;
+	compat_loff_t	l_len;
+	compat_pid_t	l_pid;
+	short		__unused;
+};
+
+struct compat_statfs {
+	int		f_type;
+	int		f_bsize;
+	int		f_blocks;
+	int		f_bfree;
+	int		f_bavail;
+	int		f_files;
+	int		f_ffree;
+	compat_fsid_t	f_fsid;
+	int		f_namelen;	/* SunOS ignores this field. */
+	int		f_frsize;
+	int		f_flags;
+	int		f_spare[4];
+};
+
+#define COMPAT_RLIM_INFINITY 0x7fffffff
+
+typedef u32		compat_old_sigset_t;
+
+#define _COMPAT_NSIG		64
+#define _COMPAT_NSIG_BPW	32
+
+typedef u32		compat_sigset_word;
+
+#define COMPAT_OFF_T_MAX	0x7fffffff
+
+/*
+ * A pointer passed in from user mode. This should not
+ * be used for syscall parameters, just declare them
+ * as pointers because the syscall entry code will have
+ * appropriately converted them already.
+ */
+
+static inline void __user *compat_ptr(compat_uptr_t uptr)
+{
+	return (void __user *)(unsigned long)uptr;
+}
+
+static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+{
+	return (u32)(unsigned long)uptr;
+}
+
+#ifdef CONFIG_COMPAT
+static inline void __user *arch_compat_alloc_user_space(long len)
+{
+	struct pt_regs *regs = current_thread_info()->kregs;
+	unsigned long usp = regs->u_regs[UREG_I6];
+
+	if (test_thread_64bit_stack(usp))
+		usp += STACK_BIAS;
+
+	if (test_thread_flag(TIF_32BIT))
+		usp &= 0xffffffffUL;
+
+	usp -= len;
+	usp &= ~0x7UL;
+
+	return (void __user *) usp;
+}
+#endif
+
+struct compat_ipc64_perm {
+	compat_key_t key;
+	__compat_uid32_t uid;
+	__compat_gid32_t gid;
+	__compat_uid32_t cuid;
+	__compat_gid32_t cgid;
+	unsigned short __pad1;
+	compat_mode_t mode;
+	unsigned short __pad2;
+	unsigned short seq;
+	unsigned long __unused1;	/* yes they really are 64bit pads */
+	unsigned long __unused2;
+};
+
+struct compat_semid64_ds {
+	struct compat_ipc64_perm sem_perm;
+	unsigned int	sem_otime_high;
+	unsigned int	sem_otime;
+	unsigned int	sem_ctime_high;
+	unsigned int	sem_ctime;
+	u32		sem_nsems;
+	u32		__unused1;
+	u32		__unused2;
+};
+
+struct compat_msqid64_ds {
+	struct compat_ipc64_perm msg_perm;
+	unsigned int	msg_stime_high;
+	unsigned int	msg_stime;
+	unsigned int	msg_rtime_high;
+	unsigned int	msg_rtime;
+	unsigned int	msg_ctime_high;
+	unsigned int	msg_ctime;
+	unsigned int	msg_cbytes;
+	unsigned int	msg_qnum;
+	unsigned int	msg_qbytes;
+	compat_pid_t	msg_lspid;
+	compat_pid_t	msg_lrpid;
+	unsigned int	__unused1;
+	unsigned int	__unused2;
+};
+
+struct compat_shmid64_ds {
+	struct compat_ipc64_perm shm_perm;
+	unsigned int	shm_atime_high;
+	unsigned int	shm_atime;
+	unsigned int	shm_dtime_high;
+	unsigned int	shm_dtime;
+	unsigned int	shm_ctime_high;
+	unsigned int	shm_ctime;
+	compat_size_t	shm_segsz;
+	compat_pid_t	shm_cpid;
+	compat_pid_t	shm_lpid;
+	unsigned int	shm_nattch;
+	unsigned int	__unused1;
+	unsigned int	__unused2;
+};
+
+#ifdef CONFIG_COMPAT
+static inline int is_compat_task(void)
+{
+	return test_thread_flag(TIF_32BIT);
+}
+
+static inline bool in_compat_syscall(void)
+{
+	/* Vector 0x110 is LINUX_32BIT_SYSCALL_TRAP */
+	return pt_regs_trap_type(current_pt_regs()) == 0x110;
+}
+#define in_compat_syscall in_compat_syscall
+#endif
+
+#endif /* _ASM_SPARC64_COMPAT_H */
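
The compat_ptr()/ptr_to_compat() pair above is the conversion boundary for 32-bit user pointers that reach 64-bit kernel code outside the syscall entry path, e.g. embedded in ioctl payloads. A minimal sketch of that usage, with the argument struct and handler names invented for illustration:

struct demo_args32 {			/* hypothetical 32-bit ABI struct */
	compat_uptr_t	buf;		/* user pointer, stored as a u32 */
	u32		len;
};

static long demo_compat_ioctl(void __user *uarg)
{
	struct demo_args32 a32;
	char tmp[64];

	if (copy_from_user(&a32, uarg, sizeof(a32)))
		return -EFAULT;
	if (a32.len > sizeof(tmp))
		return -EINVAL;
	/* Widen the embedded 32-bit pointer before using it. */
	if (copy_from_user(tmp, compat_ptr(a32.buf), a32.len))
		return -EFAULT;
	return 0;
}
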
diff --git a/arch/sparc/include/asm/compat_signal.h b/arch/sparc/include/asm/compat_signal.h
new file mode 100644
index 0000000..e5f7a7f
--- /dev/null
+++ b/arch/sparc/include/asm/compat_signal.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COMPAT_SIGNAL_H
+#define _COMPAT_SIGNAL_H
+
+#include <linux/compat.h>
+#include <asm/signal.h>
+
+#ifdef CONFIG_COMPAT
+struct __new_sigaction32 {
+	unsigned int		sa_handler;
+	unsigned int    	sa_flags;
+	unsigned int		sa_restorer;     /* not used by Linux/SPARC yet */
+	compat_sigset_t 	sa_mask;
+};
+
+struct __old_sigaction32 {
+	unsigned int		sa_handler;
+	compat_old_sigset_t  	sa_mask;
+	unsigned int    	sa_flags;
+	unsigned int		sa_restorer;     /* not used by Linux/SPARC yet */
+};
+#endif
+
+#endif /* !(_COMPAT_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/contregs.h b/arch/sparc/include/asm/contregs.h
new file mode 100644
index 0000000..4df56a6
--- /dev/null
+++ b/arch/sparc/include/asm/contregs.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_CONTREGS_H
+#define _SPARC_CONTREGS_H
+
+/* contregs.h:  Addresses of registers in the ASI_CONTROL alternate address
+ *              space. These are for the mmu's context register, etc.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+/* s=Swift, h=Ross_HyperSPARC, v=TI_Viking, t=Tsunami, r=Ross_Cypress        */
+#define AC_M_PCR      0x0000        /* shv Processor Control Reg             */
+#define AC_M_CTPR     0x0100        /* shv Context Table Pointer Reg         */
+#define AC_M_CXR      0x0200        /* shv Context Register                  */
+#define AC_M_SFSR     0x0300        /* shv Synchronous Fault Status Reg      */
+#define AC_M_SFAR     0x0400        /* shv Synchronous Fault Address Reg     */
+#define AC_M_AFSR     0x0500        /*  hv Asynchronous Fault Status Reg     */
+#define AC_M_AFAR     0x0600        /*  hv Asynchronous Fault Address Reg    */
+#define AC_M_RESET    0x0700        /*  hv Reset Reg                         */
+#define AC_M_RPR      0x1000        /*  hv Root Pointer Reg                  */
+#define AC_M_TSUTRCR  0x1000        /* s   TLB Replacement Ctrl Reg          */
+#define AC_M_IAPTP    0x1100        /*  hv Instruction Access PTP            */
+#define AC_M_DAPTP    0x1200        /*  hv Data Access PTP                   */
+#define AC_M_ITR      0x1300        /*  hv Index Tag Register                */
+#define AC_M_TRCR     0x1400        /*  hv TLB Replacement Control Reg       */
+#define AC_M_SFSRX    0x1300        /* s   Synch Fault Status Reg prim       */
+#define AC_M_SFARX    0x1400        /* s   Synch Fault Address Reg prim      */
+#define AC_M_RPR1     0x1500        /*  h  Root Pointer Reg (entry 2)        */
+#define AC_M_IAPTP1   0x1600        /*  h  Instruction Access PTP (entry 2)  */
+#define AC_M_DAPTP1   0x1700        /*  h  Data Access PTP (entry 2)         */
+
+#endif /* _SPARC_CONTREGS_H */
diff --git a/arch/sparc/include/asm/cpu_type.h b/arch/sparc/include/asm/cpu_type.h
new file mode 100644
index 0000000..2b59799
--- /dev/null
+++ b/arch/sparc/include/asm/cpu_type.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_CPU_TYPE_H
+#define __ASM_CPU_TYPE_H
+
+/*
+ * Sparc (general) CPU types
+ */
+enum sparc_cpu {
+  sun4m       = 0x00,
+  sun4d       = 0x01,
+  sun4e       = 0x02,
+  sun4u       = 0x03, /* V8 ploos ploos */
+  sun_unknown = 0x04,
+  ap1000      = 0x05, /* almost a sun4m */
+  sparc_leon  = 0x06, /* Leon SoC */
+};
+
+#ifdef CONFIG_SPARC32
+extern enum sparc_cpu sparc_cpu_model;
+
+#define SUN4M_NCPUS            4              /* Architectural limit of sun4m. */
+
+#else
+
+#define sparc_cpu_model sun4u
+
+#endif
+
+#endif /* __ASM_CPU_TYPE_H */
diff --git a/arch/sparc/include/asm/cpudata.h b/arch/sparc/include/asm/cpudata.h
new file mode 100644
index 0000000..d213165
--- /dev/null
+++ b/arch/sparc/include/asm/cpudata.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_CPUDATA_H
+#define ___ASM_SPARC_CPUDATA_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/threads.h>
+#include <linux/percpu.h>
+
+extern const struct seq_operations cpuinfo_op;
+
+#endif /* !(__ASSEMBLY__) */
+
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/cpudata_64.h>
+#else
+#include <asm/cpudata_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/cpudata_32.h b/arch/sparc/include/asm/cpudata_32.h
new file mode 100644
index 0000000..895d096
--- /dev/null
+++ b/arch/sparc/include/asm/cpudata_32.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* cpudata.h: Per-cpu parameters.
+ *
+ * Copyright (C) 2004 Keith M Wesolowski (wesolows@foobazco.org)
+ *
+ * Based on include/asm/cpudata.h and Linux 2.4 smp.h
+ * both (C) David S. Miller.
+ */
+
+#ifndef _SPARC_CPUDATA_H
+#define _SPARC_CPUDATA_H
+
+#include <linux/percpu.h>
+
+typedef struct {
+	unsigned long udelay_val;
+	unsigned long clock_tick;
+	unsigned int counter;
+#ifdef CONFIG_SMP
+	unsigned int irq_resched_count;
+	unsigned int irq_call_count;
+#endif
+	int prom_node;
+	int mid;
+	int next;
+} cpuinfo_sparc;
+
+DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
+#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() (*this_cpu_ptr(&__cpu_data))
+
+#endif /* _SPARC_CPUDATA_H */
diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h
new file mode 100644
index 0000000..9c3fc03
--- /dev/null
+++ b/arch/sparc/include/asm/cpudata_64.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* cpudata.h: Per-cpu parameters.
+ *
+ * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _SPARC64_CPUDATA_H
+#define _SPARC64_CPUDATA_H
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	/* Dcache line 1 */
+	unsigned int	__softirq_pending; /* must be 1st, see rtrap.S */
+	unsigned int	__nmi_count;
+	unsigned long	clock_tick;	/* %tick's per second */
+	unsigned long	__pad;
+	unsigned int	irq0_irqs;
+	unsigned int	__pad2;
+
+	/* Dcache line 2, rarely used */
+	unsigned int	dcache_size;
+	unsigned int	dcache_line_size;
+	unsigned int	icache_size;
+	unsigned int	icache_line_size;
+	unsigned int	ecache_size;
+	unsigned int	ecache_line_size;
+	unsigned short	sock_id;	/* physical package */
+	unsigned short	core_id;
+	unsigned short  max_cache_id;	/* groupings of highest shared cache */
+	signed short	proc_id;	/* strand (aka HW thread) id */
+} cpuinfo_sparc;
+
+DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
+#define cpu_data(__cpu)		per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data()	(*this_cpu_ptr(&__cpu_data))
+
+#endif /* !(__ASSEMBLY__) */
+
+#include <asm/trap_block.h>
+
+#endif /* _SPARC64_CPUDATA_H */
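
cpu_data()/local_cpu_data() above are the accessors the rest of the port uses for these per-cpu fields. A hedged usage sketch (the reporting loop is invented for illustration):

static void demo_dump_clock_ticks(void)
{
	int cpu;

	/* cpu_data(cpu) expands to per_cpu(__cpu_data, cpu), so each
	 * iteration reads that CPU's private cpuinfo_sparc instance.
	 */
	for_each_online_cpu(cpu)
		pr_info("cpu%d: %lu ticks/sec\n",
			cpu, cpu_data(cpu).clock_tick);
}
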
diff --git a/arch/sparc/include/asm/current.h b/arch/sparc/include/asm/current.h
new file mode 100644
index 0000000..c68312a
--- /dev/null
+++ b/arch/sparc/include/asm/current.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* include/asm/current.h
+ *
+ * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Copyright (C) 2002 Pete Zaitcev (zaitcev@yahoo.com)
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ *
+ *  Derived from "include/asm-s390/current.h" by
+ *  Martin Schwidefsky (schwidefsky@de.ibm.com)
+ *  Derived from "include/asm-i386/current.h"
+*/
+#ifndef _SPARC_CURRENT_H
+#define _SPARC_CURRENT_H
+
+#include <linux/thread_info.h>
+
+#ifdef CONFIG_SPARC64
+register struct task_struct *current asm("g4");
+#endif
+
+#ifdef CONFIG_SPARC32
+/* We might want to consider using %g4 like sparc64 to shave a few cycles.
+ *
+ * Two stage process (inline + #define) for type-checking.
+ * We also obfuscate get_current() to check if anyone used that by mistake.
+ */
+struct task_struct;
+static inline struct task_struct *__get_current(void)
+{
+	return current_thread_info()->task;
+}
+#define current __get_current()
+#endif
+
+#endif /* !(_SPARC_CURRENT_H) */
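
The sparc32 branch relies on the inline + #define idiom the comment mentions: the inline function fixes the type so the compiler can check every use, while the macro preserves the plain `current` spelling. The same pattern in miniature, with hypothetical names:

struct widget;

static inline struct widget *__get_cur_widget(void)
{
	/* stand-in for current_thread_info()->task */
	return NULL;
}
#define cur_widget __get_cur_widget()

/* Users write cur_widget->field and get full type checking; an
 * accidental assignment such as cur_widget = p fails to compile,
 * because the macro expands to a function call, not an lvalue.
 */
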
diff --git a/arch/sparc/include/asm/dcr.h b/arch/sparc/include/asm/dcr.h
new file mode 100644
index 0000000..c810e1b
--- /dev/null
+++ b/arch/sparc/include/asm/dcr.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_DCR_H
+#define _SPARC64_DCR_H
+
+/* UltraSparc-III/III+ Dispatch Control Register, ASR 0x12 */
+#define DCR_DPE		0x0000000000001000 /* III+: D$ Parity Error Enable	*/
+#define DCR_OBS		0x0000000000000fc0 /* Observability Bus Controls	*/
+#define DCR_BPE		0x0000000000000020 /* Branch Predict Enable		*/
+#define DCR_RPE		0x0000000000000010 /* Return Address Prediction Enable	*/
+#define DCR_SI		0x0000000000000008 /* Single Instruction Disable	*/
+#define DCR_IPE		0x0000000000000004 /* III+: I$ Parity Error Enable	*/
+#define DCR_IFPOE	0x0000000000000002 /* IRQ FP Operation Enable		*/
+#define DCR_MS		0x0000000000000001 /* Multi-Scalar dispatch		*/
+
+#endif /* _SPARC64_DCR_H */
diff --git a/arch/sparc/include/asm/dcu.h b/arch/sparc/include/asm/dcu.h
new file mode 100644
index 0000000..93f3250
--- /dev/null
+++ b/arch/sparc/include/asm/dcu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_DCU_H
+#define _SPARC64_DCU_H
+
+#include <linux/const.h>
+
+/* UltraSparc-III Data Cache Unit Control Register */
+#define DCU_CP	_AC(0x0002000000000000,UL) /* Phys Cache Enable w/o mmu	*/
+#define DCU_CV	_AC(0x0001000000000000,UL) /* Virt Cache Enable w/o mmu	*/
+#define DCU_ME	_AC(0x0000800000000000,UL) /* NC-store Merging Enable	*/
+#define DCU_RE	_AC(0x0000400000000000,UL) /* RAW bypass Enable		*/
+#define DCU_PE	_AC(0x0000200000000000,UL) /* PCache Enable		*/
+#define DCU_HPE	_AC(0x0000100000000000,UL) /* HW prefetch Enable	*/
+#define DCU_SPE	_AC(0x0000080000000000,UL) /* SW prefetch Enable	*/
+#define DCU_SL	_AC(0x0000040000000000,UL) /* Secondary ld-steering Enab*/
+#define DCU_WE	_AC(0x0000020000000000,UL) /* WCache enable		*/
+#define DCU_PM	_AC(0x000001fe00000000,UL) /* PA Watchpoint Byte Mask	*/
+#define DCU_VM	_AC(0x00000001fe000000,UL) /* VA Watchpoint Byte Mask	*/
+#define DCU_PR	_AC(0x0000000001000000,UL) /* PA Watchpoint Read Enable	*/
+#define DCU_PW	_AC(0x0000000000800000,UL) /* PA Watchpoint Write Enable*/
+#define DCU_VR	_AC(0x0000000000400000,UL) /* VA Watchpoint Read Enable	*/
+#define DCU_VW	_AC(0x0000000000200000,UL) /* VA Watchpoint Write Enable*/
+#define DCU_DM	_AC(0x0000000000000008,UL) /* DMMU Enable		*/
+#define DCU_IM	_AC(0x0000000000000004,UL) /* IMMU Enable		*/
+#define DCU_DC	_AC(0x0000000000000002,UL) /* Data Cache Enable		*/
+#define DCU_IC	_AC(0x0000000000000001,UL) /* Instruction Cache Enable	*/
+
+#endif /* _SPARC64_DCU_H */
diff --git a/arch/sparc/include/asm/delay.h b/arch/sparc/include/asm/delay.h
new file mode 100644
index 0000000..c96af9b
--- /dev/null
+++ b/arch/sparc/include/asm/delay.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_DELAY_H
+#define ___ASM_SPARC_DELAY_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/delay_64.h>
+#else
+#include <asm/delay_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/delay_32.h b/arch/sparc/include/asm/delay_32.h
new file mode 100644
index 0000000..0e6dfe8
--- /dev/null
+++ b/arch/sparc/include/asm/delay_32.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * delay.h: Linux delay routines on the Sparc.
+ *
+ * Copyright (C) 1994 David S. Miller (davem@caip.rutgers.edu).
+ */
+
+#ifndef __SPARC_DELAY_H
+#define __SPARC_DELAY_H
+
+#include <asm/cpudata.h>
+
+static inline void __delay(unsigned long loops)
+{
+	__asm__ __volatile__("cmp %0, 0\n\t"
+			     "1: bne 1b\n\t"
+			     "subcc %0, 1, %0\n" :
+			     "=&r" (loops) :
+			     "0" (loops) :
+			     "cc");
+}
+
+/* This is too messy with inline asm on the Sparc. */
+void __udelay(unsigned long usecs, unsigned long lpj);
+void __ndelay(unsigned long nsecs, unsigned long lpj);
+
+#ifdef CONFIG_SMP
+#define __udelay_val	cpu_data(smp_processor_id()).udelay_val
+#else /* SMP */
+#define __udelay_val	loops_per_jiffy
+#endif /* SMP */
+#define udelay(__usecs)	__udelay(__usecs, __udelay_val)
+#define ndelay(__nsecs)	__ndelay(__nsecs, __udelay_val)
+
+#endif /* defined(__SPARC_DELAY_H) */
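
__udelay()/__ndelay() are out of line, but the arithmetic they must perform follows from the parameters above: `lpj` is the calibrated loops-per-jiffy value, so loops per microsecond is lpj * HZ / 10^6. A hedged sketch of that conversion (the in-tree implementation is out of line and uses fixed-point multiplies rather than a division):

/* Illustrative only: turn a microsecond count into __delay() loops. */
static inline void demo_udelay(unsigned long usecs, unsigned long lpj)
{
	unsigned long long loops;

	loops = (unsigned long long)usecs * lpj * HZ;
	do_div(loops, 1000000);		/* usecs * lpj * HZ / 10^6 */
	__delay((unsigned long)loops);
}
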
diff --git a/arch/sparc/include/asm/delay_64.h b/arch/sparc/include/asm/delay_64.h
new file mode 100644
index 0000000..22213b1
--- /dev/null
+++ b/arch/sparc/include/asm/delay_64.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* delay.h: Linux delay routines on sparc64.
+ *
+ * Copyright (C) 1996, 2004, 2007 David S. Miller (davem@davemloft.net).
+ */
+
+#ifndef _SPARC64_DELAY_H
+#define _SPARC64_DELAY_H
+
+#ifndef __ASSEMBLY__
+
+void __delay(unsigned long loops);
+void udelay(unsigned long usecs);
+#define mdelay(n)	udelay((n) * 1000)
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _SPARC64_DELAY_H */
diff --git a/arch/sparc/include/asm/device.h b/arch/sparc/include/asm/device.h
new file mode 100644
index 0000000..bb3f0b0
--- /dev/null
+++ b/arch/sparc/include/asm/device.h
@@ -0,0 +1,30 @@
+/*
+ * Arch specific extensions to struct device
+ *
+ * This file is released under the GPLv2
+ */
+#ifndef _ASM_SPARC_DEVICE_H
+#define _ASM_SPARC_DEVICE_H
+
+#include <asm/openprom.h>
+
+struct device_node;
+struct platform_device;
+
+struct dev_archdata {
+	void			*iommu;
+	void			*stc;
+	void			*host_controller;
+	struct platform_device	*op;
+	int			numa_node;
+};
+
+void of_propagate_archdata(struct platform_device *bus);
+
+struct pdev_archdata {
+	struct resource		resource[PROMREG_MAX];
+	unsigned int		irqs[PROMINTR_MAX];
+	int			num_irqs;
+};
+
+#endif /* _ASM_SPARC_DEVICE_H */
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
new file mode 100644
index 0000000..e175663
--- /dev/null
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_DMA_MAPPING_H
+#define ___ASM_SPARC_DMA_MAPPING_H
+
+#include <linux/scatterlist.h>
+#include <linux/mm.h>
+#include <linux/dma-debug.h>
+
+extern const struct dma_map_ops *dma_ops;
+
+extern struct bus_type pci_bus_type;
+
+static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
+{
+#ifdef CONFIG_SPARC_LEON
+	if (sparc_cpu_model == sparc_leon)
+		return &dma_noncoherent_ops;
+#endif
+#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
+	if (bus == &pci_bus_type)
+		return &dma_noncoherent_ops;
+#endif
+	return dma_ops;
+}
+
+#endif
diff --git a/arch/sparc/include/asm/dma.h b/arch/sparc/include/asm/dma.h
new file mode 100644
index 0000000..a1d7c86
--- /dev/null
+++ b/arch/sparc/include/asm/dma.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_DMA_H
+#define _ASM_SPARC_DMA_H
+
+/* These are irrelevant for Sparc DMA, but we leave them in so that
+ * things can compile.
+ */
+#define MAX_DMA_CHANNELS 8
+#define DMA_MODE_READ    1
+#define DMA_MODE_WRITE   2
+#define MAX_DMA_ADDRESS  (~0UL)
+
+/* Useful constants */
+#define SIZE_16MB      (16*1024*1024)
+#define SIZE_64K       (64*1024)
+
+/* SBUS DMA controller reg offsets */
+#define DMA_CSR		0x00UL		/* rw  DMA control/status register    0x00   */
+#define DMA_ADDR	0x04UL		/* rw  DMA transfer address register  0x04   */
+#define DMA_COUNT	0x08UL		/* rw  DMA transfer count register    0x08   */
+#define DMA_TEST	0x0cUL		/* rw  DMA test/debug register        0x0c   */
+
+/* Fields in the cond_reg register */
+/* First, the version identification bits */
+#define DMA_DEVICE_ID    0xf0000000        /* Device identification bits */
+#define DMA_VERS0        0x00000000        /* Sunray DMA version */
+#define DMA_ESCV1        0x40000000        /* DMA ESC Version 1 */
+#define DMA_VERS1        0x80000000        /* DMA rev 1 */
+#define DMA_VERS2        0xa0000000        /* DMA rev 2 */
+#define DMA_VERHME       0xb0000000        /* DMA hme gate array */
+#define DMA_VERSPLUS     0x90000000        /* DMA rev 1 PLUS */
+
+#define DMA_HNDL_INTR    0x00000001        /* An IRQ needs to be handled */
+#define DMA_HNDL_ERROR   0x00000002        /* We need to take an error */
+#define DMA_FIFO_ISDRAIN 0x0000000c        /* The DMA FIFO is draining */
+#define DMA_INT_ENAB     0x00000010        /* Turn on interrupts */
+#define DMA_FIFO_INV     0x00000020        /* Invalidate the FIFO */
+#define DMA_ACC_SZ_ERR   0x00000040        /* The access size was bad */
+#define DMA_FIFO_STDRAIN 0x00000040        /* DMA_VERS1 Drain the FIFO */
+#define DMA_RST_SCSI     0x00000080        /* Reset the SCSI controller */
+#define DMA_RST_ENET     DMA_RST_SCSI      /* Reset the ENET controller */
+#define DMA_ST_WRITE     0x00000100        /* write from device to memory */
+#define DMA_ENABLE       0x00000200        /* Fire up DMA, handle requests */
+#define DMA_PEND_READ    0x00000400        /* DMA_VERS1/0/PLUS Pending Read */
+#define DMA_ESC_BURST    0x00000800        /* 1=16byte 0=32byte */
+#define DMA_READ_AHEAD   0x00001800        /* DMA read ahead partial longword */
+#define DMA_DSBL_RD_DRN  0x00001000        /* No EC drain on slave reads */
+#define DMA_BCNT_ENAB    0x00002000        /* If on, use the byte counter */
+#define DMA_TERM_CNTR    0x00004000        /* Terminal counter */
+#define DMA_SCSI_SBUS64  0x00008000        /* HME: Enable 64-bit SBUS mode. */
+#define DMA_CSR_DISAB    0x00010000        /* No FIFO drains during csr */
+#define DMA_SCSI_DISAB   0x00020000        /* No FIFO drains during reg */
+#define DMA_DSBL_WR_INV  0x00020000        /* No EC inval. on slave writes */
+#define DMA_ADD_ENABLE   0x00040000        /* Special ESC DVMA optimization */
+#define DMA_E_BURSTS	 0x000c0000	   /* ENET: SBUS r/w burst mask */
+#define DMA_E_BURST32	 0x00040000	   /* ENET: SBUS 32 byte r/w burst */
+#define DMA_E_BURST16	 0x00000000	   /* ENET: SBUS 16 byte r/w burst */
+#define DMA_BRST_SZ      0x000c0000        /* SCSI: SBUS r/w burst size */
+#define DMA_BRST64       0x000c0000        /* SCSI: 64byte bursts (HME on UltraSparc only) */
+#define DMA_BRST32       0x00040000        /* SCSI: 32byte bursts */
+#define DMA_BRST16       0x00000000        /* SCSI: 16byte bursts */
+#define DMA_BRST0        0x00080000        /* SCSI: no bursts (non-HME gate arrays) */
+#define DMA_ADDR_DISAB   0x00100000        /* No FIFO drains during addr */
+#define DMA_2CLKS        0x00200000        /* Each transfer = 2 clock ticks */
+#define DMA_3CLKS        0x00400000        /* Each transfer = 3 clock ticks */
+#define DMA_EN_ENETAUI   DMA_3CLKS         /* Put lance into AUI-cable mode */
+#define DMA_CNTR_DISAB   0x00800000        /* No IRQ when DMA_TERM_CNTR set */
+#define DMA_AUTO_NADDR   0x01000000        /* Use "auto nxt addr" feature */
+#define DMA_SCSI_ON      0x02000000        /* Enable SCSI dma */
+#define DMA_PARITY_OFF   0x02000000        /* HME: disable parity checking */
+#define DMA_LOADED_ADDR  0x04000000        /* Address has been loaded */
+#define DMA_LOADED_NADDR 0x08000000        /* Next address has been loaded */
+#define DMA_RESET_FAS366 0x08000000        /* HME: Assert RESET to FAS366 */
+
+/* Values describing the burst-size property from the PROM */
+#define DMA_BURST1       0x01
+#define DMA_BURST2       0x02
+#define DMA_BURST4       0x04
+#define DMA_BURST8       0x08
+#define DMA_BURST16      0x10
+#define DMA_BURST32      0x20
+#define DMA_BURST64      0x40
+#define DMA_BURSTBITS    0x7f
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy 	(0)
+#endif
+
+#ifdef CONFIG_SPARC32
+
+/* Routines for data transfer buffers. */
+struct device;
+struct scatterlist;
+
+struct sparc32_dma_ops {
+	__u32 (*get_scsi_one)(struct device *, char *, unsigned long);
+	void (*get_scsi_sgl)(struct device *, struct scatterlist *, int);
+	void (*release_scsi_one)(struct device *, __u32, unsigned long);
+	void (*release_scsi_sgl)(struct device *, struct scatterlist *,int);
+#ifdef CONFIG_SBUS
+	int (*map_dma_area)(struct device *, dma_addr_t *, unsigned long, unsigned long, int);
+	void (*unmap_dma_area)(struct device *, unsigned long, int);
+#endif
+};
+extern const struct sparc32_dma_ops *sparc32_dma_ops;
+
+#define mmu_get_scsi_one(dev,vaddr,len) \
+	sparc32_dma_ops->get_scsi_one(dev, vaddr, len)
+#define mmu_get_scsi_sgl(dev,sg,sz) \
+	sparc32_dma_ops->get_scsi_sgl(dev, sg, sz)
+#define mmu_release_scsi_one(dev,vaddr,len) \
+	sparc32_dma_ops->release_scsi_one(dev, vaddr,len)
+#define mmu_release_scsi_sgl(dev,sg,sz) \
+	sparc32_dma_ops->release_scsi_sgl(dev, sg, sz)
+
+#ifdef CONFIG_SBUS
+/*
+ * mmu_map/unmap are provided by iommu/iounit; it is invalid to call them on IIep.
+ *
+ * The mmu_map_dma_area establishes two mappings in one go.
+ * These mappings point to pages normally mapped at 'va' (linear address).
+ * First mapping is for CPU visible address at 'a', uncached.
+ * This is an alias, but it works because it is an uncached mapping.
+ * Second mapping is for device visible address, or "bus" address.
+ * The bus address is returned at '*pba'.
+ *
+ * These functions seem distinct, but are hard to split.
+ * On sun4m, page attributes depend on the CPU type, so we have to
+ * know if we are mapping RAM or I/O, so it has to be an additional argument
+ * to a separate mapping function for CPU visible mappings.
+ */
+#define sbus_map_dma_area(dev,pba,va,a,len) \
+	sparc32_dma_ops->map_dma_area(dev, pba, va, a, len)
+#define sbus_unmap_dma_area(dev,ba,len) \
+	sparc32_dma_ops->unmap_dma_area(dev, ba, len)
+#endif /* CONFIG_SBUS */
+
+#endif
+
+#endif /* !(_ASM_SPARC_DMA_H) */
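
The mmu_* wrappers above are how sparc32 SCSI drivers reach whichever IOMMU/IOUNIT backend registered sparc32_dma_ops. A hedged sketch of the single-buffer pattern (the function and its caller are invented for illustration):

/* Map one CPU buffer for a SCSI transfer, use the returned DVMA
 * address for the hardware, then release it when the I/O completes.
 */
static void demo_scsi_map(struct device *dev, char *buf, unsigned long len)
{
	__u32 dvma;

	dvma = mmu_get_scsi_one(dev, buf, len);	/* bus-visible address */
	/* ... program the controller with 'dvma', run the transfer ... */
	mmu_release_scsi_one(dev, dvma, len);
}
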
diff --git a/arch/sparc/include/asm/ebus_dma.h b/arch/sparc/include/asm/ebus_dma.h
new file mode 100644
index 0000000..75563ed
--- /dev/null
+++ b/arch/sparc/include/asm/ebus_dma.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPARC_EBUS_DMA_H
+#define __ASM_SPARC_EBUS_DMA_H
+
+struct ebus_dma_info {
+	spinlock_t	lock;
+	void __iomem	*regs;
+
+	unsigned int	flags;
+#define EBUS_DMA_FLAG_USE_EBDMA_HANDLER		0x00000001
+#define EBUS_DMA_FLAG_TCI_DISABLE		0x00000002
+
+	/* These are only valid if EBUS_DMA_FLAG_USE_EBDMA_HANDLER is
+	 * set.
+	 */
+	void (*callback)(struct ebus_dma_info *p, int event, void *cookie);
+	void *client_cookie;
+	unsigned int	irq;
+#define EBUS_DMA_EVENT_ERROR	1
+#define EBUS_DMA_EVENT_DMA	2
+#define EBUS_DMA_EVENT_DEVICE	4
+
+	unsigned char	name[64];
+};
+
+int ebus_dma_register(struct ebus_dma_info *p);
+int ebus_dma_irq_enable(struct ebus_dma_info *p, int on);
+void ebus_dma_unregister(struct ebus_dma_info *p);
+int ebus_dma_request(struct ebus_dma_info *p, dma_addr_t bus_addr,
+			    size_t len);
+void ebus_dma_prepare(struct ebus_dma_info *p, int write);
+unsigned int ebus_dma_residue(struct ebus_dma_info *p);
+unsigned int ebus_dma_addr(struct ebus_dma_info *p);
+void ebus_dma_enable(struct ebus_dma_info *p, int on);
+
+#endif /* __ASM_SPARC_EBUS_DMA_H */
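
Taken together, the declarations above form a small lifecycle: fill in an ebus_dma_info, register it, pick a direction, queue a request, then enable the channel. A hedged ordering sketch (locking, the regs mapping, and error recovery are elided; the direction value and callback body are assumptions):

static void demo_cb(struct ebus_dma_info *p, int event, void *cookie)
{
	if (event == EBUS_DMA_EVENT_ERROR)
		/* ... reset the channel ... */ ;
}

static int demo_start(struct ebus_dma_info *p, dma_addr_t buf, size_t len)
{
	int err;

	p->flags = EBUS_DMA_FLAG_USE_EBDMA_HANDLER;
	p->callback = demo_cb;		/* only used with the flag above */
	err = ebus_dma_register(p);
	if (err)
		return err;
	ebus_dma_prepare(p, 1);			/* direction; assumed 1 == write */
	err = ebus_dma_request(p, buf, len);	/* queue the transfer */
	if (!err)
		ebus_dma_enable(p, 1);
	return err;
}
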
diff --git a/arch/sparc/include/asm/ecc.h b/arch/sparc/include/asm/ecc.h
new file mode 100644
index 0000000..3d5edee
--- /dev/null
+++ b/arch/sparc/include/asm/ecc.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ecc.h: Definitions and defines for the external cache/memory
+ *        controller on the sun4m.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_ECC_H
+#define _SPARC_ECC_H
+
+/* These registers are accessed through the SRMMU passthrough ASI 0x20 */
+#define ECC_ENABLE     0x00000000       /* ECC enable register */
+#define ECC_FSTATUS    0x00000008       /* ECC fault status register */
+#define ECC_FADDR      0x00000010       /* ECC fault address register */
+#define ECC_DIGNOSTIC  0x00000018       /* ECC diagnostics register */
+#define ECC_MBAENAB    0x00000020       /* MBus arbiter enable register */
+#define ECC_DMESG      0x00001000       /* Diagnostic message passing area */
+
+/* ECC MBus Arbiter Enable register:
+ *
+ * ----------------------------------------
+ * |              |SBUS|MOD3|MOD2|MOD1|RSV|
+ * ----------------------------------------
+ *  31           5   4   3    2    1    0
+ *
+ * SBUS: Enable MBus Arbiter on the SBus 0=off 1=on
+ * MOD3: Enable MBus Arbiter on MBus module 3  0=off 1=on
+ * MOD2: Enable MBus Arbiter on MBus module 2  0=off 1=on
+ * MOD1: Enable MBus Arbiter on MBus module 1  0=off 1=on
+ */
+
+#define ECC_MBAE_SBUS     0x00000010
+#define ECC_MBAE_MOD3     0x00000008
+#define ECC_MBAE_MOD2     0x00000004
+#define ECC_MBAE_MOD1     0x00000002 
+
+/* ECC Fault Control Register layout:
+ *
+ * -----------------------------
+ * |    RESV   | ECHECK | EINT |
+ * -----------------------------
+ *  31        2     1       0
+ *
+ * ECHECK:  Enable ECC checking.  0=off 1=on
+ * EINT:  Enable Interrupts for correctable errors. 0=off 1=on
+ */ 
+#define ECC_FCR_CHECK    0x00000002
+#define ECC_FCR_INTENAB  0x00000001
+
+/* ECC Fault Address Register Zero layout:
+ *
+ * -----------------------------------------------------
+ * | MID | S | RSV |  VA   | BM |AT| C| SZ |TYP| PADDR |
+ * -----------------------------------------------------
+ *  31-28  27 26-22  21-14   13  12 11 10-8 7-4   3-0
+ *
+ * MID: Module ID of the faulting processor, i.e. who did it?
+ * S: Supervisor/Privileged access? 0=no 1=yes
+ * VA: Bits 19-12 of the virtual faulting address, these are the
+ *     superset bits in the virtual cache and can be used for
+ *     a flush operation if necessary.
+ * BM: Boot mode? 0=no 1=yes  This is just like the SRMMU boot
+ *     mode bit.
+ * AT: Did this fault happen during an atomic instruction? 0=no
+ *     1=yes.  This means either an 'ldstub' or 'swap' instruction
+ *     was in progress (but not finished) when this fault happened.
+ *     This indicates whether the bus was locked when the fault
+ *     occurred.
+ * C: Did the pte for this access indicate that it was cacheable?
+ *    0=no 1=yes
+ * SZ: The size of the transaction.
+ * TYP: The transaction type.
+ * PADDR: Bits 35-32 of the physical address for the fault.
+ */
+#define ECC_FADDR0_MIDMASK   0xf0000000
+#define ECC_FADDR0_S         0x08000000
+#define ECC_FADDR0_VADDR     0x003fc000
+#define ECC_FADDR0_BMODE     0x00002000
+#define ECC_FADDR0_ATOMIC    0x00001000
+#define ECC_FADDR0_CACHE     0x00000800
+#define ECC_FADDR0_SIZE      0x00000700
+#define ECC_FADDR0_TYPE      0x000000f0
+#define ECC_FADDR0_PADDR     0x0000000f
+
+/* ECC Fault Address Register One layout:
+ *
+ * -------------------------------------
+ * |          Physical Address 31-0    |
+ * -------------------------------------
+ *  31                               0
+ *
+ * You get the upper 4 bits of the physical address from the
+ * PADDR field in ECC Fault Address Zero register.
+ */
+
+/* ECC Fault Status Register layout:
+ *
+ * ----------------------------------------------
+ * | RESV|C2E|MULT|SYNDROME|DWORD|UNC|TIMEO|BS|C|
+ * ----------------------------------------------
+ *  31-18  17  16    15-8    7-4   3    2    1 0
+ *
+ * C2E: A C2 graphics error occurred. 0=no 1=yes (SS10 only)
+ * MULT: Multiple errors occurred ;-O 0=no 1=prom_panic(yes)
+ * SYNDROME: Controller is mentally unstable.
+ * DWORD:
+ * UNC: Uncorrectable error.  0=no 1=yes
+ * TIMEO: Timeout occurred. 0=no 1=yes
+ * BS: C2 graphics bad slot access. 0=no 1=yes (SS10 only)
+ * C: Correctable error? 0=no 1=yes
+ */
+
+#define ECC_FSR_C2ERR    0x00020000
+#define ECC_FSR_MULT     0x00010000
+#define ECC_FSR_SYND     0x0000ff00
+#define ECC_FSR_DWORD    0x000000f0
+#define ECC_FSR_UNC      0x00000008
+#define ECC_FSR_TIMEO    0x00000004
+#define ECC_FSR_BADSLOT  0x00000002
+#define ECC_FSR_C        0x00000001
+
+#endif /* !(_SPARC_ECC_H) */
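
Given the ECC_FSR_* masks above, decoding a fault status word is plain masking and shifting; the shift counts follow from the bit-layout diagram. An illustrative sketch:

/* Pick apart an ECC Fault Status Register value using the masks above. */
static void demo_decode_ecc_fsr(unsigned int fsr)
{
	unsigned int syndrome = (fsr & ECC_FSR_SYND) >> 8;
	unsigned int dword    = (fsr & ECC_FSR_DWORD) >> 4;

	if (fsr & ECC_FSR_MULT)
		pr_crit("ECC: multiple errors\n");
	if (fsr & ECC_FSR_UNC)
		pr_err("ECC: uncorrectable, syndrome=%02x dword=%x\n",
		       syndrome, dword);
	else if (fsr & ECC_FSR_C)
		pr_warn("ECC: corrected, syndrome=%02x\n", syndrome);
	if (fsr & ECC_FSR_TIMEO)
		pr_err("ECC: bus timeout\n");
}
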
diff --git a/arch/sparc/include/asm/eeprom.h b/arch/sparc/include/asm/eeprom.h
new file mode 100644
index 0000000..e17beec
--- /dev/null
+++ b/arch/sparc/include/asm/eeprom.h
@@ -0,0 +1,9 @@
+/*
+ * eeprom.h:  Definitions for the Sun eeprom.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+/* The EEPROM and the Mostek Mk48t02 use the same IO address space
+ * for their registers/data areas.  The IDPROM lives here too.
+ */
diff --git a/arch/sparc/include/asm/elf.h b/arch/sparc/include/asm/elf.h
new file mode 100644
index 0000000..bbfb4b0
--- /dev/null
+++ b/arch/sparc/include/asm/elf.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_ELF_H
+#define ___ASM_SPARC_ELF_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/elf_64.h>
+#else
+#include <asm/elf_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/elf_32.h b/arch/sparc/include/asm/elf_32.h
new file mode 100644
index 0000000..37a6016
--- /dev/null
+++ b/arch/sparc/include/asm/elf_32.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASMSPARC_ELF_H
+#define __ASMSPARC_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+
+/*
+ * Sparc section types
+ */
+#define STT_REGISTER		13
+
+/*
+ * Sparc ELF relocation types
+ */
+#define	R_SPARC_NONE		0
+#define	R_SPARC_8		1
+#define	R_SPARC_16		2
+#define	R_SPARC_32		3
+#define	R_SPARC_DISP8		4
+#define	R_SPARC_DISP16		5
+#define	R_SPARC_DISP32		6
+#define	R_SPARC_WDISP30		7
+#define	R_SPARC_WDISP22		8
+#define	R_SPARC_HI22		9
+#define	R_SPARC_22		10
+#define	R_SPARC_13		11
+#define	R_SPARC_LO10		12
+#define	R_SPARC_GOT10		13
+#define	R_SPARC_GOT13		14
+#define	R_SPARC_GOT22		15
+#define	R_SPARC_PC10		16
+#define	R_SPARC_PC22		17
+#define	R_SPARC_WPLT30		18
+#define	R_SPARC_COPY		19
+#define	R_SPARC_GLOB_DAT	20
+#define	R_SPARC_JMP_SLOT	21
+#define	R_SPARC_RELATIVE	22
+#define	R_SPARC_UA32		23
+#define R_SPARC_PLT32		24
+#define R_SPARC_HIPLT22		25
+#define R_SPARC_LOPLT10		26
+#define R_SPARC_PCPLT32		27
+#define R_SPARC_PCPLT22		28
+#define R_SPARC_PCPLT10		29
+#define R_SPARC_10		30
+#define R_SPARC_11		31
+#define R_SPARC_64		32
+#define R_SPARC_OLO10		33
+#define R_SPARC_WDISP16		40
+#define R_SPARC_WDISP19		41
+#define R_SPARC_7		43
+#define R_SPARC_5		44
+#define R_SPARC_6		45
+
+/* Bits present in AT_HWCAP, primarily for Sparc32.  */
+
+#define HWCAP_SPARC_FLUSH       1    /* CPU supports flush instruction. */
+#define HWCAP_SPARC_STBAR       2
+#define HWCAP_SPARC_SWAP        4
+#define HWCAP_SPARC_MULDIV      8
+#define HWCAP_SPARC_V9		16
+#define HWCAP_SPARC_ULTRA3	32
+
+#define CORE_DUMP_USE_REGSET
+
+/* Format is:
+ * 	G0 --> G7
+ *	O0 --> O7
+ *	L0 --> L7
+ *	I0 --> I7
+ *	PSR, PC, nPC, Y, WIM, TBR
+ */
+typedef unsigned long elf_greg_t;
+#define ELF_NGREG 38
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct {
+	union {
+		unsigned long	pr_regs[32];
+		double		pr_dregs[16];
+	} pr_fr;
+	unsigned long __unused;
+	unsigned long	pr_fsr;
+	unsigned char	pr_qcnt;
+	unsigned char	pr_q_entrysize;
+	unsigned char	pr_en;
+	unsigned int	pr_q[64];
+} elf_fpregset_t;
+
+#include <asm/mbus.h>
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_SPARC)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_ARCH	EM_SPARC
+#define ELF_CLASS	ELFCLASS32
+#define ELF_DATA	ELFDATA2MSB
+
+#define ELF_EXEC_PAGESIZE	4096
+
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         (TASK_UNMAPPED_BASE)
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  This can NOT be done in userspace
+   on Sparc.  */
+
+/* Most sun4m's have them all.  */
+#define ELF_HWCAP	(HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | \
+			 HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo. */
+
+#define ELF_PLATFORM	(NULL)
+
+#endif /* !(__ASMSPARC_ELF_H) */
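
As the comment says, userspace cannot probe these instructions directly and is expected to consult AT_HWCAP instead. A hedged userspace sketch using glibc's getauxval(); the literal bit values mirror the HWCAP_SPARC_* defines above:

#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	/* Same bit assignments as HWCAP_SPARC_* above. */
	printf("flush:  %s\n", (hwcap & 1)  ? "yes" : "no");
	printf("muldiv: %s\n", (hwcap & 8)  ? "yes" : "no");
	printf("v9:     %s\n", (hwcap & 16) ? "yes" : "no");
	return 0;
}
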
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
new file mode 100644
index 0000000..7e078bc
--- /dev/null
+++ b/arch/sparc/include/asm/elf_64.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPARC64_ELF_H
+#define __ASM_SPARC64_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/extable_64.h>
+#include <asm/spitfire.h>
+#include <asm/adi.h>
+
+/*
+ * Sparc section types
+ */
+#define STT_REGISTER		13
+
+/*
+ * Sparc ELF relocation types
+ */
+#define	R_SPARC_NONE		0
+#define	R_SPARC_8		1
+#define	R_SPARC_16		2
+#define	R_SPARC_32		3
+#define	R_SPARC_DISP8		4
+#define	R_SPARC_DISP16		5
+#define	R_SPARC_DISP32		6
+#define	R_SPARC_WDISP30		7
+#define	R_SPARC_WDISP22		8
+#define	R_SPARC_HI22		9
+#define	R_SPARC_22		10
+#define	R_SPARC_13		11
+#define	R_SPARC_LO10		12
+#define	R_SPARC_GOT10		13
+#define	R_SPARC_GOT13		14
+#define	R_SPARC_GOT22		15
+#define	R_SPARC_PC10		16
+#define	R_SPARC_PC22		17
+#define	R_SPARC_WPLT30		18
+#define	R_SPARC_COPY		19
+#define	R_SPARC_GLOB_DAT	20
+#define	R_SPARC_JMP_SLOT	21
+#define	R_SPARC_RELATIVE	22
+#define	R_SPARC_UA32		23
+#define R_SPARC_PLT32		24
+#define R_SPARC_HIPLT22		25
+#define R_SPARC_LOPLT10		26
+#define R_SPARC_PCPLT32		27
+#define R_SPARC_PCPLT22		28
+#define R_SPARC_PCPLT10		29
+#define R_SPARC_10		30
+#define R_SPARC_11		31
+#define R_SPARC_64		32
+#define R_SPARC_OLO10		33
+#define R_SPARC_WDISP16		40
+#define R_SPARC_WDISP19		41
+#define R_SPARC_7		43
+#define R_SPARC_5		44
+#define R_SPARC_6		45
+
+/* Bits present in AT_HWCAP, primarily for Sparc32.  */
+#define HWCAP_SPARC_FLUSH       0x00000001
+#define HWCAP_SPARC_STBAR       0x00000002
+#define HWCAP_SPARC_SWAP        0x00000004
+#define HWCAP_SPARC_MULDIV      0x00000008
+#define HWCAP_SPARC_V9		0x00000010
+#define HWCAP_SPARC_ULTRA3	0x00000020
+#define HWCAP_SPARC_BLKINIT	0x00000040
+#define HWCAP_SPARC_N2		0x00000080
+
+/* Solaris compatible AT_HWCAP bits. */
+#define AV_SPARC_MUL32		0x00000100 /* 32x32 multiply is efficient */
+#define AV_SPARC_DIV32		0x00000200 /* 32x32 divide is efficient */
+#define AV_SPARC_FSMULD		0x00000400 /* 'fsmuld' is efficient */
+#define AV_SPARC_V8PLUS		0x00000800 /* v9 insn available to 32bit */
+#define AV_SPARC_POPC		0x00001000 /* 'popc' is efficient */
+#define AV_SPARC_VIS		0x00002000 /* VIS insns available */
+#define AV_SPARC_VIS2		0x00004000 /* VIS2 insns available */
+#define AV_SPARC_ASI_BLK_INIT	0x00008000 /* block init ASIs available */
+#define AV_SPARC_FMAF		0x00010000 /* fused multiply-add */
+#define AV_SPARC_VIS3		0x00020000 /* VIS3 insns available */
+#define AV_SPARC_HPC		0x00040000 /* HPC insns available */
+#define AV_SPARC_RANDOM		0x00080000 /* 'random' insn available */
+#define AV_SPARC_TRANS		0x00100000 /* transaction insns available */
+#define AV_SPARC_FJFMAU		0x00200000 /* unfused multiply-add */
+#define AV_SPARC_IMA		0x00400000 /* integer multiply-add */
+#define AV_SPARC_ASI_CACHE_SPARING \
+				0x00800000 /* cache sparing ASIs available */
+#define AV_SPARC_PAUSE		0x01000000 /* PAUSE available */
+#define AV_SPARC_CBCOND		0x02000000 /* CBCOND insns available */
+
+/* Solaris decided to enumerate every single crypto instruction type
+ * in the AT_HWCAP bits.  This is wasteful, since if crypto is present,
+ * you still need to look in the CFR register to see if the opcode is
+ * really available.  So we simply advertise only "crypto" support.
+ */
+#define HWCAP_SPARC_CRYPTO	0x04000000 /* CRYPTO insns available */
+#define HWCAP_SPARC_ADI		0x08000000 /* ADI available */
+
+#define CORE_DUMP_USE_REGSET
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_ARCH		EM_SPARCV9
+#define ELF_CLASS		ELFCLASS64
+#define ELF_DATA		ELFDATA2MSB
+
+/* Format of 64-bit elf_gregset_t is:
+ * 	G0 --> G7
+ * 	O0 --> O7
+ * 	L0 --> L7
+ * 	I0 --> I7
+ *	TSTATE
+ *	TPC
+ *	TNPC
+ *	Y
+ */
+typedef unsigned long elf_greg_t;
+#define ELF_NGREG 36
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct {
+	unsigned long	pr_regs[32];
+	unsigned long	pr_fsr;
+	unsigned long	pr_gsr;
+	unsigned long	pr_fprs;
+} elf_fpregset_t;
+
+/* Format of 32-bit elf_gregset_t is:
+ * 	G0 --> G7
+ *	O0 --> O7
+ *	L0 --> L7
+ *	I0 --> I7
+ *	PSR, PC, nPC, Y, WIM, TBR
+ */
+typedef unsigned int compat_elf_greg_t;
+#define COMPAT_ELF_NGREG 38
+typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
+
+typedef struct {
+	union {
+		unsigned int	pr_regs[32];
+		unsigned long	pr_dregs[16];
+	} pr_fr;
+	unsigned int __unused;
+	unsigned int	pr_fsr;
+	unsigned char	pr_qcnt;
+	unsigned char	pr_q_entrysize;
+	unsigned char	pr_en;
+	unsigned int	pr_q[64];
+} compat_elf_fpregset_t;
+
+/* UltraSparc extensions.  Still unused, but will be eventually.  */
+typedef struct {
+	unsigned int pr_type;
+	unsigned int pr_align;
+	union {
+		struct {
+			union {
+				unsigned int	pr_regs[32];
+				unsigned long	pr_dregs[16];
+				long double	pr_qregs[8];
+			} pr_xfr;
+		} pr_v8p;
+		unsigned int	pr_xfsr;
+		unsigned int	pr_fprs;
+		unsigned int	pr_xg[8];
+		unsigned int	pr_xo[8];
+		unsigned long	pr_tstate;
+		unsigned int	pr_filler[8];
+	} pr_un;
+} elf_xregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x)		((x)->e_machine == ELF_ARCH)
+#define compat_elf_check_arch(x)	((x)->e_machine == EM_SPARC || \
+					 (x)->e_machine == EM_SPARC32PLUS)
+#define compat_start_thread		start_thread32
+
+#define ELF_EXEC_PAGESIZE	PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE		0x0000010000000000UL
+#define COMPAT_ELF_ET_DYN_BASE	0x0000000070000000UL
+
+extern unsigned long sparc64_elf_hwcap;
+#define ELF_HWCAP	sparc64_elf_hwcap
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM	(NULL)
+
+#define SET_PERSONALITY(ex)				\
+do {	if ((ex).e_ident[EI_CLASS] == ELFCLASS32)	\
+		set_thread_flag(TIF_32BIT);		\
+	else						\
+		clear_thread_flag(TIF_32BIT);		\
+	/* flush_thread will update pgd cache */	\
+	if (personality(current->personality) != PER_LINUX32)	\
+		set_personality(PER_LINUX |		\
+			(current->personality & (~PER_MASK)));	\
+} while (0)
+
+extern unsigned int vdso_enabled;
+
+#define	ARCH_DLINFO							\
+do {									\
+	extern struct adi_config adi_state;				\
+	if (vdso_enabled)						\
+		NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
+			    (unsigned long)current->mm->context.vdso);	\
+	NEW_AUX_ENT(AT_ADI_BLKSZ, adi_state.caps.blksz);		\
+	NEW_AUX_ENT(AT_ADI_NBITS, adi_state.caps.nbits);		\
+	NEW_AUX_ENT(AT_ADI_UEONADI, adi_state.caps.ue_on_adi);		\
+} while (0)
+
+struct linux_binprm;
+
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES	1
+extern int arch_setup_additional_pages(struct linux_binprm *bprm,
+					int uses_interp);
+#endif /* !(__ASM_SPARC64_ELF_H) */
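
ARCH_DLINFO above publishes the ADI geometry to every new process through the auxiliary vector. A hedged userspace sketch reading it back (the AT_ADI_* constants are assumed to come from the sparc uapi <asm/auxvec.h>; getauxval() returns 0 for absent entries):

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/auxvec.h>		/* AT_ADI_* on sparc (assumed path) */

int main(void)
{
	unsigned long blksz = getauxval(AT_ADI_BLKSZ);
	unsigned long nbits = getauxval(AT_ADI_NBITS);

	if (!blksz)
		printf("no ADI support reported\n");
	else
		printf("ADI: block size %lu, %lu version bits\n",
		       blksz, nbits);
	return 0;
}
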
diff --git a/arch/sparc/include/asm/estate.h b/arch/sparc/include/asm/estate.h
new file mode 100644
index 0000000..e5e0f84
--- /dev/null
+++ b/arch/sparc/include/asm/estate.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_ESTATE_H
+#define _SPARC64_ESTATE_H
+
+/* UltraSPARC-III E-cache Error Enable */
+#define ESTATE_ERROR_FMT	0x0000000000040000 /* Force MTAG ECC		*/
+#define ESTATE_ERROR_FMESS	0x000000000003c000 /* Forced MTAG ECC val	*/
+#define ESTATE_ERROR_FMD	0x0000000000002000 /* Force DATA ECC		*/
+#define ESTATE_ERROR_FDECC	0x0000000000001ff0 /* Forced DATA ECC val	*/
+#define ESTATE_ERROR_UCEEN	0x0000000000000008 /* See below			*/
+#define ESTATE_ERROR_NCEEN	0x0000000000000002 /* See below			*/
+#define ESTATE_ERROR_CEEN	0x0000000000000001 /* See below			*/
+
+/* UCEEN enables the fast_ECC_error trap for: 1) software correctable E-cache
+ * errors 2) uncorrectable E-cache errors.  Such events only occur on reads
+ * of the E-cache by the local processor for: 1) data loads 2) instruction
+ * fetches 3) atomic operations.  Such events _cannot_ occur for: 1) merge
+ * 2) writeback 3) copyout.  The AFSR bits associated with these traps are
+ * UCC and UCU.
+ */
+
+/* NCEEN enables instruction_access_error, data_access_error, and ECC_error traps
+ * for uncorrectable ECC errors and system errors.
+ *
+ * Uncorrectable system bus data error or MTAG ECC error, system bus TimeOUT,
+ * or system bus BusERR:
+ * 1) As the result of an instruction fetch, will generate instruction_access_error
+ * 2) As the result of a load etc. will generate data_access_error.
+ * 3) As the result of store merge completion, writeback, or copyout will
+ *    generate a disrupting ECC_error trap.
+ * 4) As the result of such errors on instruction vector fetch can generate any
+ *    of the 3 trap types.
+ *
+ * The AFSR bits associated with these traps are EMU, EDU, WDU, CPU, IVU, UE,
+ * BERR, and TO.
+ */
+
+/* CEEN enables the ECC_error trap for hardware corrected ECC errors.  System bus
+ * reads resulting in a hardware corrected data or MTAG ECC error will generate an
+ * ECC_error disrupting trap with this bit enabled.
+ *
+ * This same trap will also be generated when a hardware corrected ECC error results
+ * during store merge, writeback, and copyout operations.
+ */
+
+/* In general, if the trap enable bits above are disabled the AFSR bits will still
+ * log the events even though the trap will not be generated by the processor.
+ */
+
+#endif /* _SPARC64_ESTATE_H */
diff --git a/arch/sparc/include/asm/extable_64.h b/arch/sparc/include/asm/extable_64.h
new file mode 100644
index 0000000..5a01719
--- /dev/null
+++ b/arch/sparc/include/asm/extable_64.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_EXTABLE64_H
+#define __ASM_EXTABLE64_H
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+        unsigned int insn, fixup;
+};
+
+#endif
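
The comment states the table's contract; resolving a fault then reduces to looking up the trapping PC in the insn column. A hedged linear-search sketch (the kernel actually sorts the table at build time and binary-searches it through the generic extable code):

/* Illustrative lookup: return the fixup address for a faulting
 * instruction, or 0 if the fault was not in a whitelisted region.
 */
static unsigned int demo_search_extable(const struct exception_table_entry *start,
					const struct exception_table_entry *end,
					unsigned int fault_pc)
{
	const struct exception_table_entry *e;

	for (e = start; e < end; e++)
		if (e->insn == fault_pc)
			return e->fixup;
	return 0;
}
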
diff --git a/arch/sparc/include/asm/fb.h b/arch/sparc/include/asm/fb.h
new file mode 100644
index 0000000..f699962
--- /dev/null
+++ b/arch/sparc/include/asm/fb.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_FB_H_
+#define _SPARC_FB_H_
+#include <linux/console.h>
+#include <linux/fb.h>
+#include <linux/fs.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+
+static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
+				unsigned long off)
+{
+#ifdef CONFIG_SPARC64
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+#endif
+}
+
+static inline int fb_is_primary_device(struct fb_info *info)
+{
+	struct device *dev = info->device;
+	struct device_node *node;
+
+	if (console_set_on_cmdline)
+		return 0;
+
+	node = dev->of_node;
+	if (node &&
+	    node == of_console_device)
+		return 1;
+
+	return 0;
+}
+
+#endif /* _SPARC_FB_H_ */
diff --git a/arch/sparc/include/asm/fbio.h b/arch/sparc/include/asm/fbio.h
new file mode 100644
index 0000000..02654cb
--- /dev/null
+++ b/arch/sparc/include/asm/fbio.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FBIO_H
+#define __LINUX_FBIO_H
+
+#include <uapi/asm/fbio.h>
+
+#define FBIOPUTCMAP_SPARC _IOW('F', 3, struct fbcmap)
+#define FBIOGETCMAP_SPARC _IOW('F', 4, struct fbcmap)
+/* Addresses on the fd of a cgsix that are mappable */
+#define CG6_FBC    0x70000000
+#define CG6_TEC    0x70001000
+#define CG6_BTREGS 0x70002000
+#define CG6_FHC    0x70004000
+#define CG6_THC    0x70005000
+#define CG6_ROM    0x70006000
+#define CG6_RAM    0x70016000
+#define CG6_DHC    0x80000000
+
+#define CG3_MMAP_OFFSET 0x4000000
+
+/* Addresses on the fd of a tcx that are mappable */
+#define TCX_RAM8BIT   		0x00000000
+#define TCX_RAM24BIT   		0x01000000
+#define TCX_UNK3   		0x10000000
+#define TCX_UNK4   		0x20000000
+#define TCX_CONTROLPLANE   	0x28000000
+#define TCX_UNK6   		0x30000000
+#define TCX_UNK7   		0x38000000
+#define TCX_TEC    		0x70000000
+#define TCX_BTREGS 		0x70002000
+#define TCX_THC    		0x70004000
+#define TCX_DHC    		0x70008000
+#define TCX_ALT	   		0x7000a000
+#define TCX_SYNC   		0x7000e000
+#define TCX_UNK2    		0x70010000
+
+/* CG14 definitions */
+
+/* Offsets into the OBIO space: */
+#define CG14_REGS        0       /* registers */
+#define CG14_CURSORREGS  0x1000  /* cursor registers */
+#define CG14_DACREGS     0x2000  /* DAC registers */
+#define CG14_XLUT        0x3000  /* X Look Up Table -- ??? */
+#define CG14_CLUT1       0x4000  /* Color Look Up Table */
+#define CG14_CLUT2       0x5000  /* Color Look Up Table */
+#define CG14_CLUT3       0x6000  /* Color Look Up Table */
+#define CG14_AUTO	 0xf000
+
+struct  fbcmap32 {
+	int             index;          /* first element (0 origin) */
+	int             count;
+	u32		red;
+	u32		green;
+	u32		blue;
+};
+
+#define FBIOPUTCMAP32	_IOW('F', 3, struct fbcmap32)
+#define FBIOGETCMAP32	_IOW('F', 4, struct fbcmap32)
+
+struct fbcursor32 {
+	short set;		/* what to set, choose from the list above */
+	short enable;		/* cursor on/off */
+	struct fbcurpos pos;	/* cursor position */
+	struct fbcurpos hot;	/* cursor hot spot */
+	struct fbcmap32 cmap;	/* color map info */
+	struct fbcurpos size;	/* cursor bit map size */
+	u32	image;		/* cursor image bits */
+	u32	mask;		/* cursor mask bits */
+};
+
+#define FBIOSCURSOR32	_IOW('F', 24, struct fbcursor32)
+#define FBIOGCURSOR32	_IOW('F', 25, struct fbcursor32)
+#endif /* __LINUX_FBIO_H */
diff --git a/arch/sparc/include/asm/fhc.h b/arch/sparc/include/asm/fhc.h
new file mode 100644
index 0000000..0627fa1
--- /dev/null
+++ b/arch/sparc/include/asm/fhc.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* fhc.h: FHC and Clock board register definitions.
+ *
+ * Copyright (C) 1997, 1999 David S. Miller (davem@redhat.com)
+ */
+
+#ifndef _SPARC64_FHC_H
+#define _SPARC64_FHC_H
+
+/* Clock board register offsets. */
+#define CLOCK_CTRL	0x00UL	/* Main control */
+#define CLOCK_STAT1	0x10UL	/* Status one */
+#define CLOCK_STAT2	0x20UL	/* Status two */
+#define CLOCK_PWRSTAT	0x30UL	/* Power status */
+#define CLOCK_PWRPRES	0x40UL	/* Power presence */
+#define CLOCK_TEMP	0x50UL	/* Temperature */
+#define CLOCK_IRQDIAG	0x60UL	/* IRQ diagnostics */
+#define CLOCK_PWRSTAT2	0x70UL	/* Power status two */
+
+#define CLOCK_CTRL_LLED		0x04	/* Left LED, 0 == on */
+#define CLOCK_CTRL_MLED		0x02	/* Mid LED, 1 == on */
+#define CLOCK_CTRL_RLED		0x01	/* Right LED, 1 == on */
+
+/* Firehose controller register offsets */
+#define FHC_PREGS_ID	0x00UL	/* FHC ID */
+#define  FHC_ID_VERS		0xf0000000 /* Version of this FHC		*/
+#define  FHC_ID_PARTID		0x0ffff000 /* Part ID code (0x0f9f == FHC)	*/
+#define  FHC_ID_MANUF		0x0000007e /* Manufacturer (0x3e == SUN's JEDEC)*/
+#define  FHC_ID_RESV		0x00000001 /* Read as one			*/
+#define FHC_PREGS_RCS	0x10UL	/* FHC Reset Control/Status Register */
+#define  FHC_RCS_POR		0x80000000 /* Last reset was a power cycle	*/
+#define  FHC_RCS_SPOR		0x40000000 /* Last reset was sw power on reset	*/
+#define  FHC_RCS_SXIR		0x20000000 /* Last reset was sw XIR reset	*/
+#define  FHC_RCS_BPOR		0x10000000 /* Last reset was due to POR button	*/
+#define  FHC_RCS_BXIR		0x08000000 /* Last reset was due to XIR button	*/
+#define  FHC_RCS_WEVENT		0x04000000 /* CPU reset was due to wakeup event	*/
+#define  FHC_RCS_CFATAL		0x02000000 /* Centerplane Fatal Error signalled	*/
+#define  FHC_RCS_FENAB		0x01000000 /* Fatal errors elicit system reset	*/
+#define FHC_PREGS_CTRL	0x20UL	/* FHC Control Register */
+#define  FHC_CONTROL_ICS	0x00100000 /* Ignore Centerplane Signals	*/
+#define  FHC_CONTROL_FRST	0x00080000 /* Fatal Error Reset Enable		*/
+#define  FHC_CONTROL_LFAT	0x00040000 /* AC/DC signalled a local error	*/
+#define  FHC_CONTROL_SLINE	0x00010000 /* Firmware Synchronization Line	*/
+#define  FHC_CONTROL_DCD	0x00008000 /* DC-->DC Converter Disable		*/
+#define  FHC_CONTROL_POFF	0x00004000 /* AC/DC Controller PLL Disable	*/
+#define  FHC_CONTROL_FOFF	0x00002000 /* FHC Controller PLL Disable	*/
+#define  FHC_CONTROL_AOFF	0x00001000 /* CPU A SRAM/SBD Low Power Mode	*/
+#define  FHC_CONTROL_BOFF	0x00000800 /* CPU B SRAM/SBD Low Power Mode	*/
+#define  FHC_CONTROL_PSOFF	0x00000400 /* Turns off this FHC's power supply	*/
+#define  FHC_CONTROL_IXIST	0x00000200 /* 0=FHC tells clock board it exists	*/
+#define  FHC_CONTROL_XMSTR	0x00000100 /* 1=Causes this FHC to be XIR master*/
+#define  FHC_CONTROL_LLED	0x00000040 /* 0=Left LED ON			*/
+#define  FHC_CONTROL_MLED	0x00000020 /* 1=Middle LED ON			*/
+#define  FHC_CONTROL_RLED	0x00000010 /* 1=Right LED			*/
+#define  FHC_CONTROL_BPINS	0x00000003 /* Spare Bidirectional Pins		*/
+#define FHC_PREGS_BSR	0x30UL	/* FHC Board Status Register */
+#define  FHC_BSR_DA64		0x00040000 /* Port A: 0=128bit 1=64bit data path */
+#define  FHC_BSR_DB64		0x00020000 /* Port B: 0=128bit 1=64bit data path */
+#define  FHC_BSR_BID		0x0001e000 /* Board ID                           */
+#define  FHC_BSR_SA		0x00001c00 /* Port A UPA Speed (from the pins)   */
+#define  FHC_BSR_SB		0x00000380 /* Port B UPA Speed (from the pins)   */
+#define  FHC_BSR_NDIAG		0x00000040 /* Not in Diag Mode                   */
+#define  FHC_BSR_NTBED		0x00000020 /* Not in TestBED Mode                */
+#define  FHC_BSR_NIA		0x0000001c /* Jumper, bit 18 in PROM space       */
+#define  FHC_BSR_SI		0x00000001 /* Spare input pin value              */
+#define FHC_PREGS_ECC	0x40UL	/* FHC ECC Control Register (16 bits) */
+#define FHC_PREGS_JCTRL	0xf0UL	/* FHC JTAG Control Register */
+#define  FHC_JTAG_CTRL_MENAB	0x80000000 /* Indicates this is JTAG Master	 */
+#define  FHC_JTAG_CTRL_MNONE	0x40000000 /* Indicates no JTAG Master present	 */
+#define FHC_PREGS_JCMD	0x100UL	/* FHC JTAG Command Register */
+#define FHC_IREG_IGN	0x00UL	/* This FHC's IGN */
+#define FHC_FFREGS_IMAP	0x00UL	/* FHC Fanfail IMAP */
+#define FHC_FFREGS_ICLR	0x10UL	/* FHC Fanfail ICLR */
+#define FHC_SREGS_IMAP	0x00UL	/* FHC System IMAP */
+#define FHC_SREGS_ICLR	0x10UL	/* FHC System ICLR */
+#define FHC_UREGS_IMAP	0x00UL	/* FHC Uart IMAP */
+#define FHC_UREGS_ICLR	0x10UL	/* FHC Uart ICLR */
+#define FHC_TREGS_IMAP	0x00UL	/* FHC TOD IMAP */
+#define FHC_TREGS_ICLR	0x10UL	/* FHC TOD ICLR */
+
+#endif /* !(_SPARC64_FHC_H) */
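
The FHC_ID_* masks partition the 32-bit ID register into version, part-ID and manufacturer fields; the shifts below are derived from those masks. An illustrative decoder:

/* Split an FHC ID register value into its documented fields. */
static void demo_decode_fhc_id(unsigned int id)
{
	unsigned int vers   = (id & FHC_ID_VERS) >> 28;
	unsigned int partid = (id & FHC_ID_PARTID) >> 12;
	unsigned int manuf  = (id & FHC_ID_MANUF) >> 1;

	pr_info("FHC: version %u, part 0x%04x, manufacturer 0x%02x\n",
		vers, partid, manuf);
}
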
diff --git a/arch/sparc/include/asm/floppy.h b/arch/sparc/include/asm/floppy.h
new file mode 100644
index 0000000..4b31580
--- /dev/null
+++ b/arch/sparc/include/asm/floppy.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_FLOPPY_H
+#define ___ASM_SPARC_FLOPPY_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/floppy_64.h>
+#else
+#include <asm/floppy_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h
new file mode 100644
index 0000000..b519acf
--- /dev/null
+++ b/arch/sparc/include/asm/floppy_32.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* asm/floppy.h: Sparc specific parts of the Floppy driver.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef __ASM_SPARC_FLOPPY_H
+#define __ASM_SPARC_FLOPPY_H
+
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/pgtable.h>
+#include <asm/idprom.h>
+#include <asm/oplib.h>
+#include <asm/auxio.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/irq.h>
+
+/* We don't need no stinkin' I/O port allocation crap. */
+#undef release_region
+#undef request_region
+#define release_region(X, Y)	do { } while(0)
+#define request_region(X, Y, Z)	(1)
+
+/* References:
+ * 1) Netbsd Sun floppy driver.
+ * 2) NCR 82077 controller manual
+ * 3) Intel 82077 controller manual
+ */
+struct sun_flpy_controller {
+	volatile unsigned char status_82072;  /* Main Status reg. */
+#define dcr_82072              status_82072   /* Digital Control reg. */
+#define status1_82077          status_82072   /* Auxiliary Status reg. 1 */
+
+	volatile unsigned char data_82072;    /* Data fifo. */
+#define status2_82077          data_82072     /* Auxiliary Status reg. 2 */
+
+	volatile unsigned char dor_82077;     /* Digital Output reg. */
+	volatile unsigned char tapectl_82077; /* What the? Tape control reg? */
+
+	volatile unsigned char status_82077;  /* Main Status Register. */
+#define drs_82077              status_82077   /* Digital Rate Select reg. */
+
+	volatile unsigned char data_82077;    /* Data fifo. */
+	volatile unsigned char ___unused;
+	volatile unsigned char dir_82077;     /* Digital Input reg. */
+#define dcr_82077              dir_82077      /* Config Control reg. */
+};
+
+/* You'll only ever find one controller on a SparcStation anyways. */
+static struct sun_flpy_controller *sun_fdc = NULL;
+
+struct sun_floppy_ops {
+	unsigned char (*fd_inb)(int port);
+	void (*fd_outb)(unsigned char value, int port);
+};
+
+static struct sun_floppy_ops sun_fdops;
+
+#define fd_inb(port)              sun_fdops.fd_inb(port)
+#define fd_outb(value,port)       sun_fdops.fd_outb(value,port)
+#define fd_enable_dma()           sun_fd_enable_dma()
+#define fd_disable_dma()          sun_fd_disable_dma()
+#define fd_request_dma()          (0) /* nothing... */
+#define fd_free_dma()             /* nothing... */
+#define fd_clear_dma_ff()         /* nothing... */
+#define fd_set_dma_mode(mode)     sun_fd_set_dma_mode(mode)
+#define fd_set_dma_addr(addr)     sun_fd_set_dma_addr(addr)
+#define fd_set_dma_count(count)   sun_fd_set_dma_count(count)
+#define fd_enable_irq()           /* nothing... */
+#define fd_disable_irq()          /* nothing... */
+#define fd_request_irq()          sun_fd_request_irq()
+#define fd_free_irq()             /* nothing... */
+#if 0  /* P3: added by Alain, these cause a MMU corruption. 19960524 XXX */
+#define fd_dma_mem_alloc(size)    ((unsigned long) vmalloc(size))
+#define fd_dma_mem_free(addr,size) (vfree((void *)(addr)))
+#endif
+
+/* XXX This isn't really correct. XXX */
+#define get_dma_residue(x)        (0)
+
+#define FLOPPY0_TYPE  4
+#define FLOPPY1_TYPE  0
+
+/* Super paranoid... */
+#undef HAVE_DISABLE_HLT
+
+/* Here is where we catch the floppy driver trying to initialize,
+ * therefore this is where we call the PROM device tree probing
+ * routine etc. on the Sparc.
+ */
+#define FDC1                      sun_floppy_init()
+
+#define N_FDC    1
+#define N_DRIVE  8
+
+/* No 64k boundary crossing problems on the Sparc. */
+#define CROSS_64KB(a,s) (0)
+
+/* Routines unique to each controller type on a Sun. */
+static void sun_set_dor(unsigned char value, int fdc_82077)
+{
+	if (fdc_82077)
+		sun_fdc->dor_82077 = value;
+}
+
+static unsigned char sun_read_dir(void)
+{
+	return sun_fdc->dir_82077;
+}
+
+static unsigned char sun_82072_fd_inb(int port)
+{
+	udelay(5);
+	switch(port & 7) {
+	default:
+		printk("floppy: Asked to read unknown port %d\n", port);
+		panic("floppy: Port bolixed.");
+	case 4: /* FD_STATUS */
+		return sun_fdc->status_82072 & ~STATUS_DMA;
+	case 5: /* FD_DATA */
+		return sun_fdc->data_82072;
+	case 7: /* FD_DIR */
+		return sun_read_dir();
+	}
+	panic("sun_82072_fd_inb: How did I get here?");
+}
+
+static void sun_82072_fd_outb(unsigned char value, int port)
+{
+	udelay(5);
+	switch(port & 7) {
+	default:
+		printk("floppy: Asked to write to unknown port %d\n", port);
+		panic("floppy: Port bolixed.");
+	case 2: /* FD_DOR */
+		sun_set_dor(value, 0);
+		break;
+	case 5: /* FD_DATA */
+		sun_fdc->data_82072 = value;
+		break;
+	case 7: /* FD_DCR */
+		sun_fdc->dcr_82072 = value;
+		break;
+	case 4: /* FD_STATUS */
+		sun_fdc->status_82072 = value;
+		break;
+	}
+	return;
+}
+
+static unsigned char sun_82077_fd_inb(int port)
+{
+	udelay(5);
+	switch(port & 7) {
+	default:
+		printk("floppy: Asked to read unknown port %d\n", port);
+		panic("floppy: Port bolixed.");
+	case 0: /* FD_STATUS_0 */
+		return sun_fdc->status1_82077;
+	case 1: /* FD_STATUS_1 */
+		return sun_fdc->status2_82077;
+	case 2: /* FD_DOR */
+		return sun_fdc->dor_82077;
+	case 3: /* FD_TDR */
+		return sun_fdc->tapectl_82077;
+	case 4: /* FD_STATUS */
+		return sun_fdc->status_82077 & ~STATUS_DMA;
+	case 5: /* FD_DATA */
+		return sun_fdc->data_82077;
+	case 7: /* FD_DIR */
+		return sun_read_dir();
+	}
+	panic("sun_82077_fd_inb: How did I get here?");
+}
+
+static void sun_82077_fd_outb(unsigned char value, int port)
+{
+	udelay(5);
+	switch(port & 7) {
+	default:
+		printk("floppy: Asked to write to unknown port %d\n", port);
+		panic("floppy: Port bolixed.");
+	case 2: /* FD_DOR */
+		sun_set_dor(value, 1);
+		break;
+	case 5: /* FD_DATA */
+		sun_fdc->data_82077 = value;
+		break;
+	case 7: /* FD_DCR */
+		sun_fdc->dcr_82077 = value;
+		break;
+	case 4: /* FD_STATUS */
+		sun_fdc->status_82077 = value;
+		break;
+	case 3: /* FD_TDR */
+		sun_fdc->tapectl_82077 = value;
+		break;
+	}
+	return;
+}
+
+/* For pseudo-dma (Sun floppy drives have no real DMA available to
+ * them so we must eat the data fifo bytes directly ourselves) we have
+ * three state variables.  doing_pdma tells our inline low-level
+ * assembly floppy interrupt entry point whether it should sit and eat
+ * bytes from the fifo or just transfer control up to the higher level
+ * floppy interrupt c-code.  I tried very hard but I could not get the
+ * pseudo-dma to work in c-code without getting many overruns and
+ * underruns.  If non-zero, doing_pdma encodes the direction of
+ * the transfer for debugging.  1=read 2=write
+ */
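+
+/* Illustrative only: the 64-bit variant later in this diff
+ * (floppy_64.h) implements this same fifo-eating loop in C, roughly:
+ *
+ *	while (pdma_size && fifo_ready()) {
+ *		if (doing_pdma == 1)
+ *			*pdma_vaddr++ = read_fifo();
+ *		else
+ *			write_fifo(*pdma_vaddr++);
+ *		pdma_size--;
+ *	}
+ *
+ * fifo_ready()/read_fifo()/write_fifo() are placeholders here for the
+ * status and data register accesses.
+ */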
+
+/* Common routines to all controller types on the Sparc. */
+static inline void virtual_dma_init(void)
+{
+	/* nothing... */
+}
+
+static inline void sun_fd_disable_dma(void)
+{
+	doing_pdma = 0;
+	pdma_base = NULL;
+}
+
+static inline void sun_fd_set_dma_mode(int mode)
+{
+	switch(mode) {
+	case DMA_MODE_READ:
+		doing_pdma = 1;
+		break;
+	case DMA_MODE_WRITE:
+		doing_pdma = 2;
+		break;
+	default:
+		printk("Unknown dma mode %d\n", mode);
+		panic("floppy: Giving up...");
+	}
+}
+
+static inline void sun_fd_set_dma_addr(char *buffer)
+{
+	pdma_vaddr = buffer;
+}
+
+static inline void sun_fd_set_dma_count(int length)
+{
+	pdma_size = length;
+}
+
+static inline void sun_fd_enable_dma(void)
+{
+	pdma_base = pdma_vaddr;
+	pdma_areasize = pdma_size;
+}
+
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler);
+
+static int sun_fd_request_irq(void)
+{
+	static int once = 0;
+
+	if (!once) {
+		once = 1;
+		return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt);
+	} else {
+		return 0;
+	}
+}
+
+static struct linux_prom_registers fd_regs[2];
+
+static int sun_floppy_init(void)
+{
+	struct platform_device *op;
+	struct device_node *dp;
+	struct resource r;
+	char state[128];
+	phandle fd_node;
+	phandle tnode;
+	int num_regs;
+
+	use_virtual_dma = 1;
+
+	/* Forget it if we aren't on a machine that could possibly
+	 * ever have a floppy drive.
+	 */
+	if (sparc_cpu_model != sun4m) {
+		/* We certainly don't have a floppy controller. */
+		goto no_sun_fdc;
+	}
+	/* Well, try to find one. */
+	tnode = prom_getchild(prom_root_node);
+	fd_node = prom_searchsiblings(tnode, "obio");
+	if (fd_node != 0) {
+		tnode = prom_getchild(fd_node);
+		fd_node = prom_searchsiblings(tnode, "SUNW,fdtwo");
+	} else {
+		fd_node = prom_searchsiblings(tnode, "fd");
+	}
+	if (fd_node == 0) {
+		goto no_sun_fdc;
+	}
+
+	/* The sun4m lets us know if the controller is actually usable. */
+	if (prom_getproperty(fd_node, "status", state, sizeof(state)) != -1) {
+		if (!strcmp(state, "disabled")) {
+			goto no_sun_fdc;
+		}
+	}
+	num_regs = prom_getproperty(fd_node, "reg", (char *) fd_regs, sizeof(fd_regs));
+	num_regs = (num_regs / sizeof(fd_regs[0]));
+	prom_apply_obio_ranges(fd_regs, num_regs);
+	memset(&r, 0, sizeof(r));
+	r.flags = fd_regs[0].which_io;
+	r.start = fd_regs[0].phys_addr;
+	sun_fdc = of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy");
+
+	/* Look up irq in platform_device.
+	 * We try "SUNW,fdtwo" and "fd"
+	 */
+	op = NULL;
+	for_each_node_by_name(dp, "SUNW,fdtwo") {
+		op = of_find_device_by_node(dp);
+		if (op)
+			break;
+	}
+	if (!op) {
+		for_each_node_by_name(dp, "fd") {
+			op = of_find_device_by_node(dp);
+			if (op)
+				break;
+		}
+	}
+	if (!op)
+		goto no_sun_fdc;
+
+	FLOPPY_IRQ = op->archdata.irqs[0];
+
+	/* Last minute sanity check... */
+	if (sun_fdc->status_82072 == 0xff) {
+		sun_fdc = NULL;
+		goto no_sun_fdc;
+	}
+
+	sun_fdops.fd_inb = sun_82077_fd_inb;
+	sun_fdops.fd_outb = sun_82077_fd_outb;
+	fdc_status = &sun_fdc->status_82077;
+
+	if (sun_fdc->dor_82077 == 0x80) {
+		sun_fdc->dor_82077 = 0x02;
+		if (sun_fdc->dor_82077 == 0x80) {
+			sun_fdops.fd_inb = sun_82072_fd_inb;
+			sun_fdops.fd_outb = sun_82072_fd_outb;
+			fdc_status = &sun_fdc->status_82072;
+		}
+	}
+
+	/* Success... */
+	allowed_drive_mask = 0x01;
+	return (int) sun_fdc;
+
+no_sun_fdc:
+	return -1;
+}
+
+static int sparc_eject(void)
+{
+	set_dor(0x00, 0xff, 0x90);
+	udelay(500);
+	set_dor(0x00, 0x6f, 0x00);
+	udelay(500);
+	return 0;
+}
+
+#define fd_eject(drive) sparc_eject()
+
+#define EXTRA_FLOPPY_PARAMS
+
+static DEFINE_SPINLOCK(dma_spin_lock);
+
+#define claim_dma_lock() \
+({	unsigned long flags; \
+	spin_lock_irqsave(&dma_spin_lock, flags); \
+	flags; \
+})
+
+#define release_dma_lock(__flags) \
+	spin_unlock_irqrestore(&dma_spin_lock, __flags);
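+
+/* Typical (illustrative) use by the generic floppy driver:
+ *
+ *	unsigned long flags = claim_dma_lock();
+ *	fd_disable_dma();
+ *	...
+ *	release_dma_lock(flags);
+ */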
+
+#endif /* !(__ASM_SPARC_FLOPPY_H) */
diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h
new file mode 100644
index 0000000..2a050ea
--- /dev/null
+++ b/arch/sparc/include/asm/floppy_64.h
@@ -0,0 +1,774 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* floppy.h: Sparc specific parts of the Floppy driver.
+ *
+ * Copyright (C) 1996, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * Ultra/PCI support added: Sep 1997  Eddie C. Dost  (ecd@skynet.be)
+ */
+
+#ifndef __ASM_SPARC64_FLOPPY_H
+#define __ASM_SPARC64_FLOPPY_H
+
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/auxio.h>
+
+/*
+ * Define this to enable exchanging drive 0 and 1 if only drive 1 is
+ * probed on PCI machines.
+ */
+#undef PCI_FDC_SWAP_DRIVES
+
+
+/* References:
+ * 1) Netbsd Sun floppy driver.
+ * 2) NCR 82077 controller manual
+ * 3) Intel 82077 controller manual
+ */
+struct sun_flpy_controller {
+	volatile unsigned char status1_82077; /* Auxiliary Status reg. 1 */
+	volatile unsigned char status2_82077; /* Auxiliary Status reg. 2 */
+	volatile unsigned char dor_82077;     /* Digital Output reg. */
+	volatile unsigned char tapectl_82077; /* Tape Control reg */
+	volatile unsigned char status_82077;  /* Main Status Register. */
+#define drs_82077              status_82077   /* Digital Rate Select reg. */
+	volatile unsigned char data_82077;    /* Data fifo. */
+	volatile unsigned char ___unused;
+	volatile unsigned char dir_82077;     /* Digital Input reg. */
+#define dcr_82077              dir_82077      /* Config Control reg. */
+};
+
+/* You'll only ever find one controller on an Ultra anyways. */
+static struct sun_flpy_controller *sun_fdc = (struct sun_flpy_controller *)-1;
+unsigned long fdc_status;
+static struct platform_device *floppy_op = NULL;
+
+struct sun_floppy_ops {
+	unsigned char	(*fd_inb) (unsigned long port);
+	void		(*fd_outb) (unsigned char value, unsigned long port);
+	void		(*fd_enable_dma) (void);
+	void		(*fd_disable_dma) (void);
+	void		(*fd_set_dma_mode) (int);
+	void		(*fd_set_dma_addr) (char *);
+	void		(*fd_set_dma_count) (int);
+	unsigned int	(*get_dma_residue) (void);
+	int		(*fd_request_irq) (void);
+	void		(*fd_free_irq) (void);
+	int		(*fd_eject) (int);
+};
+
+static struct sun_floppy_ops sun_fdops;
+
+#define fd_inb(port)              sun_fdops.fd_inb(port)
+#define fd_outb(value,port)       sun_fdops.fd_outb(value,port)
+#define fd_enable_dma()           sun_fdops.fd_enable_dma()
+#define fd_disable_dma()          sun_fdops.fd_disable_dma()
+#define fd_request_dma()          (0) /* nothing... */
+#define fd_free_dma()             /* nothing... */
+#define fd_clear_dma_ff()         /* nothing... */
+#define fd_set_dma_mode(mode)     sun_fdops.fd_set_dma_mode(mode)
+#define fd_set_dma_addr(addr)     sun_fdops.fd_set_dma_addr(addr)
+#define fd_set_dma_count(count)   sun_fdops.fd_set_dma_count(count)
+#define get_dma_residue(x)        sun_fdops.get_dma_residue()
+#define fd_request_irq()          sun_fdops.fd_request_irq()
+#define fd_free_irq()             sun_fdops.fd_free_irq()
+#define fd_eject(drive)           sun_fdops.fd_eject(drive)
+
+/* Super paranoid... */
+#undef HAVE_DISABLE_HLT
+
+static int sun_floppy_types[2] = { 0, 0 };
+
+/* Here is where we catch the floppy driver trying to initialize,
+ * therefore this is where we call the PROM device tree probing
+ * routine etc. on the Sparc.
+ */
+#define FLOPPY0_TYPE		sun_floppy_init()
+#define FLOPPY1_TYPE		sun_floppy_types[1]
+
+#define FDC1			((unsigned long)sun_fdc)
+
+#define N_FDC    1
+#define N_DRIVE  8
+
+/* No 64k boundary crossing problems on the Sparc. */
+#define CROSS_64KB(a,s) (0)
+
+static unsigned char sun_82077_fd_inb(unsigned long port)
+{
+	udelay(5);
+	switch(port & 7) {
+	default:
+		printk("floppy: Asked to read unknown port %lx\n", port);
+		panic("floppy: Port bolixed.");
+	case 4: /* FD_STATUS */
+		return sbus_readb(&sun_fdc->status_82077) & ~STATUS_DMA;
+	case 5: /* FD_DATA */
+		return sbus_readb(&sun_fdc->data_82077);
+	case 7: /* FD_DIR */
+		/* XXX: Is DCL on 0x80 in sun4m? */
+		return sbus_readb(&sun_fdc->dir_82077);
+	}
+	panic("sun_82072_fd_inb: How did I get here?");
+}
+
+static void sun_82077_fd_outb(unsigned char value, unsigned long port)
+{
+	udelay(5);
+	switch(port & 7) {
+	default:
+		printk("floppy: Asked to write to unknown port %lx\n", port);
+		panic("floppy: Port bolixed.");
+	case 2: /* FD_DOR */
+		/* Happily, the 82077 has a real DOR register. */
+		sbus_writeb(value, &sun_fdc->dor_82077);
+		break;
+	case 5: /* FD_DATA */
+		sbus_writeb(value, &sun_fdc->data_82077);
+		break;
+	case 7: /* FD_DCR */
+		sbus_writeb(value, &sun_fdc->dcr_82077);
+		break;
+	case 4: /* FD_STATUS */
+		sbus_writeb(value, &sun_fdc->status_82077);
+		break;
+	}
+	return;
+}
+
+/* For pseudo-dma (Sun floppy drives have no real DMA available to
+ * them so we must eat the data fifo bytes directly ourselves) we have
+ * three state variables.  doing_pdma tells our inline low-level
+ * assembly floppy interrupt entry point whether it should sit and eat
+ * bytes from the fifo or just transfer control up to the higher level
+ * floppy interrupt c-code.  I tried very hard but I could not get the
+ * pseudo-dma to work in c-code without getting many overruns and
+ * underruns.  If non-zero, doing_pdma encodes the direction of
+ * the transfer for debugging.  1=read 2=write
+ */
+unsigned char *pdma_vaddr;
+unsigned long pdma_size;
+volatile int doing_pdma = 0;
+
+/* This is software state */
+char *pdma_base = NULL;
+unsigned long pdma_areasize;
+
+/* Common routines to all controller types on the Sparc. */
+static void sun_fd_disable_dma(void)
+{
+	doing_pdma = 0;
+	pdma_base = NULL;
+}
+
+static void sun_fd_set_dma_mode(int mode)
+{
+	switch(mode) {
+	case DMA_MODE_READ:
+		doing_pdma = 1;
+		break;
+	case DMA_MODE_WRITE:
+		doing_pdma = 2;
+		break;
+	default:
+		printk("Unknown dma mode %d\n", mode);
+		panic("floppy: Giving up...");
+	}
+}
+
+static void sun_fd_set_dma_addr(char *buffer)
+{
+	pdma_vaddr = buffer;
+}
+
+static void sun_fd_set_dma_count(int length)
+{
+	pdma_size = length;
+}
+
+static void sun_fd_enable_dma(void)
+{
+	pdma_base = pdma_vaddr;
+	pdma_areasize = pdma_size;
+}
+
+irqreturn_t sparc_floppy_irq(int irq, void *dev_cookie)
+{
+	if (likely(doing_pdma)) {
+		void __iomem *stat = (void __iomem *) fdc_status;
+		unsigned char *vaddr = pdma_vaddr;
+		unsigned long size = pdma_size;
+		u8 val;
+
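+		/* 82077 main status register bits tested below: 0x80 = RQM
+		 * (fifo ready for another byte), 0x40 = DIO (set means data
+		 * flows controller -> CPU, i.e. a read), 0x20 = NDM (a
+		 * non-DMA transfer is still in progress).
+		 */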
+		while (size) {
+			val = readb(stat);
+			if (unlikely(!(val & 0x80))) {
+				pdma_vaddr = vaddr;
+				pdma_size = size;
+				return IRQ_HANDLED;
+			}
+			if (unlikely(!(val & 0x20))) {
+				pdma_vaddr = vaddr;
+				pdma_size = size;
+				doing_pdma = 0;
+				goto main_interrupt;
+			}
+			if (val & 0x40) {
+				/* read */
+				*vaddr++ = readb(stat + 1);
+			} else {
+				unsigned char data = *vaddr++;
+
+				/* write */
+				writeb(data, stat + 1);
+			}
+			size--;
+		}
+
+		pdma_vaddr = vaddr;
+		pdma_size = size;
+
+		/* Send Terminal Count pulse to floppy controller. */
+		val = readb(auxio_register);
+		val |= AUXIO_AUX1_FTCNT;
+		writeb(val, auxio_register);
+		val &= ~AUXIO_AUX1_FTCNT;
+		writeb(val, auxio_register);
+
+		doing_pdma = 0;
+	}
+
+main_interrupt:
+	return floppy_interrupt(irq, dev_cookie);
+}
+
+static int sun_fd_request_irq(void)
+{
+	static int once = 0;
+	int error;
+
+	if (!once) {
+		once = 1;
+
+		error = request_irq(FLOPPY_IRQ, sparc_floppy_irq,
+				    0, "floppy", NULL);
+
+		return ((error == 0) ? 0 : -1);
+	}
+	return 0;
+}
+
+static void sun_fd_free_irq(void)
+{
+}
+
+static unsigned int sun_get_dma_residue(void)
+{
+	/* XXX This isn't really correct. XXX */
+	return 0;
+}
+
+static int sun_fd_eject(int drive)
+{
+	set_dor(0x00, 0xff, 0x90);
+	udelay(500);
+	set_dor(0x00, 0x6f, 0x00);
+	udelay(500);
+	return 0;
+}
+
+#include <asm/ebus_dma.h>
+#include <asm/ns87303.h>
+
+static struct ebus_dma_info sun_pci_fd_ebus_dma;
+static struct device *sun_floppy_dev;
+static int sun_pci_broken_drive = -1;
+
+struct sun_pci_dma_op {
+	unsigned int 	addr;
+	int		len;
+	int		direction;
+	char		*buf;
+};
+static struct sun_pci_dma_op sun_pci_dma_current = { -1U, 0, 0, NULL};
+static struct sun_pci_dma_op sun_pci_dma_pending = { -1U, 0, 0, NULL};
+
+irqreturn_t floppy_interrupt(int irq, void *dev_id);
+
+static unsigned char sun_pci_fd_inb(unsigned long port)
+{
+	udelay(5);
+	return inb(port);
+}
+
+static void sun_pci_fd_outb(unsigned char val, unsigned long port)
+{
+	udelay(5);
+	outb(val, port);
+}
+
+static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port)
+{
+	udelay(5);
+	/*
+	 * XXX: Due to SUN's broken floppy connector on AX and AXi
+	 *      we need to turn on MOTOR_0 also, if the floppy is
+	 *      jumpered to DS1 (like most PC floppies are). I hope
+	 *      this does not hurt correct hardware like the AXmp.
+	 *      (Eddie, Sep 12 1998).
+	 */
+	if (port == ((unsigned long)sun_fdc) + 2) {
+		if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x20)) {
+			val |= 0x10;
+		}
+	}
+	outb(val, port);
+}
+
+#ifdef PCI_FDC_SWAP_DRIVES
+static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port)
+{
+	udelay(5);
+	/*
+	 * XXX: Due to SUN's broken floppy connector on AX and AXi
+	 *      we need to turn on MOTOR_0 also, if the floppy is
+	 *      jumpered to DS1 (like most PC floppies are). I hope
+	 *      this does not hurt correct hardware like the AXmp.
+	 *      (Eddie, Sep 12 1998).
+	 */
+	if (port == ((unsigned long)sun_fdc) + 2) {
+		if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x10)) {
+			val &= ~(0x03);
+			val |= 0x21;
+		}
+	}
+	outb(val, port);
+}
+#endif /* PCI_FDC_SWAP_DRIVES */
+
+static void sun_pci_fd_enable_dma(void)
+{
+	BUG_ON((NULL == sun_pci_dma_pending.buf) 	||
+	    (0	  == sun_pci_dma_pending.len) 	||
+	    (0	  == sun_pci_dma_pending.direction));
+
+	sun_pci_dma_current.buf = sun_pci_dma_pending.buf;
+	sun_pci_dma_current.len = sun_pci_dma_pending.len;
+	sun_pci_dma_current.direction = sun_pci_dma_pending.direction;
+
+	sun_pci_dma_pending.buf  = NULL;
+	sun_pci_dma_pending.len  = 0;
+	sun_pci_dma_pending.direction = 0;
+	sun_pci_dma_pending.addr = -1U;
+
+	sun_pci_dma_current.addr =
+		dma_map_single(sun_floppy_dev,
+			       sun_pci_dma_current.buf,
+			       sun_pci_dma_current.len,
+			       sun_pci_dma_current.direction);
+
+	ebus_dma_enable(&sun_pci_fd_ebus_dma, 1);
+
+	if (ebus_dma_request(&sun_pci_fd_ebus_dma,
+			     sun_pci_dma_current.addr,
+			     sun_pci_dma_current.len))
+		BUG();
+}
+
+static void sun_pci_fd_disable_dma(void)
+{
+	ebus_dma_enable(&sun_pci_fd_ebus_dma, 0);
+	if (sun_pci_dma_current.addr != -1U)
+		dma_unmap_single(sun_floppy_dev,
+				 sun_pci_dma_current.addr,
+				 sun_pci_dma_current.len,
+				 sun_pci_dma_current.direction);
+	sun_pci_dma_current.addr = -1U;
+}
+
+static void sun_pci_fd_set_dma_mode(int mode)
+{
+	if (mode == DMA_MODE_WRITE)
+		sun_pci_dma_pending.direction = DMA_TO_DEVICE;
+	else
+		sun_pci_dma_pending.direction = DMA_FROM_DEVICE;
+
+	ebus_dma_prepare(&sun_pci_fd_ebus_dma, mode != DMA_MODE_WRITE);
+}
+
+static void sun_pci_fd_set_dma_count(int length)
+{
+	sun_pci_dma_pending.len = length;
+}
+
+static void sun_pci_fd_set_dma_addr(char *buffer)
+{
+	sun_pci_dma_pending.buf = buffer;
+}
+
+static unsigned int sun_pci_get_dma_residue(void)
+{
+	return ebus_dma_residue(&sun_pci_fd_ebus_dma);
+}
+
+static int sun_pci_fd_request_irq(void)
+{
+	return ebus_dma_irq_enable(&sun_pci_fd_ebus_dma, 1);
+}
+
+static void sun_pci_fd_free_irq(void)
+{
+	ebus_dma_irq_enable(&sun_pci_fd_ebus_dma, 0);
+}
+
+static int sun_pci_fd_eject(int drive)
+{
+	return -EINVAL;
+}
+
+void sun_pci_fd_dma_callback(struct ebus_dma_info *p, int event, void *cookie)
+{
+	floppy_interrupt(0, NULL);
+}
+
+/*
+ * Floppy probing: we'd like to use /dev/fd0 for a single floppy on PCI,
+ * even if it is configured using DS1 and would thus look like /dev/fd1
+ * with the cabling used in Ultras.
+ */
+#define DOR	(port + 2)
+#define MSR	(port + 4)
+#define FIFO	(port + 5)
+
+static void sun_pci_fd_out_byte(unsigned long port, unsigned char val,
+			        unsigned long reg)
+{
+	unsigned char status;
+	int timeout = 1000;
+
+	while (!((status = inb(MSR)) & 0x80) && --timeout)
+		udelay(100);
+	outb(val, reg);
+}
+
+static unsigned char sun_pci_fd_sensei(unsigned long port)
+{
+	unsigned char result[2] = { 0x70, 0x00 };
+	unsigned char status;
+	int i = 0;
+
+	sun_pci_fd_out_byte(port, 0x08, FIFO);
+	do {
+		int timeout = 1000;
+
+		while (!((status = inb(MSR)) & 0x80) && --timeout)
+			udelay(100);
+
+		if (!timeout)
+			break;
+
+		if ((status & 0xf0) == 0xd0)
+			result[i++] = inb(FIFO);
+		else
+			break;
+	} while (i < 2);
+
+	return result[0];
+}
+
+static void sun_pci_fd_reset(unsigned long port)
+{
+	unsigned char mask = 0x00;
+	unsigned char status;
+	int timeout = 10000;
+
+	outb(0x80, MSR);
+	do {
+		status = sun_pci_fd_sensei(port);
+		if ((status & 0xc0) == 0xc0)
+			mask |= 1 << (status & 0x03);
+		else
+			udelay(100);
+	} while ((mask != 0x0f) && --timeout);
+}
+
+static int sun_pci_fd_test_drive(unsigned long port, int drive)
+{
+	unsigned char status, data;
+	int timeout = 1000;
+	int ready;
+
+	sun_pci_fd_reset(port);
+
+	data = (0x10 << drive) | 0x0c | drive;
+	sun_pci_fd_out_byte(port, data, DOR);
+
+	sun_pci_fd_out_byte(port, 0x07, FIFO);
+	sun_pci_fd_out_byte(port, drive & 0x03, FIFO);
+
+	do {
+		udelay(100);
+		status = sun_pci_fd_sensei(port);
+	} while (((status & 0xc0) == 0x80) && --timeout);
+
+	if (!timeout)
+		ready = 0;
+	else
+		ready = (status & 0x10) ? 0 : 1;
+
+	sun_pci_fd_reset(port);
+	return ready;
+}
+#undef FIFO
+#undef MSR
+#undef DOR
+
+static int __init ebus_fdthree_p(struct device_node *dp)
+{
+	if (!strcmp(dp->name, "fdthree"))
+		return 1;
+	if (!strcmp(dp->name, "floppy")) {
+		const char *compat;
+
+		compat = of_get_property(dp, "compatible", NULL);
+		if (compat && !strcmp(compat, "fdthree"))
+			return 1;
+	}
+	return 0;
+}
+
+static unsigned long __init sun_floppy_init(void)
+{
+	static int initialized = 0;
+	struct device_node *dp;
+	struct platform_device *op;
+	const char *prop;
+
+	if (initialized)
+		return sun_floppy_types[0];
+	initialized = 1;
+
+	op = NULL;
+
+	for_each_node_by_name(dp, "SUNW,fdtwo") {
+		if (strcmp(dp->parent->name, "sbus"))
+			continue;
+		op = of_find_device_by_node(dp);
+		if (op)
+			break;
+	}
+	if (op) {
+		floppy_op = op;
+		FLOPPY_IRQ = op->archdata.irqs[0];
+	} else {
+		struct device_node *ebus_dp;
+		void __iomem *auxio_reg;
+		const char *state_prop;
+		unsigned long config;
+
+		dp = NULL;
+		for_each_node_by_name(ebus_dp, "ebus") {
+			for (dp = ebus_dp->child; dp; dp = dp->sibling) {
+				if (ebus_fdthree_p(dp))
+					goto found_fdthree;
+			}
+		}
+	found_fdthree:
+		if (!dp)
+			return 0;
+
+		op = of_find_device_by_node(dp);
+		if (!op)
+			return 0;
+
+		state_prop = of_get_property(op->dev.of_node, "status", NULL);
+		if (state_prop && !strncmp(state_prop, "disabled", 8))
+			return 0;
+
+		FLOPPY_IRQ = op->archdata.irqs[0];
+
+		/* Make sure the high density bit is set, some systems
+		 * (most notably Ultra5/Ultra10) come up with it clear.
+		 */
+		auxio_reg = (void __iomem *) op->resource[2].start;
+		writel(readl(auxio_reg)|0x2, auxio_reg);
+
+		sun_floppy_dev = &op->dev;
+
+		spin_lock_init(&sun_pci_fd_ebus_dma.lock);
+
+		/* XXX ioremap */
+		sun_pci_fd_ebus_dma.regs = (void __iomem *)
+			op->resource[1].start;
+		if (!sun_pci_fd_ebus_dma.regs)
+			return 0;
+
+		sun_pci_fd_ebus_dma.flags = (EBUS_DMA_FLAG_USE_EBDMA_HANDLER |
+					     EBUS_DMA_FLAG_TCI_DISABLE);
+		sun_pci_fd_ebus_dma.callback = sun_pci_fd_dma_callback;
+		sun_pci_fd_ebus_dma.client_cookie = NULL;
+		sun_pci_fd_ebus_dma.irq = FLOPPY_IRQ;
+		strcpy(sun_pci_fd_ebus_dma.name, "floppy");
+		if (ebus_dma_register(&sun_pci_fd_ebus_dma))
+			return 0;
+
+		/* XXX ioremap */
+		sun_fdc = (struct sun_flpy_controller *) op->resource[0].start;
+
+		sun_fdops.fd_inb = sun_pci_fd_inb;
+		sun_fdops.fd_outb = sun_pci_fd_outb;
+
+		can_use_virtual_dma = use_virtual_dma = 0;
+		sun_fdops.fd_enable_dma = sun_pci_fd_enable_dma;
+		sun_fdops.fd_disable_dma = sun_pci_fd_disable_dma;
+		sun_fdops.fd_set_dma_mode = sun_pci_fd_set_dma_mode;
+		sun_fdops.fd_set_dma_addr = sun_pci_fd_set_dma_addr;
+		sun_fdops.fd_set_dma_count = sun_pci_fd_set_dma_count;
+		sun_fdops.get_dma_residue = sun_pci_get_dma_residue;
+
+		sun_fdops.fd_request_irq = sun_pci_fd_request_irq;
+		sun_fdops.fd_free_irq = sun_pci_fd_free_irq;
+
+		sun_fdops.fd_eject = sun_pci_fd_eject;
+
+		fdc_status = (unsigned long) &sun_fdc->status_82077;
+
+		/*
+		 * XXX: Find out on which machines this is really needed.
+		 */
+		if (1) {
+			sun_pci_broken_drive = 1;
+			sun_fdops.fd_outb = sun_pci_fd_broken_outb;
+		}
+
+		allowed_drive_mask = 0;
+		if (sun_pci_fd_test_drive((unsigned long)sun_fdc, 0))
+			sun_floppy_types[0] = 4;
+		if (sun_pci_fd_test_drive((unsigned long)sun_fdc, 1))
+			sun_floppy_types[1] = 4;
+
+		/*
+		 * Find NS87303 SuperIO config registers (through ecpp).
+		 */
+		config = 0;
+		for (dp = ebus_dp->child; dp; dp = dp->sibling) {
+			if (!strcmp(dp->name, "ecpp")) {
+				struct platform_device *ecpp_op;
+
+				ecpp_op = of_find_device_by_node(dp);
+				if (ecpp_op)
+					config = ecpp_op->resource[1].start;
+				goto config_done;
+			}
+		}
+	config_done:
+
+		/*
+		 * Sanity check, is this really the NS87303?
+		 */
+		switch (config & 0x3ff) {
+		case 0x02e:
+		case 0x15c:
+		case 0x26e:
+		case 0x398:
+			break;
+		default:
+			config = 0;
+		}
+
+		if (!config)
+			return sun_floppy_types[0];
+
+		/* Enable PC-AT mode. */
+		ns87303_modify(config, ASC, 0, 0xc0);
+
+#ifdef PCI_FDC_SWAP_DRIVES
+		/*
+		 * If only Floppy 1 is present, swap drives.
+		 */
+		if (!sun_floppy_types[0] && sun_floppy_types[1]) {
+			/*
+			 * Set the drive exchange bit in FCR on NS87303,
+			 * make sure other bits are sane before doing so.
+			 */
+			ns87303_modify(config, FER, FER_EDM, 0);
+			ns87303_modify(config, ASC, ASC_DRV2_SEL, 0);
+			ns87303_modify(config, FCR, 0, FCR_LDE);
+
+			config = sun_floppy_types[0];
+			sun_floppy_types[0] = sun_floppy_types[1];
+			sun_floppy_types[1] = config;
+
+			if (sun_pci_broken_drive != -1) {
+				sun_pci_broken_drive = 1 - sun_pci_broken_drive;
+				sun_fdops.fd_outb = sun_pci_fd_lde_broken_outb;
+			}
+		}
+#endif /* PCI_FDC_SWAP_DRIVES */
+
+		return sun_floppy_types[0];
+	}
+	prop = of_get_property(op->dev.of_node, "status", NULL);
+	if (prop && !strncmp(state, "disabled", 8))
+		return 0;
+
+	/*
+	 * We cannot do of_ioremap here: it does request_region,
+	 * which the generic floppy driver tries to do once again.
+	 * But we must use the sdev resource values as they have
+	 * had parent ranges applied.
+	 */
+	sun_fdc = (struct sun_flpy_controller *)
+		(op->resource[0].start +
+		 ((op->resource[0].flags & 0x1ffUL) << 32UL));
+
+	/* Last minute sanity check... */
+	if (sbus_readb(&sun_fdc->status1_82077) == 0xff) {
+		sun_fdc = (struct sun_flpy_controller *)-1;
+		return 0;
+	}
+
+	sun_fdops.fd_inb = sun_82077_fd_inb;
+	sun_fdops.fd_outb = sun_82077_fd_outb;
+
+	can_use_virtual_dma = use_virtual_dma = 1;
+	sun_fdops.fd_enable_dma = sun_fd_enable_dma;
+	sun_fdops.fd_disable_dma = sun_fd_disable_dma;
+	sun_fdops.fd_set_dma_mode = sun_fd_set_dma_mode;
+	sun_fdops.fd_set_dma_addr = sun_fd_set_dma_addr;
+	sun_fdops.fd_set_dma_count = sun_fd_set_dma_count;
+	sun_fdops.get_dma_residue = sun_get_dma_residue;
+
+	sun_fdops.fd_request_irq = sun_fd_request_irq;
+	sun_fdops.fd_free_irq = sun_fd_free_irq;
+
+	sun_fdops.fd_eject = sun_fd_eject;
+
+	fdc_status = (unsigned long) &sun_fdc->status_82077;
+
+	/* Success... */
+	allowed_drive_mask = 0x01;
+	sun_floppy_types[0] = 4;
+	sun_floppy_types[1] = 0;
+
+	return sun_floppy_types[0];
+}
+
+#define EXTRA_FLOPPY_PARAMS
+
+static DEFINE_SPINLOCK(dma_spin_lock);
+
+#define claim_dma_lock() \
+({	unsigned long flags; \
+	spin_lock_irqsave(&dma_spin_lock, flags); \
+	flags; \
+})
+
+#define release_dma_lock(__flags) \
+	spin_unlock_irqrestore(&dma_spin_lock, __flags);
+
+#endif /* !(__ASM_SPARC64_FLOPPY_H) */
diff --git a/arch/sparc/include/asm/fpumacro.h b/arch/sparc/include/asm/fpumacro.h
new file mode 100644
index 0000000..bc378df
--- /dev/null
+++ b/arch/sparc/include/asm/fpumacro.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* fpumacro.h: FPU related macros.
+ *
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC64_FPUMACRO_H
+#define _SPARC64_FPUMACRO_H
+
+#include <asm/asi.h>
+#include <asm/visasm.h>
+
+struct fpustate {
+	u32	regs[64];
+};
+
+#define FPUSTATE (struct fpustate *)(current_thread_info()->fpregs)
+
+static inline unsigned long fprs_read(void)
+{
+	unsigned long retval;
+
+	__asm__ __volatile__("rd %%fprs, %0" : "=r" (retval));
+
+	return retval;
+}
+
+static inline void fprs_write(unsigned long val)
+{
+	__asm__ __volatile__("wr %0, 0x0, %%fprs" : : "r" (val));
+}
+
+#endif /* !(_SPARC64_FPUMACRO_H) */
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
new file mode 100644
index 0000000..d3aa1a5
--- /dev/null
+++ b/arch/sparc/include/asm/ftrace.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC64_FTRACE
+#define _ASM_SPARC64_FTRACE
+
+#ifdef CONFIG_MCOUNT
+#define MCOUNT_ADDR		((unsigned long)(_mcount))
+#define MCOUNT_INSN_SIZE	4 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+void _mcount(void);
+#endif
+
+#endif /* CONFIG_MCOUNT */
+
+#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY)
+#define HAVE_FUNCTION_GRAPH_FP_TEST
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/* relocation of mcount call site is the same as the address */
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /*  CONFIG_DYNAMIC_FTRACE */
+
+unsigned long prepare_ftrace_return(unsigned long parent,
+				    unsigned long self_addr,
+				    unsigned long frame_pointer);
+
+#endif /* _ASM_SPARC64_FTRACE */
diff --git a/arch/sparc/include/asm/futex.h b/arch/sparc/include/asm/futex.h
new file mode 100644
index 0000000..75a13a2
--- /dev/null
+++ b/arch/sparc/include/asm/futex.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_FUTEX_H
+#define ___ASM_SPARC_FUTEX_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/futex_64.h>
+#else
+#include <asm/futex_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/futex_32.h b/arch/sparc/include/asm/futex_32.h
new file mode 100644
index 0000000..6a332a9
--- /dev/null
+++ b/arch/sparc/include/asm/futex_32.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
+
+#include <asm-generic/futex.h>
+
+#endif
diff --git a/arch/sparc/include/asm/futex_64.h b/arch/sparc/include/asm/futex_64.h
new file mode 100644
index 0000000..0865ce7
--- /dev/null
+++ b/arch/sparc/include/asm/futex_64.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_FUTEX_H
+#define _SPARC64_FUTEX_H
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
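+/* The macro below is a load / modify / compare-and-swap loop: lduwa
+ * fetches the current value, "insn" computes the new value from it and
+ * oparg, and casa stores the result only if the word is still
+ * unchanged, retrying otherwise.  A rough C sketch:
+ *
+ *	do {
+ *		old = *uaddr;
+ *		new = op(old, oparg);
+ *	} while (cmpxchg(uaddr, old, new) != old);
+ */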
+#define __futex_cas_op(insn, ret, oldval, uaddr, oparg)	\
+	__asm__ __volatile__(				\
+	"\n1:	lduwa	[%3] %%asi, %2\n"		\
+	"	" insn "\n"				\
+	"2:	casa	[%3] %%asi, %2, %1\n"		\
+	"	cmp	%2, %1\n"			\
+	"	bne,pn	%%icc, 1b\n"			\
+	"	 mov	0, %0\n"			\
+	"3:\n"						\
+	"	.section .fixup,#alloc,#execinstr\n"	\
+	"	.align	4\n"				\
+	"4:	sethi	%%hi(3b), %0\n"			\
+	"	jmpl	%0 + %%lo(3b), %%g0\n"		\
+	"	 mov	%5, %0\n"			\
+	"	.previous\n"				\
+	"	.section __ex_table,\"a\"\n"		\
+	"	.align	4\n"				\
+	"	.word	1b, 4b\n"			\
+	"	.word	2b, 4b\n"			\
+	"	.previous\n"				\
+	: "=&r" (ret), "=&r" (oldval), "=&r" (tem)	\
+	: "r" (uaddr), "r" (oparg), "i" (-EFAULT)	\
+	: "memory")
+
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+		u32 __user *uaddr)
+{
+	int oldval = 0, ret, tem;
+
+	if (unlikely((((unsigned long) uaddr) & 0x3UL)))
+		return -EINVAL;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_cas_op("mov\t%4, %1", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_cas_op("add\t%2, %4, %1", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_cas_op("or\t%2, %4, %1", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_cas_op("andn\t%2, %4, %1", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_cas_op("xor\t%2, %4, %1", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret)
+		*oval = oldval;
+
+	return ret;
+}
+
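+/* casa compares *uaddr against 'oldval' and stores 'newval' on a
+ * match; either way the register operand ends up holding the value
+ * that was in memory, which is what lands in *uval below.
+ */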
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	int ret = 0;
+
+	__asm__ __volatile__(
+	"\n1:	casa	[%4] %%asi, %3, %1\n"
+	"2:\n"
+	"	.section .fixup,#alloc,#execinstr\n"
+	"	.align	4\n"
+	"3:	sethi	%%hi(2b), %0\n"
+	"	jmpl	%0 + %%lo(2b), %%g0\n"
+	"	mov	%5, %0\n"
+	"	.previous\n"
+	"	.section __ex_table,\"a\"\n"
+	"	.align	4\n"
+	"	.word	1b, 3b\n"
+	"	.previous\n"
+	: "+r" (ret), "=r" (newval)
+	: "1" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
+	: "memory");
+
+	*uval = newval;
+	return ret;
+}
+
+#endif /* !(_SPARC64_FUTEX_H) */
diff --git a/arch/sparc/include/asm/hardirq.h b/arch/sparc/include/asm/hardirq.h
new file mode 100644
index 0000000..a185f66
--- /dev/null
+++ b/arch/sparc/include/asm/hardirq.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_HARDIRQ_H
+#define ___ASM_SPARC_HARDIRQ_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/hardirq_64.h>
+#else
+#include <asm/hardirq_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/hardirq_32.h b/arch/sparc/include/asm/hardirq_32.h
new file mode 100644
index 0000000..9830d87
--- /dev/null
+++ b/arch/sparc/include/asm/hardirq_32.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* hardirq.h: 32-bit Sparc hard IRQ support.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998-2000 Anton Blanchard (anton@samba.org)
+ */
+
+#ifndef __SPARC_HARDIRQ_H
+#define __SPARC_HARDIRQ_H
+
+#include <asm-generic/hardirq.h>
+
+#endif /* __SPARC_HARDIRQ_H */
diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h
new file mode 100644
index 0000000..75b92bf
--- /dev/null
+++ b/arch/sparc/include/asm/hardirq_64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* hardirq.h: 64-bit Sparc hard IRQ support.
+ *
+ * Copyright (C) 1997, 1998, 2005 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef __SPARC64_HARDIRQ_H
+#define __SPARC64_HARDIRQ_H
+
+#include <asm/cpudata.h>
+
+#define __ARCH_IRQ_STAT
+
+#define local_softirq_pending_ref \
+	__cpu_data.__softirq_pending
+
+void ack_bad_irq(unsigned int irq);
+
+#endif /* !(__SPARC64_HARDIRQ_H) */
diff --git a/arch/sparc/include/asm/head.h b/arch/sparc/include/asm/head.h
new file mode 100644
index 0000000..25299b7
--- /dev/null
+++ b/arch/sparc/include/asm/head.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_HEAD_H
+#define ___ASM_SPARC_HEAD_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/head_64.h>
+#else
+#include <asm/head_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/head_32.h b/arch/sparc/include/asm/head_32.h
new file mode 100644
index 0000000..d2809c8
--- /dev/null
+++ b/arch/sparc/include/asm/head_32.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_HEAD_H
+#define __SPARC_HEAD_H
+
+#define KERNBASE        0xf0000000  /* First address the kernel will eventually be */
+
+#define WRITE_PAUSE      nop; nop; nop; /* Have to do this after %wim/%psr chg */
+
+/* Here are some trap goodies */
+
+/* Generic trap entry. */
+#define TRAP_ENTRY(type, label) \
+	rd %psr, %l0; b label; rd %wim, %l3; nop;
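+
+/* Each trap table entry is exactly four instructions (16 bytes), which
+ * is why these macros pad with nops or fold work into delay slots.
+ */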
+
+/* Data/text faults */
+#define SRMMU_TFAULT rd %psr, %l0; rd %wim, %l3; b srmmu_fault; mov 1, %l7;
+#define SRMMU_DFAULT rd %psr, %l0; rd %wim, %l3; b srmmu_fault; mov 0, %l7;
+
+/* This is for traps we should NEVER get. */
+#define BAD_TRAP(num) \
+        rd %psr, %l0; mov num, %l7; b bad_trap_handler; rd %wim, %l3;
+
+/* This is for traps when we want just skip the instruction which caused it */
+#define SKIP_TRAP(type, name) \
+	jmpl %l2, %g0; rett %l2 + 4; nop; nop;
+
+/* Notice that for the system calls we pull a trick.  We load up a
+ * different pointer to the system call vector table in %l7, but call
+ * the same generic system call low-level entry point.  The trap table
+ * entry sequences are also HyperSparc pipeline friendly ;-)
+ */
+
+/* Software trap for Linux system calls. */
+#define LINUX_SYSCALL_TRAP \
+        sethi %hi(sys_call_table), %l7; \
+        or %l7, %lo(sys_call_table), %l7; \
+        b linux_sparc_syscall; \
+        rd %psr, %l0;
+
+#define BREAKPOINT_TRAP \
+	b breakpoint_trap; \
+	rd %psr,%l0; \
+	nop; \
+	nop;
+
+#ifdef CONFIG_KGDB
+#define KGDB_TRAP(num)                  \
+	mov num, %l7;                   \
+	b kgdb_trap_low;                \
+	rd %psr,%l0;                    \
+	nop;
+#else
+#define KGDB_TRAP(num) \
+	BAD_TRAP(num)
+#endif
+
+/* The Get Condition Codes software trap for userland. */
+#define GETCC_TRAP \
+        b getcc_trap_handler; rd %psr, %l0; nop; nop;
+
+/* The Set Condition Codes software trap for userland. */
+#define SETCC_TRAP \
+        b setcc_trap_handler; rd %psr, %l0; nop; nop;
+
+/* The Get PSR software trap for userland. */
+#define GETPSR_TRAP \
+	rd %psr, %i0; jmp %l2; rett %l2 + 4; nop;
+
+/* This is for hard interrupts from level 1-14, 15 is non-maskable (nmi) and
+ * gets handled with another macro.
+ */
+#define TRAP_ENTRY_INTERRUPT(int_level) \
+        mov int_level, %l7; rd %psr, %l0; b real_irq_entry; rd %wim, %l3;
+
+/* Window overflows/underflows are special and we need to try to be as
+ * efficient as possible here....
+ */
+#define WINDOW_SPILL \
+        rd %psr, %l0; rd %wim, %l3; b spill_window_entry; andcc %l0, PSR_PS, %g0;
+
+#define WINDOW_FILL \
+        rd %psr, %l0; rd %wim, %l3; b fill_window_entry; andcc %l0, PSR_PS, %g0;
+
+#endif /* __SPARC_HEAD_H */
diff --git a/arch/sparc/include/asm/head_64.h b/arch/sparc/include/asm/head_64.h
new file mode 100644
index 0000000..69a2062
--- /dev/null
+++ b/arch/sparc/include/asm/head_64.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_HEAD_H
+#define _SPARC64_HEAD_H
+
+#include <asm/pstate.h>
+
+	/* wrpr	%g0, val, %gl */
+#define SET_GL(val)	\
+	.word	0xa1902000 | val
+
+	/* rdpr %gl, %gN */
+#define GET_GL_GLOBAL(N)	\
+	.word	0x81540000 | (N << 25)
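+
+	/* Both are emitted as raw .word encodings, presumably so the
+	 * file still assembles with toolchains that do not know the
+	 * sun4v-only %gl privileged register.
+	 */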
+
+#define KERNBASE	0x400000
+
+#define	PTREGS_OFF	(STACK_BIAS + STACKFRAME_SZ)
+
+#define	RTRAP_PSTATE		(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_IE)
+#define	RTRAP_PSTATE_IRQOFF	(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV)
+#define RTRAP_PSTATE_AG_IRQOFF	(PSTATE_TSO|PSTATE_PEF|PSTATE_PRIV|PSTATE_AG)
+
+#define __CHEETAH_ID	0x003e0014
+#define __JALAPENO_ID	0x003e0016
+#define __SERRANO_ID	0x003e0022
+
+#define CHEETAH_MANUF		0x003e
+#define CHEETAH_IMPL		0x0014 /* Ultra-III   */
+#define CHEETAH_PLUS_IMPL	0x0015 /* Ultra-III+  */
+#define JALAPENO_IMPL		0x0016 /* Ultra-IIIi  */
+#define JAGUAR_IMPL		0x0018 /* Ultra-IV    */
+#define PANTHER_IMPL		0x0019 /* Ultra-IV+   */
+#define SERRANO_IMPL		0x0022 /* Ultra-IIIi+ */
+
+#define BRANCH_IF_SUN4V(tmp1,label)		\
+	sethi	%hi(is_sun4v), %tmp1;		\
+	lduw	[%tmp1 + %lo(is_sun4v)], %tmp1; \
+	brnz,pn	%tmp1, label;			\
+	 nop
+
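+/* The %ver register carries the manufacturer id in bits 63:48 and the
+ * implementation id in bits 47:32, so "srlx %ver, 32" below leaves the
+ * (manuf << 16) | impl value that the __*_ID constants above encode.
+ */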
+#define BRANCH_IF_CHEETAH_BASE(tmp1,tmp2,label)	\
+	rdpr	%ver, %tmp1;			\
+	sethi	%hi(__CHEETAH_ID), %tmp2;	\
+	srlx	%tmp1, 32, %tmp1;		\
+	or	%tmp2, %lo(__CHEETAH_ID), %tmp2;\
+	cmp	%tmp1, %tmp2;			\
+	be,pn	%icc, label;			\
+	 nop;
+
+#define BRANCH_IF_JALAPENO(tmp1,tmp2,label)	\
+	rdpr	%ver, %tmp1;			\
+	sethi	%hi(__JALAPENO_ID), %tmp2;	\
+	srlx	%tmp1, 32, %tmp1;		\
+	or	%tmp2, %lo(__JALAPENO_ID), %tmp2;\
+	cmp	%tmp1, %tmp2;			\
+	be,pn	%icc, label;			\
+	 nop;
+
+#define BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(tmp1,tmp2,label)	\
+	rdpr	%ver, %tmp1;			\
+	srlx	%tmp1, (32 + 16), %tmp2;	\
+	cmp	%tmp2, CHEETAH_MANUF;		\
+	bne,pt	%xcc, 99f;			\
+	 sllx	%tmp1, 16, %tmp1;		\
+	srlx	%tmp1, (32 + 16), %tmp2;	\
+	cmp	%tmp2, CHEETAH_PLUS_IMPL;	\
+	bgeu,pt	%xcc, label;			\
+99:	 nop;
+
+#define BRANCH_IF_ANY_CHEETAH(tmp1,tmp2,label)	\
+	rdpr	%ver, %tmp1;			\
+	srlx	%tmp1, (32 + 16), %tmp2;	\
+	cmp	%tmp2, CHEETAH_MANUF;		\
+	bne,pt	%xcc, 99f;			\
+	 sllx	%tmp1, 16, %tmp1;		\
+	srlx	%tmp1, (32 + 16), %tmp2;	\
+	cmp	%tmp2, CHEETAH_IMPL;		\
+	bgeu,pt	%xcc, label;			\
+99:	 nop;
+
+#endif /* !(_SPARC64_HEAD_H) */
diff --git a/arch/sparc/include/asm/hibernate.h b/arch/sparc/include/asm/hibernate.h
new file mode 100644
index 0000000..3bb0a96
--- /dev/null
+++ b/arch/sparc/include/asm/hibernate.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * hibernate.h:  Hibernation support specific to sparc64.
+ *
+ * Copyright (C) 2013 Kirill V Tkhai (tkhai@yandex.ru)
+ */
+
+#ifndef ___SPARC_HIBERNATE_H
+#define ___SPARC_HIBERNATE_H
+
+struct saved_context {
+	unsigned long fp;
+	unsigned long cwp;
+	unsigned long wstate;
+
+	unsigned long tick;
+	unsigned long pstate;
+
+	unsigned long g4;
+	unsigned long g5;
+	unsigned long g6;
+};
+
+#endif
diff --git a/arch/sparc/include/asm/highmem.h b/arch/sparc/include/asm/highmem.h
new file mode 100644
index 0000000..18d7769
--- /dev/null
+++ b/arch/sparc/include/asm/highmem.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *		      Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * up to 16 Terabytes of physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#ifdef __KERNEL__
+
+#include <linux/interrupt.h>
+#include <asm/vaddrs.h>
+#include <asm/kmap_types.h>
+#include <asm/pgtable.h>
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pgprot_t kmap_prot;
+extern pte_t *pkmap_page_table;
+
+void kmap_init(void) __init;
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily, subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.  Currently the simplest way to do this is to align the
+ * pkmap region on a pagetable boundary (4MB).
+ */
+#define LAST_PKMAP 1024
+#define PKMAP_SIZE (LAST_PKMAP << PAGE_SHIFT)
+#define PKMAP_BASE PMD_ALIGN(SRMMU_NOCACHE_VADDR + (SRMMU_MAX_NOCACHE_PAGES << PAGE_SHIFT))
+
+#define LAST_PKMAP_MASK (LAST_PKMAP - 1)
+#define PKMAP_NR(virt)  ((virt - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)  (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define PKMAP_END (PKMAP_ADDR(LAST_PKMAP))
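+
+/* With 4K pages this is a 4MB pkmap window (1024 one-page slots),
+ * matching the pagetable-boundary alignment described above.
+ */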
+
+void *kmap_high(struct page *page);
+void kunmap_high(struct page *page);
+
+static inline void *kmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return page_address(page);
+	return kmap_high(page);
+}
+
+static inline void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+
+void *kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+
+#define flush_cache_kmaps()	flush_cache_all()
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_HIGHMEM_H */
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
new file mode 100644
index 0000000..300557c
--- /dev/null
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC64_HUGETLB_H
+#define _ASM_SPARC64_HUGETLB_H
+
+#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
+
+#ifdef CONFIG_HUGETLB_PAGE
+struct pud_huge_patch_entry {
+	unsigned int addr;
+	unsigned int insn;
+};
+extern struct pud_huge_patch_entry __pud_huge_patch, __pud_huge_patch_end;
+#endif
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t pte);
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep);
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+					 unsigned long addr,
+					 unsigned long len)
+{
+	return 0;
+}
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+static inline int prepare_hugepage_range(struct file *file,
+			unsigned long addr, unsigned long len)
+{
+	struct hstate *h = hstate_file(file);
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (addr & ~huge_page_mask(h))
+		return -EINVAL;
+	return 0;
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+	return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+	return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	pte_t old_pte = *ptep;
+	set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+					     unsigned long addr, pte_t *ptep,
+					     pte_t pte, int dirty)
+{
+	int changed = !pte_same(*ptep, pte);
+	if (changed) {
+		set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+		flush_tlb_page(vma, addr);
+	}
+	return changed;
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+	return *ptep;
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
+			    unsigned long end, unsigned long floor,
+			    unsigned long ceiling);
+
+#endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/sparc/include/asm/hvtramp.h b/arch/sparc/include/asm/hvtramp.h
new file mode 100644
index 0000000..688ea43
--- /dev/null
+++ b/arch/sparc/include/asm/hvtramp.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_HVTRAP_H
+#define _SPARC64_HVTRAP_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct hvtramp_mapping {
+	__u64		vaddr;
+	__u64		tte;
+};
+
+struct hvtramp_descr {
+	__u32			cpu;
+	__u32			num_mappings;
+	__u64			fault_info_va;
+	__u64			fault_info_pa;
+	__u64			thread_reg;
+	struct hvtramp_mapping	maps[1];
+};
+
+void hv_cpu_startup(unsigned long hvdescr_pa);
+
+#endif
+
+#define HVTRAMP_DESCR_CPU		0x00
+#define HVTRAMP_DESCR_NUM_MAPPINGS	0x04
+#define HVTRAMP_DESCR_FAULT_INFO_VA	0x08
+#define HVTRAMP_DESCR_FAULT_INFO_PA	0x10
+#define HVTRAMP_DESCR_THREAD_REG	0x18
+#define HVTRAMP_DESCR_MAPS		0x20
+
+#define HVTRAMP_MAPPING_VADDR		0x00
+#define HVTRAMP_MAPPING_TTE		0x08
+#define HVTRAMP_MAPPING_SIZE		0x10
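+
+/* The byte offsets above mirror offsetof() into struct hvtramp_descr
+ * and struct hvtramp_mapping (e.g. HVTRAMP_DESCR_THREAD_REG == 0x18 ==
+ * offsetof(struct hvtramp_descr, thread_reg)), so the startup
+ * trampoline can index the descriptor from assembly.
+ */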
+
+#endif /* _SPARC64_HVTRAP_H */
diff --git a/arch/sparc/include/asm/hw_irq.h b/arch/sparc/include/asm/hw_irq.h
new file mode 100644
index 0000000..8d30a76
--- /dev/null
+++ b/arch/sparc/include/asm/hw_irq.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_SPARC_HW_IRQ_H
+#define __ASM_SPARC_HW_IRQ_H
+
+/* Dummy include. */
+
+#endif
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
new file mode 100644
index 0000000..08650d5
--- /dev/null
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -0,0 +1,3527 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_HYPERVISOR_H
+#define _SPARC64_HYPERVISOR_H
+
+/* Sun4v hypervisor interfaces and defines.
+ *
+ * Hypervisor calls are made via traps to software traps number 0x80
+ * and above.  Registers %o0 to %o5 serve as argument, status, and
+ * return value registers.
+ *
+ * There are two kinds of these traps.  First there are the normal
+ * "fast traps" which use software trap 0x80 and encode the function
+ * to invoke by number in register %o5.  Argument and return value
+ * handling is as follows:
+ *
+ * -----------------------------------------------
+ * |  %o5  | function number |     undefined     |
+ * |  %o0  |   argument 0    |   return status   |
+ * |  %o1  |   argument 1    |   return value 1  |
+ * |  %o2  |   argument 2    |   return value 2  |
+ * |  %o3  |   argument 3    |   return value 3  |
+ * |  %o4  |   argument 4    |   return value 4  |
+ * -----------------------------------------------
+ *
+ * The second type are "hyper-fast traps" which encode the function
+ * number in the software trap number itself.  So these use trap
+ * numbers > 0x80.  The register usage for hyper-fast traps is as
+ * follows:
+ *
+ * -----------------------------------------------
+ * |  %o0  |   argument 0    |   return status   |
+ * |  %o1  |   argument 1    |   return value 1  |
+ * |  %o2  |   argument 2    |   return value 2  |
+ * |  %o3  |   argument 3    |   return value 3  |
+ * |  %o4  |   argument 4    |   return value 4  |
+ * -----------------------------------------------
+ *
+ * Registers providing explicit arguments to the hypervisor calls
+ * are volatile across the call.  Upon return their values are
+ * undefined unless explicitly specified as containing a particular
+ * return value by the specific call.  The return status is always
+ * returned in register %o0, zero indicates a successful execution of
+ * the hypervisor call and other values indicate an error status as
+ * defined below.  So, for example, if a hyper-fast trap takes
+ * arguments 0, 1, and 2, then %o0, %o1, and %o2 are volatile across
+ * the call and %o3, %o4, and %o5 would be preserved.
+ *
+ * If the hypervisor trap is invalid, or the fast trap function number
+ * is invalid, HV_EBADTRAP will be returned in %o0.  Also, all 64-bits
+ * of the argument and return values are significant.
+ */
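+
+/* For illustration, a normal fast trap is issued along these lines
+ * (FUNCTION and argN stand for a concrete call):
+ *
+ *	mov	FUNCTION, %o5
+ *	mov	arg0, %o0
+ *	mov	arg1, %o1
+ *	ta	HV_FAST_TRAP
+ *	! %o0 now holds the return status, %o1-%o4 any return values
+ */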
+
+/* Trap numbers.  */
+#define HV_FAST_TRAP		0x80
+#define HV_MMU_MAP_ADDR_TRAP	0x83
+#define HV_MMU_UNMAP_ADDR_TRAP	0x84
+#define HV_TTRACE_ADDENTRY_TRAP	0x85
+#define HV_CORE_TRAP		0xff
+
+/* Error codes.  */
+#define HV_EOK				0  /* Successful return            */
+#define HV_ENOCPU			1  /* Invalid CPU id               */
+#define HV_ENORADDR			2  /* Invalid real address         */
+#define HV_ENOINTR			3  /* Invalid interrupt id         */
+#define HV_EBADPGSZ			4  /* Invalid pagesize encoding    */
+#define HV_EBADTSB			5  /* Invalid TSB description      */
+#define HV_EINVAL			6  /* Invalid argument             */
+#define HV_EBADTRAP			7  /* Invalid function number      */
+#define HV_EBADALIGN			8  /* Invalid address alignment    */
+#define HV_EWOULDBLOCK			9  /* Cannot complete w/o blocking */
+#define HV_ENOACCESS			10 /* No access to resource        */
+#define HV_EIO				11 /* I/O error                    */
+#define HV_ECPUERROR			12 /* CPU in error state           */
+#define HV_ENOTSUPPORTED		13 /* Function not supported       */
+#define HV_ENOMAP			14 /* No mapping found             */
+#define HV_ETOOMANY			15 /* Too many items specified     */
+#define HV_ECHANNEL			16 /* Invalid LDC channel          */
+#define HV_EBUSY			17 /* Resource busy                */
+#define HV_EUNAVAILABLE			23 /* Resource or operation not
+					    * currently available, but may
+					    * become available in the future
+					    */
+
+/* mach_exit()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_EXIT
+ * ARG0:	exit code
+ * ERRORS:	This service does not return.
+ *
+ * Stop all CPUs in the virtual domain and place them into the stopped
+ * state.  The 64-bit exit code may be passed to a service entity as
+ * the domain's exit status.  On systems without a service entity, the
+ * domain will undergo a reset, and the boot firmware will be
+ * reloaded.
+ *
+ * This function will never return to the guest that invokes it.
+ *
+ * Note: By convention an exit code of zero denotes a successful exit by
+ *       the guest code.  A non-zero exit code denotes a guest specific
+ *       error indication.
+ *
+ */
+#define HV_FAST_MACH_EXIT		0x00
+
+#ifndef __ASSEMBLY__
+void sun4v_mach_exit(unsigned long exit_code);
+#endif
+
+/* Domain services.  */
+
+/* mach_desc()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_DESC
+ * ARG0:	buffer
+ * ARG1:	length
+ * RET0:	status
+ * RET1:	length
+ * ERRORS:	HV_EBADALIGN	Buffer is badly aligned
+ *		HV_ENORADDR	Buffer is at an illegal real address.
+ *		HV_EINVAL	Buffer length is too small for complete
+ *				machine description.
+ *
+ * Copy the most current machine description into the buffer indicated
+ * by the real address in ARG0.  The buffer provided must be 16 byte
+ * aligned.  Upon success or HV_EINVAL, this service returns the
+ * actual size of the machine description in the RET1 return value.
+ *
+ * Note: A method of determining the appropriate buffer size for the
+ *       machine description is to first call this service with a buffer
+ *       length of 0 bytes.
+ */
+#define HV_FAST_MACH_DESC		0x01
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_mach_desc(unsigned long buffer_pa,
+			      unsigned long buf_len,
+			      unsigned long *real_buf_len);
+#endif
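+
+/* A sizing idiom (sketch) per the note above; 'ra' stands for the real
+ * address of a 16-byte aligned buffer obtained by the caller:
+ *
+ *	unsigned long len = 0, status;
+ *	status = sun4v_mach_desc(ra, 0UL, &len);  -- HV_EINVAL, len set
+ *	status = sun4v_mach_desc(ra, len, &len);  -- HV_EOK on success
+ */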
+
+/* mach_sir()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_SIR
+ * ERRORS:	This service does not return.
+ *
+ * Perform a software initiated reset of the virtual machine domain.
+ * All CPUs are captured as soon as possible, all hardware devices are
+ * returned to the entry default state, and the domain is restarted at
+ * the SIR (trap type 0x04) real trap table (RTBA) entry point on one
+ * of the CPUs.  The single CPU restarted is selected as determined by
+ * platform specific policy.  Memory is preserved across this
+ * operation.
+ */
+#define HV_FAST_MACH_SIR		0x02
+
+#ifndef __ASSEMBLY__
+void sun4v_mach_sir(void);
+#endif
+
+/* mach_set_watchdog()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_SET_WATCHDOG
+ * ARG0:	timeout in milliseconds
+ * RET0:	status
+ * RET1:	time remaining in milliseconds
+ *
+ * A guest uses this API to set a watchdog timer.  Once the guest has set
+ * the timer, it must call the timer service again either to disable or
+ * postpone the expiration.  If the timer expires before being reset or
+ * disabled, then the hypervisor takes a platform specific action leading
+ * to guest termination within a bounded time period.  The platform action
+ * may include recovery actions such as reporting the expiration to a
+ * Service Processor, and/or automatically restarting the guest.
+ *
+ * The 'timeout' parameter is specified in milliseconds, however the
+ * implemented granularity is given by the 'watchdog-resolution'
+ * property in the 'platform' node of the guest's machine description.
+ * The largest allowed timeout value is specified by the
+ * 'watchdog-max-timeout' property of the 'platform' node.
+ *
+ * If the 'timeout' argument is not zero, the watchdog timer is set to
+ * expire after a minimum of 'timeout' milliseconds.
+ *
+ * If the 'timeout' argument is zero, the watchdog timer is disabled.
+ *
+ * If the 'timeout' value exceeds the value of the 'watchdog-max-timeout'
+ * property, the hypervisor leaves the watchdog timer state unchanged,
+ * and returns a status of EINVAL.
+ *
+ * The 'time remaining' return value is valid regardless of whether the
+ * return status is EOK or EINVAL.  A non-zero return value indicates the
+ * number of milliseconds that were remaining until the timer was to expire.
+ * If less than one millisecond remains, the return value is '1'.  If the
+ * watchdog timer was disabled at the time of the call, the return value is
+ * zero.
+ *
+ * If the hypervisor cannot support the exact timeout value requested, but
+ * can support a larger timeout value, the hypervisor may round the actual
+ * timeout to a value larger than the requested timeout, consequently the
+ * 'time remaining' return value may be larger than the previously requested
+ * timeout value.
+ *
+ * Any guest OS debugger should be aware that the watchdog service may be in
+ * use.  Consequently, it is recommended that the watchdog service is
+ * disabled upon debugger entry (e.g. reaching a breakpoint), and then
+ * re-enabled upon returning to normal execution.  The API has been designed
+ * with this in mind, and the 'time remaining' result of the disable call may
+ * be used directly as the timeout argument of the re-enable call.
+ */
+#define HV_FAST_MACH_SET_WATCHDOG	0x05
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_mach_set_watchdog(unsigned long timeout,
+				      unsigned long *orig_timeout);
+#endif
+
+/* CPU services.
+ *
+ * CPUs represent devices that can execute software threads.  A single
+ * chip that contains multiple cores or strands is represented as
+ * multiple CPUs with unique CPU identifiers.  CPUs are exported to
+ * OBP via the machine description (and to the OS via the OBP device
+ * tree).  CPUs are always in one of three states: stopped, running,
+ * or error.
+ *
+ * A CPU ID is a pre-assigned 16-bit value that uniquely identifies a
+ * CPU within a logical domain.  Operations that are to be performed
+ * on multiple CPUs specify them via a CPU list.  A CPU list is an
+ * array in real memory, of which each 16-bit word is a CPU ID.  CPU
+ * lists are passed through the API as two arguments.  The first is
+ * the number of entries (16-bit words) in the CPU list, and the
+ * second is the (real address) pointer to the CPU ID list.
+ */
+
+/* cpu_start()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_START
+ * ARG0:	CPU ID
+ * ARG1:	PC
+ * ARG2:	RTBA
+ * ARG3:	target ARG0
+ * RET0:	status
+ * ERRORS:	ENOCPU		Invalid CPU ID
+ *		EINVAL		Target CPU ID is not in the stopped state
+ *		ENORADDR	Invalid PC or RTBA real address
+ *		EBADALIGN	Unaligned PC or unaligned RTBA
+ *		EWOULDBLOCK	Starting resources are not available
+ *
+ * Start CPU with given CPU ID with PC in %pc and with a real trap
+ * base address value of RTBA.  The indicated CPU must be in the
+ * stopped state.  The supplied RTBA must be aligned on a 256 byte
+ * boundary.  On successful completion, the specified CPU will be in
+ * the running state and will be supplied with "target ARG0" in %o0
+ * and RTBA in %tba.
+ */
+#define HV_FAST_CPU_START		0x10
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_start(unsigned long cpuid,
+			      unsigned long pc,
+			      unsigned long rtba,
+			      unsigned long arg0);
+#endif
+
+/* cpu_stop()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_STOP
+ * ARG0:	CPU ID
+ * RET0:	status
+ * ERRORS:	ENOCPU		Invalid CPU ID
+ *		EINVAL		Target CPU ID is the current cpu
+ *		EINVAL		Target CPU ID is not in the running state
+ *		EWOULDBLOCK	Stopping resources are not available
+ *		ENOTSUPPORTED	Not supported on this platform
+ *
+ * The specified CPU is stopped.  The indicated CPU must be in the
+ * running state.  On completion, it will be in the stopped state.  It
+ * is not legal to stop the current CPU.
+ *
+ * Note: As this service cannot be used to stop the current cpu, this service
+ *       may not be used to stop the last running CPU in a domain.  To stop
+ *       and exit a running domain, a guest must use the mach_exit() service.
+ */
+#define HV_FAST_CPU_STOP		0x11
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_stop(unsigned long cpuid);
+#endif
+
+/* cpu_yield()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_YIELD
+ * RET0:	status
+ * ERRORS:	No possible error.
+ *
+ * Suspend execution on the current CPU.  Execution will resume when
+ * an interrupt (device, %stick_compare, or cross-call) is targeted to
+ * the CPU.  On some CPUs, this API may be used by the hypervisor to
+ * save power by disabling hardware strands.
+ */
+#define HV_FAST_CPU_YIELD		0x12
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_yield(void);
+#endif
+
+/* cpu_poke()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_POKE
+ * ARG0:	CPU ID
+ * RET0:	status
+ * ERRORS:	ENOCPU		cpuid refers to a CPU that does not exist
+ *		EINVAL		cpuid is current CPU
+ *
+ * Poke CPU cpuid. If the target CPU is currently suspended having
+ * invoked the cpu-yield service, that vCPU will be resumed.
+ * Poke interrupts may only be sent to valid, non-local CPUs.
+ * It is not legal to poke the current vCPU.
+ */
+#define HV_FAST_CPU_POKE                0x13
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_poke(unsigned long cpuid);
+#endif
+
+/* cpu_qconf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_QCONF
+ * ARG0:	queue
+ * ARG1:	base real address
+ * ARG2:	number of entries
+ * RET0:	status
+ * ERRORS:	ENORADDR	Invalid base real address
+ *		EINVAL		Invalid queue or number of entries is less
+ *				than 2 or too large.
+ *		EBADALIGN	Base real address is not correctly aligned
+ *				for size.
+ *
+ * Configure the given queue to be placed at the given base real
+ * address, with the given number of entries.  The number of entries
+ * must be a power of 2.  The base real address must be aligned
+ * exactly to match the queue size.  Each queue entry is 64 bytes
+ * long, so for example a 32 entry queue must be aligned on a 2048
+ * byte real address boundary.
+ *
+ * The specified queue is unconfigured if the number of entries is given
+ * as zero.
+ *
+ * For the current version of this API service, the argument queue is defined
+ * as follows:
+ *
+ *	queue		description
+ *	-----		-------------------------
+ *	0x3c		cpu mondo queue
+ *	0x3d		device mondo queue
+ *	0x3e		resumable error queue
+ *	0x3f		non-resumable error queue
+ *
+ * Note: The maximum number of entries for each queue for a specific cpu may
+ *       be determined from the machine description.
+ */
+#define HV_FAST_CPU_QCONF		0x14
+#define  HV_CPU_QUEUE_CPU_MONDO		 0x3c
+#define  HV_CPU_QUEUE_DEVICE_MONDO	 0x3d
+#define  HV_CPU_QUEUE_RES_ERROR		 0x3e
+#define  HV_CPU_QUEUE_NONRES_ERROR	 0x3f
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_qconf(unsigned long type,
+			      unsigned long queue_paddr,
+			      unsigned long num_queue_entries);
+#endif
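+
+/* Example (illustrative only): configuring a 32-entry cpu mondo queue.
+ * Each entry is 64 bytes, so the queue occupies 32 * 64 = 2048 bytes
+ * and its base real address must be 2048-byte aligned.  Obtaining the
+ * real address of the backing buffer (e.g. via __pa()) is assumed to
+ * be done by the caller.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_conf_cpu_mondo_queue(unsigned long queue_ra)
+{
+	return sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO, queue_ra, 32);
+}
+#endif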
+
+/* cpu_qinfo()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_QINFO
+ * ARG0:	queue
+ * RET0:	status
+ * RET1:	base real address
+ * RET2:	number of entries
+ * ERRORS:	EINVAL		Invalid queue
+ *
+ * Return the configuration info for the given queue.  The base real
+ * address and number of entries of the defined queue are returned.
+ * The queue argument values are the same as for cpu_qconf() above.
+ *
+ * If the specified queue is a valid queue number, but no queue has
+ * been defined, the number of entries will be set to zero and the
+ * base real address returned is undefined.
+ */
+#define HV_FAST_CPU_QINFO		0x15
+
+/* cpu_mondo_send()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_MONDO_SEND
+ * ARG0-1:	CPU list
+ * ARG2:	data real address
+ * RET0:	status
+ * ERRORS:	EBADALIGN	Mondo data is not 64-byte aligned or CPU list
+ *				is not 2-byte aligned.
+ *		ENORADDR	Invalid data mondo address, or invalid cpu list
+ *				address.
+ *		ENOCPU		Invalid cpu in CPU list
+ *		EWOULDBLOCK	Some or all of the listed CPUs did not receive
+ *				the mondo
+ *		ECPUERROR	One or more of the listed CPUs are in error
+ *				state, use HV_FAST_CPU_STATE to see which ones
+ *		EINVAL		CPU list includes caller's CPU ID
+ *
+ * Send a mondo interrupt to the CPUs in the given CPU list with the
+ * 64-bytes at the given data real address.  The data must be 64-byte
+ * aligned.  The mondo data will be delivered to the cpu_mondo queues
+ * of the recipient CPUs.
+ *
+ * In all cases, error or not, the CPUs in the CPU list to which the
+ * mondo has been successfully delivered will be indicated by having
+ * their entry in CPU list updated with the value 0xffff.
+ */
+#define HV_FAST_CPU_MONDO_SEND		0x42
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_cpu_mondo_send(unsigned long cpu_count,
+				   unsigned long cpu_list_pa,
+				   unsigned long mondo_block_pa);
+#endif
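+
+/* Example (illustrative only): sending a mondo and checking which of
+ * the listed CPUs received it.  cpu_list is the guest's virtual view
+ * of the CPU ID array whose real address is cpu_list_ra; data_ra is
+ * the real address of the 64-byte aligned mondo data.  Obtaining the
+ * real addresses (e.g. via __pa()) is assumed to be done by the
+ * caller.  Delivered entries are overwritten with 0xffff.
+ */
+#ifndef __ASSEMBLY__
+static inline int example_mondo_send(unsigned short *cpu_list,
+				     unsigned long cpu_count,
+				     unsigned long cpu_list_ra,
+				     unsigned long data_ra)
+{
+	unsigned long hv_err, i;
+
+	hv_err = sun4v_cpu_mondo_send(cpu_count, cpu_list_ra, data_ra);
+	if (hv_err)
+		return -1;
+	for (i = 0; i < cpu_count; i++) {
+		if (cpu_list[i] != 0xffff)
+			return -1;	/* this CPU did not get the mondo */
+	}
+	return 0;
+}
+#endif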
+
+/* cpu_myid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_MYID
+ * RET0:	status
+ * RET1:	CPU ID
+ * ERRORS:	No errors defined.
+ *
+ * Return the hypervisor ID handle for the current CPU.  Used by a
+ * virtual CPU to discover its own identity.
+ */
+#define HV_FAST_CPU_MYID		0x16
+
+/* cpu_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_STATE
+ * ARG0:	CPU ID
+ * RET0:	status
+ * RET1:	state
+ * ERRORS:	ENOCPU		Invalid CPU ID
+ *
+ * Retrieve the current state of the CPU with the given CPU ID.
+ */
+#define HV_FAST_CPU_STATE		0x17
+#define  HV_CPU_STATE_STOPPED		 0x01
+#define  HV_CPU_STATE_RUNNING		 0x02
+#define  HV_CPU_STATE_ERROR		 0x03
+
+#ifndef __ASSEMBLY__
+long sun4v_cpu_state(unsigned long cpuid);
+#endif
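+
+/* Example (illustrative only): starting a stopped CPU and spinning
+ * until it reports the running state.  pc and rtba are real addresses
+ * supplied by the caller; rtba must be 256-byte aligned.  A real
+ * caller would bound the polling loop and check for errors from
+ * sun4v_cpu_state().
+ */
+#ifndef __ASSEMBLY__
+static inline long example_cpu_start_and_wait(unsigned long cpuid,
+					      unsigned long pc,
+					      unsigned long rtba,
+					      unsigned long arg0)
+{
+	if (sun4v_cpu_start(cpuid, pc, rtba, arg0))
+		return -1;
+	while (sun4v_cpu_state(cpuid) != HV_CPU_STATE_RUNNING)
+		/* spin */;
+	return 0;
+}
+#endif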
+
+/* cpu_set_rtba()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_SET_RTBA
+ * ARG0:	RTBA
+ * RET0:	status
+ * RET1:	previous RTBA
+ * ERRORS:	ENORADDR	Invalid RTBA real address
+ *		EBADALIGN	RTBA is incorrectly aligned for a trap table
+ *
+ * Set the real trap base address of the local cpu to the given RTBA.
+ * The supplied RTBA must be aligned on a 256 byte boundary.  Upon
+ * success the previous value of the RTBA is returned in RET1.
+ *
+ * Note: This service does not affect %tba
+ */
+#define HV_FAST_CPU_SET_RTBA		0x18
+
+/* cpu_get_rtba()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CPU_GET_RTBA
+ * RET0:	status
+ * RET1:	current RTBA
+ * ERRORS:	No possible error.
+ *
+ * Returns the current value of RTBA in RET1.
+ */
+#define HV_FAST_CPU_GET_RTBA		0x19
+
+/* MMU services.
+ *
+ * Layout of a TSB description for mmu_tsb_ctx{,non}0() calls.
+ */
+#ifndef __ASSEMBLY__
+struct hv_tsb_descr {
+	unsigned short		pgsz_idx;
+	unsigned short		assoc;
+	unsigned int		num_ttes;	/* in TTEs */
+	unsigned int		ctx_idx;
+	unsigned int		pgsz_mask;
+	unsigned long		tsb_base;
+	unsigned long		resv;
+};
+#endif
+#define HV_TSB_DESCR_PGSZ_IDX_OFFSET	0x00
+#define HV_TSB_DESCR_ASSOC_OFFSET	0x02
+#define HV_TSB_DESCR_NUM_TTES_OFFSET	0x04
+#define HV_TSB_DESCR_CTX_IDX_OFFSET	0x08
+#define HV_TSB_DESCR_PGSZ_MASK_OFFSET	0x0c
+#define HV_TSB_DESCR_TSB_BASE_OFFSET	0x10
+#define HV_TSB_DESCR_RESV_OFFSET	0x18
+
+/* Page size bitmask.  */
+#define HV_PGSZ_MASK_8K			(1 << 0)
+#define HV_PGSZ_MASK_64K		(1 << 1)
+#define HV_PGSZ_MASK_512K		(1 << 2)
+#define HV_PGSZ_MASK_4MB		(1 << 3)
+#define HV_PGSZ_MASK_32MB		(1 << 4)
+#define HV_PGSZ_MASK_256MB		(1 << 5)
+#define HV_PGSZ_MASK_2GB		(1 << 6)
+#define HV_PGSZ_MASK_16GB		(1 << 7)
+
+/* Page size index.  The value given in the TSB descriptor must correspond
+ * to the smallest page size specified in the pgsz_mask page size bitmask.
+ */
+#define HV_PGSZ_IDX_8K			0
+#define HV_PGSZ_IDX_64K			1
+#define HV_PGSZ_IDX_512K		2
+#define HV_PGSZ_IDX_4MB			3
+#define HV_PGSZ_IDX_32MB		4
+#define HV_PGSZ_IDX_256MB		5
+#define HV_PGSZ_IDX_2GB			6
+#define HV_PGSZ_IDX_16GB		7
+
+/* MMU fault status area.
+ *
+ * MMU related faults have their status and fault address information
+ * placed into a memory region made available by privileged code.  Each
+ * virtual processor must make a mmu_fault_area_conf() call to tell the
+ * hypervisor where that processor's fault status should be stored.
+ *
+ * The fault status block is a multiple of 64-bytes and must be aligned
+ * on a 64-byte boundary.
+ */
+#ifndef __ASSEMBLY__
+struct hv_fault_status {
+	unsigned long		i_fault_type;
+	unsigned long		i_fault_addr;
+	unsigned long		i_fault_ctx;
+	unsigned long		i_reserved[5];
+	unsigned long		d_fault_type;
+	unsigned long		d_fault_addr;
+	unsigned long		d_fault_ctx;
+	unsigned long		d_reserved[5];
+};
+#endif
+#define HV_FAULT_I_TYPE_OFFSET	0x00
+#define HV_FAULT_I_ADDR_OFFSET	0x08
+#define HV_FAULT_I_CTX_OFFSET	0x10
+#define HV_FAULT_D_TYPE_OFFSET	0x40
+#define HV_FAULT_D_ADDR_OFFSET	0x48
+#define HV_FAULT_D_CTX_OFFSET	0x50
+
+#define HV_FAULT_TYPE_FAST_MISS	1
+#define HV_FAULT_TYPE_FAST_PROT	2
+#define HV_FAULT_TYPE_MMU_MISS	3
+#define HV_FAULT_TYPE_INV_RA	4
+#define HV_FAULT_TYPE_PRIV_VIOL	5
+#define HV_FAULT_TYPE_PROT_VIOL	6
+#define HV_FAULT_TYPE_NFO	7
+#define HV_FAULT_TYPE_NFO_SEFF	8
+#define HV_FAULT_TYPE_INV_VA	9
+#define HV_FAULT_TYPE_INV_ASI	10
+#define HV_FAULT_TYPE_NC_ATOMIC	11
+#define HV_FAULT_TYPE_PRIV_ACT	12
+#define HV_FAULT_TYPE_RESV1	13
+#define HV_FAULT_TYPE_UNALIGNED	14
+#define HV_FAULT_TYPE_INV_PGSZ	15
+#define HV_FAULT_TYPE_MCD	17
+#define HV_FAULT_TYPE_MCD_DIS	18
+/* Values 16 and 19 --> -2 are reserved.  */
+#define HV_FAULT_TYPE_MULTIPLE	-1
+
+/* Flags argument for mmu_{map,unmap}_addr(), mmu_demap_{page,context,all}(),
+ * and mmu_{map,unmap}_perm_addr().
+ */
+#define HV_MMU_DMMU			0x01
+#define HV_MMU_IMMU			0x02
+#define HV_MMU_ALL			(HV_MMU_DMMU | HV_MMU_IMMU)
+
+/* mmu_map_addr()
+ * TRAP:	HV_MMU_MAP_ADDR_TRAP
+ * ARG0:	virtual address
+ * ARG1:	mmu context
+ * ARG2:	TTE
+ * ARG3:	flags (HV_MMU_{IMMU,DMMU})
+ * ERRORS:	EINVAL		Invalid virtual address, mmu context, or flags
+ *		EBADPGSZ	Invalid page size value
+ *		ENORADDR	Invalid real address in TTE
+ *
+ * Create a non-permanent mapping using the given TTE, virtual
+ * address, and mmu context.  The flags argument determines which
+ * (data, or instruction, or both) TLB the mapping gets loaded into.
+ *
+ * The behavior is undefined if the valid bit is clear in the TTE.
+ *
+ * Note: This API call is for privileged code to specify temporary translation
+ *       mappings without the need to create and manage a TSB.
+ */
+
+/* mmu_unmap_addr()
+ * TRAP:	HV_MMU_UNMAP_ADDR_TRAP
+ * ARG0:	virtual address
+ * ARG1:	mmu context
+ * ARG2:	flags (HV_MMU_{IMMU,DMMU})
+ * ERRORS:	EINVAL		Invalid virtual address, mmu context, or flags
+ *
+ * Demaps the given virtual address in the given mmu context on this
+ * CPU.  This function is intended to be used to demap pages mapped
+ * with mmu_map_addr.  This service is equivalent to invoking
+ * mmu_demap_page() with only the current CPU in the CPU list. The
+ * flags argument determines which (data, or instruction, or both) TLB
+ * the mapping gets unmapped from.
+ *
+ * Attempting to perform an unmap operation for a previously defined
+ * permanent mapping will have undefined results.
+ */
+
+/* mmu_tsb_ctx0()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_TSB_CTX0
+ * ARG0:	number of TSB descriptions
+ * ARG1:	TSB descriptions pointer
+ * RET0:	status
+ * ERRORS:	ENORADDR		Invalid TSB descriptions pointer or
+ *					TSB base within a descriptor
+ *		EBADALIGN		TSB descriptions pointer is not aligned
+ *					to an 8-byte boundary, or TSB base
+ *					within a descriptor is not aligned for
+ *					the given TSB size
+ *		EBADPGSZ		Invalid page size in a TSB descriptor
+ *		EBADTSB			Invalid associativity or size in a TSB
+ *					descriptor
+ *		EINVAL			Invalid number of TSB descriptions, or
+ *					invalid context index in a TSB
+ *					descriptor, or index page size not
+ *					equal to smallest page size in page
+ *					size bitmask field.
+ *
+ * Configures the TSBs for the current CPU for virtual addresses with
+ * context zero.  The TSB descriptions pointer is a pointer to an
+ * array of the given number of TSB descriptions.
+ *
+ * Note: The maximum number of TSBs available to a virtual CPU is given by the
+ *       mmu-max-#tsbs property of the cpu's corresponding "cpu" node in the
+ *       machine description.
+ */
+#define HV_FAST_MMU_TSB_CTX0		0x20
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_mmu_tsb_ctx0(unsigned long num_descriptions,
+				 unsigned long tsb_desc_ra);
+#endif
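+
+/* Example (illustrative only): describing a single direct-mapped,
+ * 512-entry, 8K-page TSB and handing it to the hypervisor for
+ * context 0.  desc_ra and tsb_ra are the real addresses of the
+ * descriptor and of the TSB itself; obtaining them (e.g. via __pa())
+ * is assumed to be done by the caller.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_conf_ctx0_tsb(struct hv_tsb_descr *desc,
+						  unsigned long desc_ra,
+						  unsigned long tsb_ra)
+{
+	desc->pgsz_idx = HV_PGSZ_IDX_8K;
+	desc->assoc = 1;		/* direct mapped */
+	desc->num_ttes = 512;
+	desc->ctx_idx = 0;
+	desc->pgsz_mask = HV_PGSZ_MASK_8K;
+	desc->tsb_base = tsb_ra;
+	desc->resv = 0;
+
+	return sun4v_mmu_tsb_ctx0(1, desc_ra);
+}
+#endif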
+
+/* mmu_tsb_ctxnon0()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_TSB_CTXNON0
+ * ARG0:	number of TSB descriptions
+ * ARG1:	TSB descriptions pointer
+ * RET0:	status
+ * ERRORS:	Same as for mmu_tsb_ctx0() above.
+ *
+ * Configures the TSBs for the current CPU for virtual addresses with
+ * non-zero contexts.  The TSB descriptions pointer is a pointer to an
+ * array of the given number of TSB descriptions.
+ *
+ * Note: A maximum of 16 TSBs may be specified in the TSB description list.
+ */
+#define HV_FAST_MMU_TSB_CTXNON0		0x21
+
+/* mmu_demap_page()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_DEMAP_PAGE
+ * ARG0:	reserved, must be zero
+ * ARG1:	reserved, must be zero
+ * ARG2:	virtual address
+ * ARG3:	mmu context
+ * ARG4:	flags (HV_MMU_{IMMU,DMMU})
+ * RET0:	status
+ * ERRORS:	EINVAL			Invalid virtual address, context, or
+ *					flags value
+ *		ENOTSUPPORTED		ARG0 or ARG1 is non-zero
+ *
+ * Demaps any page mapping of the given virtual address in the given
+ * mmu context for the current virtual CPU.  Any virtually tagged
+ * caches are guaranteed to be kept consistent.  The flags argument
+ * determines which TLB (instruction, or data, or both) participate in
+ * the operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_PAGE		0x22
+
+/* mmu_demap_ctx()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_DEMAP_CTX
+ * ARG0:	reserved, must be zero
+ * ARG1:	reserved, must be zero
+ * ARG2:	mmu context
+ * ARG3:	flags (HV_MMU_{IMMU,DMMU})
+ * RET0:	status
+ * ERRORS:	EINVAL			Invalid context or flags value
+ *		ENOTSUPPORTED		ARG0 or ARG1 is non-zero
+ *
+ * Demaps all non-permanent virtual page mappings previously specified
+ * for the given context for the current virtual CPU.  Any virtually
+ * tagged caches are guaranteed to be kept consistent.  The flags
+ * argument determines which TLB (instruction, or data, or both)
+ * participate in the operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_CTX		0x23
+
+/* mmu_demap_all()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_DEMAP_ALL
+ * ARG0:	reserved, must be zero
+ * ARG1:	reserved, must be zero
+ * ARG2:	flags (HV_MMU_{IMMU,DMMU})
+ * RET0:	status
+ * ERRORS:	EINVAL			Invalid flags value
+ *		ENOTSUPPORTED		ARG0 or ARG1 is non-zero
+ *
+ * Demaps all non-permanent virtual page mappings previously specified
+ * for the current virtual CPU.  Any virtually tagged caches are
+ * guaranteed to be kept consistent.  The flags argument determines
+ * which TLB (instruction, or data, or both) participate in the
+ * operation.
+ *
+ * ARG0 and ARG1 are both reserved and must be set to zero.
+ */
+#define HV_FAST_MMU_DEMAP_ALL		0x24
+
+#ifndef __ASSEMBLY__
+void sun4v_mmu_demap_all(void);
+#endif
+
+/* mmu_map_perm_addr()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_MAP_PERM_ADDR
+ * ARG0:	virtual address
+ * ARG1:	reserved, must be zero
+ * ARG2:	TTE
+ * ARG3:	flags (HV_MMU_{IMMU,DMMU})
+ * RET0:	status
+ * ERRORS:	EINVAL			Invalid virtual address or flags value
+ *		EBADPGSZ		Invalid page size value
+ *		ENORADDR		Invalid real address in TTE
+ *		ETOOMANY		Too many mappings (max of 8 reached)
+ *
+ * Create a permanent mapping using the given TTE and virtual address
+ * for context 0 on the calling virtual CPU.  A maximum of 8 such
+ * permanent mappings may be specified by privileged code.  Mappings
+ * may be removed with mmu_unmap_perm_addr().
+ *
+ * The behavior is undefined if a TTE with the valid bit clear is given.
+ *
+ * Note: This call is used to specify address space mappings for which
+ *       privileged code does not expect to receive misses.  For example,
+ *       this mechanism can be used to map kernel nucleus code and data.
+ */
+#define HV_FAST_MMU_MAP_PERM_ADDR	0x25
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_mmu_map_perm_addr(unsigned long vaddr,
+				      unsigned long set_to_zero,
+				      unsigned long tte,
+				      unsigned long flags);
+#endif
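+
+/* Example (illustrative only): installing a permanent context-0
+ * mapping in both the I- and D-TLB.  Constructing the TTE itself
+ * (valid bit, page size, real address) is assumed to be done by the
+ * caller.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_map_nucleus(unsigned long vaddr,
+						unsigned long tte)
+{
+	/* The second argument is reserved and must be zero.  */
+	return sun4v_mmu_map_perm_addr(vaddr, 0, tte, HV_MMU_ALL);
+}
+#endif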
+
+/* mmu_fault_area_conf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_FAULT_AREA_CONF
+ * ARG0:	real address
+ * RET0:	status
+ * RET1:	previous mmu fault area real address
+ * ERRORS:	ENORADDR		Invalid real address
+ *		EBADALIGN		Invalid alignment for fault area
+ *
+ * Configure the MMU fault status area for the calling CPU.  A 64-byte
+ * aligned real address specifies where MMU fault status information
+ * is placed.  The return value is the previously specified area, or 0
+ * for the first invocation.  Specifying a fault area at real address
+ * 0 is not allowed.
+ */
+#define HV_FAST_MMU_FAULT_AREA_CONF	0x26
+
+/* mmu_enable()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_ENABLE
+ * ARG0:	enable flag
+ * ARG1:	return target address
+ * RET0:	status
+ * ERRORS:	ENORADDR		Invalid real address when disabling
+ *					translation.
+ *		EBADALIGN		The return target address is not
+ *					aligned to an instruction.
+ *		EINVAL			The enable flag requests the current
+ *					operating mode (e.g. disable if already
+ *					disabled)
+ *
+ * Enable or disable virtual address translation for the calling CPU
+ * within the virtual machine domain.  If the enable flag is zero,
+ * translation is disabled, any non-zero value will enable
+ * translation.
+ *
+ * When this function returns, the newly selected translation mode
+ * will be active.  If the mmu is being enabled, then the return
+ * target address is a virtual address; otherwise it is a real address.
+ *
+ * Upon successful completion, control will be returned to the given
+ * return target address (i.e. the cpu will jump to that address).  On
+ * failure, the previous mmu mode remains and the trap simply returns
+ * as normal with the appropriate error code in RET0.
+ */
+#define HV_FAST_MMU_ENABLE		0x27
+
+/* mmu_unmap_perm_addr()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_UNMAP_PERM_ADDR
+ * ARG0:	virtual address
+ * ARG1:	reserved, must be zero
+ * ARG2:	flags (HV_MMU_{IMMU,DMMU})
+ * RET0:	status
+ * ERRORS:	EINVAL			Invalid virtual address or flags value
+ *		ENOMAP			Specified mapping was not found
+ *
+ * Demaps any permanent page mapping (established via
+ * mmu_map_perm_addr()) at the given virtual address for context 0 on
+ * the current virtual CPU.  Any virtually tagged caches are guaranteed
+ * to be kept consistent.
+ */
+#define HV_FAST_MMU_UNMAP_PERM_ADDR	0x28
+
+/* mmu_tsb_ctx0_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_TSB_CTX0_INFO
+ * ARG0:	max TSBs
+ * ARG1:	buffer pointer
+ * RET0:	status
+ * RET1:	number of TSBs
+ * ERRORS:	EINVAL			Supplied buffer is too small
+ *		EBADALIGN		The buffer pointer is badly aligned
+ *		ENORADDR		Invalid real address for buffer pointer
+ *
+ * Return the TSB configuration as previously defined by mmu_tsb_ctx0()
+ * into the provided buffer.  The size of the buffer is given in ARG0
+ * in terms of the number of TSB description entries.
+ *
+ * Upon return, RET1 always contains the number of TSB descriptions
+ * previously configured.  If zero TSBs were configured, EOK is
+ * returned with RET1 containing 0.
+ */
+#define HV_FAST_MMU_TSB_CTX0_INFO	0x29
+
+/* mmu_tsb_ctxnon0_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_TSB_CTXNON0_INFO
+ * ARG0:	max TSBs
+ * ARG1:	buffer pointer
+ * RET0:	status
+ * RET1:	number of TSBs
+ * ERRORS:	EINVAL			Supplied buffer is too small
+ *		EBADALIGN		The buffer pointer is badly aligned
+ *		ENORADDR		Invalid real address for buffer pointer
+ *
+ * Return the TSB configuration as previously defined by
+ * mmu_tsb_ctxnon0() into the provided buffer.  The size of the buffer
+ * is given in ARG0 in terms of the number of TSB description entries.
+ *
+ * Upon return, RET1 always contains the number of TSB descriptions
+ * previously configured.  If zero TSBs were configured, EOK is
+ * returned with RET1 containing 0.
+ */
+#define HV_FAST_MMU_TSB_CTXNON0_INFO	0x2a
+
+/* mmu_fault_area_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMU_FAULT_AREA_INFO
+ * RET0:	status
+ * RET1:	fault area real address
+ * ERRORS:	No errors defined.
+ *
+ * Return the currently defined MMU fault status area for the current
+ * CPU.  The real address of the fault status area is returned in
+ * RET1, or 0 is returned in RET1 if no fault status area is defined.
+ *
+ * Note: mmu_fault_area_conf() may be called with the return value (RET1)
+ *       from this service if there is a need to save and restore the fault
+ *	 area for a cpu.
+ */
+#define HV_FAST_MMU_FAULT_AREA_INFO	0x2b
+
+/* Cache and Memory services. */
+
+/* mem_scrub()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MEM_SCRUB
+ * ARG0:	real address
+ * ARG1:	length
+ * RET0:	status
+ * RET1:	length scrubbed
+ * ERRORS:	ENORADDR	Invalid real address
+ *		EBADALIGN	Start address or length are not correctly
+ *				aligned
+ *		EINVAL		Length is zero
+ *
+ * Zero the memory contents in the range real address to real address
+ * plus length minus 1.  Also, valid ECC will be generated for that
+ * memory address range.  Scrubbing is started at the given real
+ * address, but may not scrub the entire given length.  The actual
+ * length scrubbed will be returned in RET1.
+ *
+ * The real address and length must be aligned on an 8K boundary, or
+ * contain the start address and length from a sun4v error report.
+ *
+ * Note: There are two uses for this function.  The first use is to block clear
+ *       and initialize memory and the second is to scrub an uncorrectable
+ *       error reported via a resumable or non-resumable trap.  The second
+ *       use requires the arguments to be equal to the real address and length
+ *       provided in a sun4v memory error report.
+ */
+#define HV_FAST_MEM_SCRUB		0x31
+
+/* mem_sync()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MEM_SYNC
+ * ARG0:	real address
+ * ARG1:	length
+ * RET0:	status
+ * RET1:	length synced
+ * ERRORS:	ENORADDR	Invalid real address
+ *		EBADALIGN	Start address or length are not correctly
+ *				aligned
+ *		EINVAL		Length is zero
+ *
+ * Force the next access within the range real address to real address
+ * plus length minus 1 to be fetched from main system memory.  Less than
+ * the given length may be synced, the actual amount synced is
+ * returned in RET1.  The real address and length must be aligned on
+ * an 8K boundary.
+ */
+#define HV_FAST_MEM_SYNC		0x32
+
+/* Coprocessor services
+ *
+ * M7 and later processors provide an on-chip coprocessor which
+ * accelerates database operations, and is known internally as
+ * DAX.
+ */
+
+/* ccb_submit()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_CCB_SUBMIT
+ * ARG0:	address of CCB array
+ * ARG1:	size (in bytes) of CCB array being submitted
+ * ARG2:	flags
+ * ARG3:	reserved
+ * RET0:	status (success or error code)
+ * RET1:	size (in bytes) of CCB array that was accepted (might be less
+ *		than arg1)
+ * RET2:	status data
+ *		if status == ENOMAP or ENOACCESS, identifies the VA in question
+ *		if status == EUNAVAILABLE, unavailable code
+ * RET3:	reserved
+ *
+ * ERRORS:	EOK		successful submission (check size)
+ *		EWOULDBLOCK	could not finish submissions, try again
+ *		EBADALIGN	array not 64B aligned or size not 64B multiple
+ *		ENORADDR	invalid RA for array or in CCB
+ *		ENOMAP		could not translate address (see status data)
+ *		EINVAL		invalid ccb or arguments
+ *		ETOOMANY	too many ccbs with all-or-nothing flag
+ *		ENOACCESS	guest has no access to submit ccbs or address
+ *				in CCB does not have correct permissions (check
+ *				status data)
+ *		EUNAVAILABLE	ccb operation could not be performed at this
+ *				time (check status data)
+ *				Status data codes:
+ *					0 - exact CCB could not be executed
+ *					1 - CCB opcode cannot be executed
+ *					2 - CCB version cannot be executed
+ *					3 - vcpu cannot execute CCBs
+ *					4 - no CCBs can be executed
+ */
+
+#define HV_CCB_SUBMIT               0x34
+#ifndef __ASSEMBLY__
+unsigned long sun4v_ccb_submit(unsigned long ccb_buf,
+			       unsigned long len,
+			       unsigned long flags,
+			       unsigned long reserved,
+			       void *submitted_len,
+			       void *status_data);
+#endif
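+
+/* Example (illustrative only): submitting a single 64-byte CCB and
+ * checking that all of it was accepted.  The CCB contents are DAX
+ * specific and not described here; ccb_ra is the real address of the
+ * 64-byte aligned CCB and flags is built from the HV_CCB_* values
+ * defined below.
+ */
+#ifndef __ASSEMBLY__
+static inline long example_ccb_submit_one(unsigned long ccb_ra,
+					  unsigned long flags)
+{
+	unsigned long accepted_len, status_data;
+
+	if (sun4v_ccb_submit(ccb_ra, 64, flags, 0,
+			     &accepted_len, &status_data))
+		return -1;
+	return accepted_len == 64 ? 0 : -1;	/* -1: partial submission */
+}
+#endif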
+
+/* flags (ARG2) */
+#define HV_CCB_QUERY_CMD		BIT(1)
+#define HV_CCB_ARG0_TYPE_REAL		0UL
+#define HV_CCB_ARG0_TYPE_PRIMARY	BIT(4)
+#define HV_CCB_ARG0_TYPE_SECONDARY	BIT(5)
+#define HV_CCB_ARG0_TYPE_NUCLEUS	GENMASK(5, 4)
+#define HV_CCB_ARG0_PRIVILEGED		BIT(6)
+#define HV_CCB_ALL_OR_NOTHING		BIT(7)
+#define HV_CCB_QUEUE_INFO		BIT(8)
+#define HV_CCB_VA_REJECT		0UL
+#define HV_CCB_VA_SECONDARY		BIT(13)
+#define HV_CCB_VA_NUCLEUS		GENMASK(13, 12)
+#define HV_CCB_VA_PRIVILEGED		BIT(14)
+#define HV_CCB_VA_READ_ADI_DISABLE	BIT(15)	/* DAX2 only */
+
+/* ccb_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_CCB_INFO
+ * ARG0:	real address of CCB completion area
+ * RET0:	status (success or error code)
+ * RET1:	info array
+ *			- RET1[0]: CCB state
+ *			- RET1[1]: dax unit
+ *			- RET1[2]: queue number
+ *			- RET1[3]: queue position
+ *
+ * ERRORS:	EOK		operation successful
+ *		EBADALIGN	address not 64B aligned
+ *		ENORADDR	RA in address not valid
+ *		EINVAL		CA not valid
+ *		EWOULDBLOCK	info not available for this CCB currently, try
+ *				again
+ *		ENOACCESS	guest cannot use dax
+ */
+
+#define HV_CCB_INFO                 0x35
+#ifndef __ASSEMBLY__
+unsigned long sun4v_ccb_info(unsigned long ca,
+			     void *info_arr);
+#endif
+
+/* info array byte offsets (RET1) */
+#define CCB_INFO_OFFSET_CCB_STATE	0
+#define CCB_INFO_OFFSET_DAX_UNIT	2
+#define CCB_INFO_OFFSET_QUEUE_NUM	4
+#define CCB_INFO_OFFSET_QUEUE_POS	6
+
+/* CCB state (RET1[0]) */
+#define HV_CCB_STATE_COMPLETED      0
+#define HV_CCB_STATE_ENQUEUED       1
+#define HV_CCB_STATE_INPROGRESS     2
+#define HV_CCB_STATE_NOTFOUND       3
+
+/* ccb_kill()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_CCB_KILL
+ * ARG0:	real address of CCB completion area
+ * RET0:	status (success or error code)
+ * RET1:	CCB kill status
+ *
+ * ERRORS:	EOK		operation successful
+ *		EBADALIGN	address not 64B aligned
+ *		ENORADDR	RA in address not valid
+ *		EINVAL		CA not valid
+ *		EWOULDBLOCK	kill not available for this CCB currently, try
+ *				again
+ *		ENOACCESS	guest cannot use dax
+ */
+
+#define HV_CCB_KILL                 0x36
+#ifndef __ASSEMBLY__
+unsigned long sun4v_ccb_kill(unsigned long ca,
+			     void *kill_status);
+#endif
+
+/* CCB kill status (RET1) */
+#define HV_CCB_KILL_COMPLETED       0
+#define HV_CCB_KILL_DEQUEUED        1
+#define HV_CCB_KILL_KILLED          2
+#define HV_CCB_KILL_NOTFOUND        3
+
+/* Time of day services.
+ *
+ * The hypervisor maintains the time of day on a per-domain basis.
+ * Changing the time of day in one domain does not affect the time of
+ * day on any other domain.
+ *
+ * Time is described by a single unsigned 64-bit word which is the
+ * number of seconds since the UNIX Epoch (00:00:00 UTC, January 1,
+ * 1970).
+ */
+
+/* tod_get()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_TOD_GET
+ * RET0:	status
+ * RET1:	TOD
+ * ERRORS:	EWOULDBLOCK	TOD resource is temporarily unavailable
+ *		ENOTSUPPORTED	If TOD not supported on this platform
+ *
+ * Return the current time of day.  May block if TOD access is
+ * temporarily not possible.
+ */
+#define HV_FAST_TOD_GET			0x50
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_tod_get(unsigned long *time);
+#endif
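+
+/* Example (illustrative only): reading the TOD, retrying while the
+ * TOD resource is temporarily unavailable.  HV_EOK and HV_EWOULDBLOCK
+ * are the status codes defined earlier in this header.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_tod_read(unsigned long *secs)
+{
+	unsigned long hv_err;
+
+	do {
+		hv_err = sun4v_tod_get(secs);
+	} while (hv_err == HV_EWOULDBLOCK);
+
+	return hv_err;	/* HV_EOK on success */
+}
+#endif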
+
+/* tod_set()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_TOD_SET
+ * ARG0:	TOD
+ * RET0:	status
+ * ERRORS:	EWOULDBLOCK	TOD resource is temporarily unavailable
+ *		ENOTSUPPORTED	If TOD not supported on this platform
+ *
+ * The current time of day is set to the value specified in ARG0.  May
+ * block if TOD access is temporarily not possible.
+ */
+#define HV_FAST_TOD_SET			0x51
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_tod_set(unsigned long time);
+#endif
+
+/* Console services */
+
+/* con_getchar()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CONS_GETCHAR
+ * RET0:	status
+ * RET1:	character
+ * ERRORS:	EWOULDBLOCK	No character available.
+ *
+ * Returns a character from the console device.  If no character is
+ * available then an EWOULDBLOCK error is returned.  If a character is
+ * available, then the returned status is EOK and the character value
+ * is in RET1.
+ *
+ * A virtual BREAK is represented by the 64-bit value -1.
+ *
+ * A virtual HUP signal is represented by the 64-bit value -2.
+ */
+#define HV_FAST_CONS_GETCHAR		0x60
+
+/* con_putchar()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CONS_PUTCHAR
+ * ARG0:	character
+ * RET0:	status
+ * ERRORS:	EINVAL		Illegal character
+ *		EWOULDBLOCK	Output buffer currently full, would block
+ *
+ * Send a character to the console device.  Only character values
+ * between 0 and 255 may be used.  Values outside this range are
+ * invalid except for the 64-bit value -1 which is used to send a
+ * virtual BREAK.
+ */
+#define HV_FAST_CONS_PUTCHAR		0x61
+
+/* con_read()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CONS_READ
+ * ARG0:	buffer real address
+ * ARG1:	buffer size in bytes
+ * RET0:	status
+ * RET1:	bytes read or BREAK or HUP
+ * ERRORS:	EWOULDBLOCK	No character available.
+ *
+ * Reads characters into a buffer from the console device.  If no
+ * character is available then an EWOULDBLOCK error is returned.
+ * If a character is available, then the returned status is EOK
+ * and the number of bytes read into the given buffer is provided
+ * in RET1.
+ *
+ * A virtual BREAK is represented by the 64-bit RET1 value -1.
+ *
+ * A virtual HUP signal is represented by the 64-bit RET1 value -2.
+ *
+ * If BREAK or HUP are indicated, no bytes were read into buffer.
+ */
+#define HV_FAST_CONS_READ		0x62
+
+/* con_write()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_CONS_WRITE
+ * ARG0:	buffer real address
+ * ARG1:	buffer size in bytes
+ * RET0:	status
+ * RET1:	bytes written
+ * ERRORS:	EWOULDBLOCK	Output buffer currently full, would block
+ *
+ * Send the characters in the buffer to the console device.  Breaks must be
+ * sent using con_putchar().
+ */
+#define HV_FAST_CONS_WRITE		0x63
+
+#ifndef __ASSEMBLY__
+long sun4v_con_getchar(long *status);
+long sun4v_con_putchar(long c);
+long sun4v_con_read(unsigned long buffer,
+		    unsigned long size,
+		    unsigned long *bytes_read);
+unsigned long sun4v_con_write(unsigned long buffer,
+			      unsigned long size,
+			      unsigned long *bytes_written);
+#endif
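+
+/* Example (illustrative only): writing a whole buffer to the console,
+ * coping with partial writes and retrying while the output buffer is
+ * full.  buf_ra is the real address of the data to send.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_con_write_all(unsigned long buf_ra,
+						  unsigned long len)
+{
+	unsigned long hv_err, written;
+
+	while (len) {
+		hv_err = sun4v_con_write(buf_ra, len, &written);
+		if (hv_err == HV_EWOULDBLOCK)
+			continue;	/* output buffer full, retry */
+		if (hv_err != HV_EOK)
+			return hv_err;
+		buf_ra += written;
+		len -= written;
+	}
+	return HV_EOK;
+}
+#endif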
+
+/* mach_set_soft_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_SET_SOFT_STATE
+ * ARG0:	software state
+ * ARG1:	software state description pointer
+ * RET0:	status
+ * ERRORS:	EINVAL		software state not valid or software state
+ *				description is not NULL terminated
+ *		ENORADDR	software state description pointer is not a
+ *				valid real address
+ *		EBADALIGN	software state description is not correctly
+ *				aligned
+ *
+ * This allows the guest to report its soft state to the hypervisor.  There
+ * are two primary components to this state.  The first part states whether
+ * the guest software is running or not.  The second contains optional
+ * details specific to the software.
+ *
+ * The software state argument is defined below in HV_SOFT_STATE_*, and
+ * indicates whether the guest is operating normally or in a transitional
+ * state.
+ *
+ * The software state description argument is the real address of a data
+ * buffer of 32 bytes, aligned on a 32-byte boundary.  It is treated as a NULL
+ * terminated 7-bit ASCII string of up to 31 characters not including the
+ * NULL termination.
+ */
+#define HV_FAST_MACH_SET_SOFT_STATE	0x70
+#define  HV_SOFT_STATE_NORMAL		 0x01
+#define  HV_SOFT_STATE_TRANSITION	 0x02
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_mach_set_soft_state(unsigned long soft_state,
+				        unsigned long msg_string_ra);
+#endif
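+
+/* Example (illustrative only): reporting a normal-running soft state.
+ * desc_ra is the real address of a 32-byte buffer, aligned on a
+ * 32-byte boundary, holding a NULL terminated ASCII description;
+ * building that buffer and obtaining its real address is assumed to
+ * be done by the caller.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_report_running(unsigned long desc_ra)
+{
+	return sun4v_mach_set_soft_state(HV_SOFT_STATE_NORMAL, desc_ra);
+}
+#endif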
+
+/* mach_get_soft_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MACH_GET_SOFT_STATE
+ * ARG0:	software state description pointer
+ * RET0:	status
+ * RET1:	software state
+ * ERRORS:	ENORADDR	software state description pointer is not a
+ *				valid real address
+ *		EBADALIGN	software state description is not correctly
+ *				aligned
+ *
+ * Retrieve the current value of the guest's software state.  The rules
+ * for the software state pointer are the same as for mach_set_soft_state()
+ * above.
+ */
+#define HV_FAST_MACH_GET_SOFT_STATE	0x71
+
+/* svc_send()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_SVC_SEND
+ * ARG0:	service ID
+ * ARG1:	buffer real address
+ * ARG2:	buffer size
+ * RET0:	STATUS
+ * RET1:	sent_bytes
+ *
+ * Be careful, all output registers are clobbered by this operation,
+ * so for example it is not possible to save away a value in %o4
+ * across the trap.
+ */
+#define HV_FAST_SVC_SEND		0x80
+
+/* svc_recv()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_SVC_RECV
+ * ARG0:	service ID
+ * ARG1:	buffer real address
+ * ARG2:	buffer size
+ * RET0:	STATUS
+ * RET1:	recv_bytes
+ *
+ * Be careful, all output registers are clobbered by this operation,
+ * so for example it is not possible to save away a value in %o4
+ * across the trap.
+ */
+#define HV_FAST_SVC_RECV		0x81
+
+/* svc_getstatus()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_SVC_GETSTATUS
+ * ARG0:	service ID
+ * RET0:	STATUS
+ * RET1:	status bits
+ */
+#define HV_FAST_SVC_GETSTATUS		0x82
+
+/* svc_setstatus()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_SVC_SETSTATUS
+ * ARG0:	service ID
+ * ARG1:	bits to set
+ * RET0:	STATUS
+ */
+#define HV_FAST_SVC_SETSTATUS		0x83
+
+/* svc_clrstatus()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_SVC_CLRSTATUS
+ * ARG0:	service ID
+ * ARG1:	bits to clear
+ * RET0:	STATUS
+ */
+#define HV_FAST_SVC_CLRSTATUS		0x84
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_svc_send(unsigned long svc_id,
+			     unsigned long buffer,
+			     unsigned long buffer_size,
+			     unsigned long *sent_bytes);
+unsigned long sun4v_svc_recv(unsigned long svc_id,
+			     unsigned long buffer,
+			     unsigned long buffer_size,
+			     unsigned long *recv_bytes);
+unsigned long sun4v_svc_getstatus(unsigned long svc_id,
+				  unsigned long *status_bits);
+unsigned long sun4v_svc_setstatus(unsigned long svc_id,
+				  unsigned long status_bits);
+unsigned long sun4v_svc_clrstatus(unsigned long svc_id,
+				  unsigned long status_bits);
+#endif
+
+/* Trap trace services.
+ *
+ * The hypervisor provides a trap tracing capability for privileged
+ * code running on each virtual CPU.  Privileged code provides a
+ * round-robin trap trace queue within which the hypervisor writes
+ * 64-byte entries detailing hyperprivileged traps taken on behalf of
+ * privileged code.  This is provided as a debugging capability for
+ * privileged code.
+ *
+ * The trap trace control structure is 64-bytes long and placed at the
+ * start (offset 0) of the trap trace buffer, and is described as
+ * follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_trap_trace_control {
+	unsigned long		head_offset;
+	unsigned long		tail_offset;
+	unsigned long		__reserved[0x30 / sizeof(unsigned long)];
+};
+#endif
+#define HV_TRAP_TRACE_CTRL_HEAD_OFFSET	0x00
+#define HV_TRAP_TRACE_CTRL_TAIL_OFFSET	0x08
+
+/* The head offset is the offset of the most recently completed entry
+ * in the trap-trace buffer.  The tail offset is the offset of the
+ * next entry to be written.  The control structure is owned and
+ * modified by the hypervisor.  A guest may not modify the control
+ * structure contents.  Attempts to do so will result in undefined
+ * behavior for the guest.
+ *
+ * Each trap trace buffer entry is laid out as follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_trap_trace_entry {
+	unsigned char	type;		/* Hypervisor or guest entry?	*/
+	unsigned char	hpstate;	/* Hyper-privileged state	*/
+	unsigned char	tl;		/* Trap level			*/
+	unsigned char	gl;		/* Global register level	*/
+	unsigned short	tt;		/* Trap type			*/
+	unsigned short	tag;		/* Extended trap identifier	*/
+	unsigned long	tstate;		/* Trap state			*/
+	unsigned long	tick;		/* Tick				*/
+	unsigned long	tpc;		/* Trap PC			*/
+	unsigned long	f1;		/* Entry specific		*/
+	unsigned long	f2;		/* Entry specific		*/
+	unsigned long	f3;		/* Entry specific		*/
+	unsigned long	f4;		/* Entry specific		*/
+};
+#endif
+#define HV_TRAP_TRACE_ENTRY_TYPE	0x00
+#define HV_TRAP_TRACE_ENTRY_HPSTATE	0x01
+#define HV_TRAP_TRACE_ENTRY_TL		0x02
+#define HV_TRAP_TRACE_ENTRY_GL		0x03
+#define HV_TRAP_TRACE_ENTRY_TT		0x04
+#define HV_TRAP_TRACE_ENTRY_TAG		0x06
+#define HV_TRAP_TRACE_ENTRY_TSTATE	0x08
+#define HV_TRAP_TRACE_ENTRY_TICK	0x10
+#define HV_TRAP_TRACE_ENTRY_TPC		0x18
+#define HV_TRAP_TRACE_ENTRY_F1		0x20
+#define HV_TRAP_TRACE_ENTRY_F2		0x28
+#define HV_TRAP_TRACE_ENTRY_F3		0x30
+#define HV_TRAP_TRACE_ENTRY_F4		0x38
+
+/* The type field is encoded as follows.  */
+#define HV_TRAP_TYPE_UNDEF		0x00 /* Entry content undefined     */
+#define HV_TRAP_TYPE_HV			0x01 /* Hypervisor trap entry       */
+#define HV_TRAP_TYPE_GUEST		0xff /* Added via ttrace_addentry() */
+
+/* ttrace_buf_conf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_TTRACE_BUF_CONF
+ * ARG0:	real address
+ * ARG1:	number of entries
+ * RET0:	status
+ * RET1:	number of entries
+ * ERRORS:	ENORADDR	Invalid real address
+ *		EINVAL		Size is too small
+ *		EBADALIGN	Real address not aligned on 64-byte boundary
+ *
+ * Requests hypervisor trap tracing and declares a virtual CPU's trap
+ * trace buffer to the hypervisor.  The real address supplies the real
+ * base address of the trap trace queue and must be 64-byte aligned.
+ * Specifying a value of 0 for the number of entries disables trap
+ * tracing for the calling virtual CPU.  The buffer allocated must be
+ * sized for a power of two number of 64-byte trap trace entries plus
+ * an initial 64-byte control structure.
+ *
+ * This may be invoked any number of times so that a virtual CPU may
+ * relocate a trap trace buffer or create "snapshots" of information.
+ *
+ * If the real address is illegal or badly aligned, then trap tracing
+ * is disabled and an error is returned.
+ *
+ * Upon failure with EINVAL, this service call returns in RET1 the
+ * minimum number of buffer entries required.  Upon other failures
+ * RET1 is undefined.
+ */
+#define HV_FAST_TTRACE_BUF_CONF		0x90
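+
+/* Example (illustrative only) of a conforming trap trace buffer
+ * layout: the 64-byte control structure at offset 0 followed by a
+ * power-of-two number of 64-byte entries.  The 64-byte aligned real
+ * address of such a buffer, together with the entry count, is what
+ * ttrace_buf_conf expects.  The type name is illustrative.
+ */
+#ifndef __ASSEMBLY__
+struct example_ttrace_buf {
+	struct hv_trap_trace_control	control;
+	struct hv_trap_trace_entry	entries[64];	/* power of two */
+} __attribute__((aligned(64)));
+#endif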
+
+/* ttrace_buf_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_TTRACE_BUF_INFO
+ * RET0:	status
+ * RET1:	real address
+ * RET2:	size
+ * ERRORS:	None defined.
+ *
+ * Returns the size and location of the previously declared trap-trace
+ * buffer.  In the event that no buffer was previously defined, or the
+ * buffer is disabled, this call will return a size of zero bytes.
+ */
+#define HV_FAST_TTRACE_BUF_INFO		0x91
+
+/* ttrace_enable()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_TTRACE_ENABLE
+ * ARG0:	enable
+ * RET0:	status
+ * RET1:	previous enable state
+ * ERRORS:	EINVAL		No trap trace buffer currently defined
+ *
+ * Enable or disable trap tracing, and return the previous enabled
+ * state in RET1.  Future systems may define various flags for the
+ * enable argument (ARG0), for the moment a guest should pass
+ * "(uint64_t) -1" to enable, and "(uint64_t) 0" to disable all
+ * tracing - which will ensure future compatibility.
+ */
+#define HV_FAST_TTRACE_ENABLE		0x92
+
+/* ttrace_freeze()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_TTRACE_FREEZE
+ * ARG0:	freeze
+ * RET0:	status
+ * RET1:	previous freeze state
+ * ERRORS:	EINVAL		No trap trace buffer currently defined
+ *
+ * Freeze or unfreeze trap tracing, returning the previous freeze
+ * state in RET1.  A guest should pass a non-zero value to freeze and
+ * a zero value to unfreeze all tracing.  The returned previous state
+ * is 0 for not frozen and 1 for frozen.
+ */
+#define HV_FAST_TTRACE_FREEZE		0x93
+
+/* ttrace_addentry()
+ * TRAP:	HV_TTRACE_ADDENTRY_TRAP
+ * ARG0:	tag (16-bits)
+ * ARG1:	data word 0
+ * ARG2:	data word 1
+ * ARG3:	data word 2
+ * ARG4:	data word 3
+ * RET0:	status
+ * ERRORS:	EINVAL		No trap trace buffer currently defined
+ *
+ * Add an entry to the trap trace buffer.  Upon return only ARG0/RET0
+ * is modified - none of the other registers holding arguments are
+ * volatile across this hypervisor service.
+ */
+
+/* Core dump services.
+ *
+ * Since the hypervisor virtualizes and thus obscures a lot of the
+ * physical machine layout and state, traditional OS crash dumps can
+ * be difficult to diagnose especially when the problem is a
+ * configuration error of some sort.
+ *
+ * The dump services provide an opaque buffer into which the
+ * hypervisor can place its internal state in order to assist in
+ * debugging such situations.  The contents are opaque and extremely
+ * platform and hypervisor implementation specific.  The guest, during
+ * a core dump, requests that the hypervisor update any information in
+ * the dump buffer in preparation for being dumped as part of the
+ * domain's memory image.
+ */
+
+/* dump_buf_update()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_DUMP_BUF_UPDATE
+ * ARG0:	real address
+ * ARG1:	size
+ * RET0:	status
+ * RET1:	required size of dump buffer
+ * ERRORS:	ENORADDR	Invalid real address
+ *		EBADALIGN	Real address is not aligned on a 64-byte
+ *				boundary
+ *		EINVAL		Size is non-zero but less than minimum size
+ *				required
+ *		ENOTSUPPORTED	Operation not supported on current logical
+ *				domain
+ *
+ * Declare a domain dump buffer to the hypervisor.  The real address
+ * provided for the domain dump buffer must be 64-byte aligned.  The
+ * size specifies the size of the dump buffer and may be larger than
+ * the minimum size specified in the machine description.  The
+ * hypervisor will fill the dump buffer with opaque data.
+ *
+ * Note: A guest may elect to include dump buffer contents as part of a crash
+ *       dump to assist with debugging.  This function may be called any number
+ *       of times so that a guest may relocate a dump buffer, or create
+ *       "snapshots" of any dump-buffer information.  Each call to
+ *       dump_buf_update() atomically declares the new dump buffer to the
+ *       hypervisor.
+ *
+ * A specified size of 0 unconfigures the dump buffer.  If the real
+ * address is illegal or badly aligned, then any currently active dump
+ * buffer is disabled and an error is returned.
+ *
+ * In the event that the call fails with EINVAL, RET1 contains the
+ * minimum size required by the hypervisor for a valid dump buffer.
+ */
+#define HV_FAST_DUMP_BUF_UPDATE		0x94
+
+/* dump_buf_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_DUMP_BUF_INFO
+ * RET0:	status
+ * RET1:	real address of current dump buffer
+ * RET2:	size of current dump buffer
+ * ERRORS:	No errors defined.
+ *
+ * Return the currently configured dump buffer description.  A
+ * returned size of 0 bytes indicates an undefined dump buffer.  In
+ * this case the return address in RET1 is undefined.
+ */
+#define HV_FAST_DUMP_BUF_INFO		0x95
+
+/* Device interrupt services.
+ *
+ * Device interrupts are allocated to system bus bridges by the hypervisor,
+ * and described to OBP in the machine description.  OBP then describes
+ * these interrupts to the OS via properties in the device tree.
+ *
+ * Terminology:
+ *
+ *	cpuid		Unique opaque value which represents a target cpu.
+ *
+ *	devhandle	Device handle.  It uniquely identifies a device, and
+ *			consists of the lower 28-bits of the hi-cell of the
+ *			first entry of the device's "reg" property in the
+ *			OBP device tree.
+ *
+ *	devino		Device interrupt number.  Specifies the relative
+ *			interrupt number within the device.  The unique
+ *			combination of devhandle and devino are used to
+ *			identify a specific device interrupt.
+ *
+ *			Note: The devino value is the same as the values in the
+ *			      "interrupts" property or "interrupt-map" property
+ *			      in the OBP device tree for that device.
+ *
+ *	sysino		System interrupt number.  A 64-bit unsigned integer
+ *			representing a unique interrupt within a virtual
+ *			machine.
+ *
+ *	intr_state	A flag representing the interrupt state for a given
+ *			sysino.  The state values are defined below.
+ *
+ *	intr_enabled	A flag representing the 'enabled' state for a given
+ *			sysino.  The enable values are defined below.
+ */
+
+#define HV_INTR_STATE_IDLE		0 /* Nothing pending */
+#define HV_INTR_STATE_RECEIVED		1 /* Interrupt received by hardware */
+#define HV_INTR_STATE_DELIVERED		2 /* Interrupt delivered to queue */
+
+#define HV_INTR_DISABLED		0 /* sysino not enabled */
+#define HV_INTR_ENABLED			1 /* sysino enabled */
+
+/* intr_devino_to_sysino()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_DEVINO2SYSINO
+ * ARG0:	devhandle
+ * ARG1:	devino
+ * RET0:	status
+ * RET1:	sysino
+ * ERRORS:	EINVAL		Invalid devhandle/devino
+ *
+ * Converts a device specific interrupt number of the given
+ * devhandle/devino into a system specific ino (sysino).
+ */
+#define HV_FAST_INTR_DEVINO2SYSINO	0xa0
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_devino_to_sysino(unsigned long devhandle,
+				     unsigned long devino);
+#endif
+
+/* intr_getenabled()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_GETENABLED
+ * ARG0:	sysino
+ * RET0:	status
+ * RET1:	intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ * ERRORS:	EINVAL		Invalid sysino
+ *
+ * Returns interrupt enabled state in RET1 for the interrupt defined
+ * by the given sysino.
+ */
+#define HV_FAST_INTR_GETENABLED		0xa1
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_intr_getenabled(unsigned long sysino);
+#endif
+
+/* intr_setenabled()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_SETENABLED
+ * ARG0:	sysino
+ * ARG1:	intr_enabled (HV_INTR_{DISABLED,ENABLED})
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid sysino or intr_enabled value
+ *
+ * Set the 'enabled' state of the interrupt sysino.
+ */
+#define HV_FAST_INTR_SETENABLED		0xa2
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_intr_setenabled(unsigned long sysino,
+				    unsigned long intr_enabled);
+#endif
+
+/* intr_getstate()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_GETSTATE
+ * ARG0:	sysino
+ * RET0:	status
+ * RET1:	intr_state (HV_INTR_STATE_*)
+ * ERRORS:	EINVAL		Invalid sysino
+ *
+ * Returns current state of the interrupt defined by the given sysino.
+ */
+#define HV_FAST_INTR_GETSTATE		0xa3
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_intr_getstate(unsigned long sysino);
+#endif
+
+/* intr_setstate()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_SETSTATE
+ * ARG0:	sysino
+ * ARG1:	intr_state (HV_INTR_STATE_*)
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid sysino or intr_state value
+ *
+ * Sets the current state of the interrupt described by the given sysino
+ * value.
+ *
+ * Note: Setting the state to HV_INTR_STATE_IDLE clears any pending
+ *       interrupt for sysino.
+ */
+#define HV_FAST_INTR_SETSTATE		0xa4
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_intr_setstate(unsigned long sysino, unsigned long intr_state);
+#endif
+
+/* intr_gettarget()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_GETTARGET
+ * ARG0:	sysino
+ * RET0:	status
+ * RET1:	cpuid
+ * ERRORS:	EINVAL		Invalid sysino
+ *
+ * Returns CPU that is the current target of the interrupt defined by
+ * the given sysino.  The CPU value returned is undefined if the target
+ * has not been set via intr_settarget().
+ */
+#define HV_FAST_INTR_GETTARGET		0xa5
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_intr_gettarget(unsigned long sysino);
+#endif
+
+/* intr_settarget()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_INTR_SETTARGET
+ * ARG0:	sysino
+ * ARG1:	cpuid
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid sysino
+ *		ENOCPU		Invalid cpuid
+ *
+ * Set the target CPU for the interrupt defined by the given sysino.
+ */
+#define HV_FAST_INTR_SETTARGET		0xa6
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_intr_settarget(unsigned long sysino, unsigned long cpuid);
+#endif
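+
+/* Example (illustrative only): routing a device interrupt.  The
+ * devhandle/devino pair from the OBP device tree is converted to a
+ * sysino, which is then targeted at a cpu, cleared to the idle state,
+ * and enabled.
+ */
+#ifndef __ASSEMBLY__
+static inline unsigned long example_route_irq(unsigned long devhandle,
+					      unsigned long devino,
+					      unsigned long cpuid)
+{
+	unsigned long sysino, hv_err;
+
+	sysino = sun4v_devino_to_sysino(devhandle, devino);
+	hv_err = sun4v_intr_settarget(sysino, cpuid);
+	if (hv_err)
+		return hv_err;
+	hv_err = sun4v_intr_setstate(sysino, HV_INTR_STATE_IDLE);
+	if (hv_err)
+		return hv_err;
+	return sun4v_intr_setenabled(sysino, HV_INTR_ENABLED);
+}
+#endif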
+
+/* vintr_get_cookie()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_COOKIE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	cookie
+ */
+#define HV_FAST_VINTR_GET_COOKIE	0xa7
+
+/* vintr_set_cookie()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_COOKIE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	cookie
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_COOKIE	0xa8
+
+/* vintr_get_valid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_VALID
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	valid state
+ */
+#define HV_FAST_VINTR_GET_VALID		0xa9
+
+/* vintr_set_valid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_VALID
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	valid state
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_VALID		0xaa
+
+/* vintr_get_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_STATE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	state
+ */
+#define HV_FAST_VINTR_GET_STATE		0xab
+
+/* vintr_set_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_STATE
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	state
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_STATE		0xac
+
+/* vintr_get_target()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_GET_TARGET
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * RET0:	status
+ * RET1:	cpuid
+ */
+#define HV_FAST_VINTR_GET_TARGET	0xad
+
+/* vintr_set_target()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_VINTR_SET_TARGET
+ * ARG0:	device handle
+ * ARG1:	device ino
+ * ARG2:	cpuid
+ * RET0:	status
+ */
+#define HV_FAST_VINTR_SET_TARGET	0xae
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_vintr_get_cookie(unsigned long dev_handle,
+				     unsigned long dev_ino,
+				     unsigned long *cookie);
+unsigned long sun4v_vintr_set_cookie(unsigned long dev_handle,
+				     unsigned long dev_ino,
+				     unsigned long cookie);
+unsigned long sun4v_vintr_get_valid(unsigned long dev_handle,
+				    unsigned long dev_ino,
+				    unsigned long *valid);
+unsigned long sun4v_vintr_set_valid(unsigned long dev_handle,
+				    unsigned long dev_ino,
+				    unsigned long valid);
+unsigned long sun4v_vintr_get_state(unsigned long dev_handle,
+				    unsigned long dev_ino,
+				    unsigned long *state);
+unsigned long sun4v_vintr_set_state(unsigned long dev_handle,
+				    unsigned long dev_ino,
+				    unsigned long state);
+unsigned long sun4v_vintr_get_target(unsigned long dev_handle,
+				     unsigned long dev_ino,
+				     unsigned long *cpuid);
+unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
+				     unsigned long dev_ino,
+				     unsigned long cpuid);
+#endif
+
+/* PCI IO services.
+ *
+ * See the terminology descriptions in the device interrupt services
+ * section above as those apply here too.  Here are terminology
+ * definitions specific to these PCI IO services:
+ *
+ *	tsbnum		TSB number.  Identifies which io-tsb is used.
+ *			For this version of the specification, tsbnum
+ *			must be zero.
+ *
+ *	tsbindex	TSB index.  Identifies which entry in the TSB
+ *			is used.  The first entry is zero.
+ *
+ *	tsbid		A 64-bit aligned data structure which contains
+ *			a tsbnum and a tsbindex.  Bits 63:32 contain the
+ *			tsbnum and bits 31:00 contain the tsbindex.
+ *
+ *			Use the HV_PCI_TSBID() macro to construct such
+ * 			values.
+ *
+ *	io_attributes	IO attributes for IOMMU mappings.  One or more
+ *			of the attribute bits are stored in a 64-bit
+ *			value.  The values are defined below.
+ *
+ *	r_addr		64-bit real address
+ *
+ *	pci_device	PCI device address.  A PCI device address identifies
+ *			a specific device on a specific PCI bus segment.
+ *			A PCI device address is a 32-bit unsigned integer
+ *			with the following format:
+ *
+ *				00000000.bbbbbbbb.dddddfff.00000000
+ *
+ *			Use the HV_PCI_DEVICE_BUILD() macro to construct
+ *			such values.
+ *
+ *	pci_config_offset
+ *			PCI configuration space offset.  For conventional
+ *			PCI a value between 0 and 255.  For extended
+ *			configuration space, a value between 0 and 4095.
+ *
+ *			Note: For PCI configuration space accesses, the offset
+ *			      must be aligned to the access size.
+ *
+ *	error_flag	A return value which specifies if the action succeeded
+ *			or failed.  0 means no error, non-0 means some error
+ *			occurred while performing the service.
+ *
+ *	io_sync_direction
+ *			Direction definition for pci_dma_sync(), defined
+ *			below in HV_PCI_SYNC_*.
+ *
+ *	io_page_list	A list of io_page_addresses; an io_page_address is
+ *			a real address.
+ *
+ *	io_page_list_p	A pointer to an io_page_list.
+ *
+ *	"size based byte swap" - Some functions do size based byte swapping
+ *				 which allows sw to access pointers and
+ *				 counters in native form when the processor
+ *				 operates in a different endianness than the
+ *				 IO bus.  Size-based byte swapping converts a
+ *				 multi-byte field between big-endian and
+ *				 little-endian format.
+ */
+
+#define HV_PCI_MAP_ATTR_READ		0x01
+#define HV_PCI_MAP_ATTR_WRITE		0x02
+#define HV_PCI_MAP_ATTR_RELAXED_ORDER	0x04
+
+#define HV_PCI_DEVICE_BUILD(b,d,f)	\
+	((((b) & 0xff) << 16) | \
+	 (((d) & 0x1f) << 11) | \
+	 (((f) & 0x07) <<  8))
+
+#define HV_PCI_TSBID(__tsb_num, __tsb_index) \
+	((((u64)(__tsb_num)) << 32UL) | ((u64)(__tsb_index)))
+
+#define HV_PCI_SYNC_FOR_DEVICE		0x01
+#define HV_PCI_SYNC_FOR_CPU		0x02
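+
+/* Examples (illustrative only) of the helper macros above: bus 2,
+ * device 1, function 0 encodes to the pci_device value 0x00020800,
+ * and TSB number 0 with TSB index 16 packs into a tsbid with the
+ * tsbnum in bits 63:32 and the tsbindex in bits 31:00.
+ */
+#define EXAMPLE_PCI_DEVICE	HV_PCI_DEVICE_BUILD(2, 1, 0)	/* 0x00020800 */
+#define EXAMPLE_PCI_TSBID	HV_PCI_TSBID(0, 16)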
+
+/* pci_iommu_map()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOMMU_MAP
+ * ARG0:	devhandle
+ * ARG1:	tsbid
+ * ARG2:	#ttes
+ * ARG3:	io_attributes
+ * ARG4:	io_page_list_p
+ * RET0:	status
+ * RET1:	#ttes mapped
+ * ERRORS:	EINVAL		Invalid devhandle/tsbnum/tsbindex/io_attributes
+ *		EBADALIGN	Improperly aligned real address
+ *		ENORADDR	Invalid real address
+ *
+ * Create IOMMU mappings in the sun4v device defined by the given
+ * devhandle.  The mappings are created in the TSB defined by the
+ * tsbnum component of the given tsbid.  The first mapping is created
+ * in the TSB index defined by the tsbindex component of the given tsbid.
+ * The call creates up to #ttes mappings, the first one at tsbnum, tsbindex,
+ * the second at tsbnum, tsbindex + 1, etc.
+ *
+ * All mappings are created with the attributes defined by the io_attributes
+ * argument.  The page mapping addresses are described in the io_page_list
+ * defined by the given io_page_list_p, which is a pointer to the io_page_list.
+ * The first entry in the io_page_list is the address for the first iotte, the
+ * 2nd for the 2nd iotte, and so on.
+ *
+ * Each io_page_address in the io_page_list must be appropriately aligned.
+ * #ttes must be greater than zero.  For this version of the spec, the tsbnum
+ * component of the given tsbid must be zero.
+ *
+ * Returns the actual number of mappings created, which may be less than
+ * or equal to the argument #ttes.  If the function returns a value which
+ * is less than #ttes, the caller may continue to call the function with
+ * updated tsbid, #ttes, and io_page_list_p arguments until all pages are
+ * mapped.
+ *
+ * Note: This function does not imply an iotte cache flush.  The guest must
+ *       demap an entry before re-mapping it.
+ */
+#define HV_FAST_PCI_IOMMU_MAP		0xb0
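+
+/* Example (illustrative only) of the retry pattern described above.
+ * No sun4v_*() wrapper for this call is declared in this header, so
+ * example_pci_iommu_map() is a hypothetical stand-in that performs
+ * the fast trap and stores RET1 (#ttes mapped) through 'mapped'.
+ * Each io_page_list entry is an 8-byte real address.
+ */
+#ifndef __ASSEMBLY__
+unsigned long example_pci_iommu_map(unsigned long devhandle,
+				    unsigned long tsbid,
+				    unsigned long num_ttes,
+				    unsigned long io_attributes,
+				    unsigned long io_page_list_ra,
+				    unsigned long *mapped);
+
+static inline unsigned long example_iommu_map_all(unsigned long devhandle,
+						  unsigned long tsbid,
+						  unsigned long num_ttes,
+						  unsigned long io_attributes,
+						  unsigned long io_page_list_ra)
+{
+	unsigned long hv_err, mapped;
+
+	while (num_ttes) {
+		hv_err = example_pci_iommu_map(devhandle, tsbid, num_ttes,
+					       io_attributes,
+					       io_page_list_ra, &mapped);
+		if (hv_err)
+			return hv_err;
+		/* Advance the tsbindex component and the page list.  */
+		tsbid += mapped;
+		io_page_list_ra += mapped * 8;
+		num_ttes -= mapped;
+	}
+	return HV_EOK;
+}
+#endif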
+
+/* pci_iommu_demap()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOMMU_DEMAP
+ * ARG0:	devhandle
+ * ARG1:	tsbid
+ * ARG2:	#ttes
+ * RET0:	status
+ * RET1:	#ttes demapped
+ * ERRORS:	EINVAL		Invalid devhandle/tsbnum/tsbindex
+ *
+ * Demap and flush IOMMU mappings in the device defined by the given
+ * devhandle.  Demaps up to #ttes entries in the TSB defined by the tsbnum
+ * component of the given tsbid, starting at the TSB index defined by the
+ * tsbindex component of the given tsbid.
+ *
+ * For this version of the spec, the tsbnum of the given tsbid must be zero.
+ * #ttes must be greater than zero.
+ *
+ * Returns the actual number of ttes demapped, which may be less than or equal
+ * to the argument #ttes.  If #ttes demapped is less than #ttes, the caller
+ * may continue to call this function with updated tsbid and #ttes arguments
+ * until all pages are demapped.
+ *
+ * Note: Entries do not have to be mapped to be demapped.  A demap of an
+ *       unmapped page will flush the entry from the tte cache.
+ */
+#define HV_FAST_PCI_IOMMU_DEMAP		0xb1
+
+/* pci_iommu_getmap()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOMMU_GETMAP
+ * ARG0:	devhandle
+ * ARG1:	tsbid
+ * RET0:	status
+ * RET1:	io_attributes
+ * RET2:	real address
+ * ERRORS:	EINVAL		Invalid devhandle/tsbnum/tsbindex
+ *		ENOMAP		Mapping is not valid, no translation exists
+ *
+ * Read and return the mapping in the device described by the given devhandle
+ * and tsbid.  If successful, the io_attributes shall be returned in RET1
+ * and the page address of the mapping shall be returned in RET2.
+ *
+ * For this version of the spec, the tsbnum component of the given tsbid
+ * must be zero.
+ */
+#define HV_FAST_PCI_IOMMU_GETMAP	0xb2
+
+/* pci_iommu_getbypass()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOMMU_GETBYPASS
+ * ARG0:	devhandle
+ * ARG1:	real address
+ * ARG2:	io_attributes
+ * RET0:	status
+ * RET1:	io_addr
+ * ERRORS:	EINVAL		Invalid devhandle/io_attributes
+ *		ENORADDR	Invalid real address
+ *		ENOTSUPPORTED	Function not supported in this implementation.
+ *
+ * Create a "special" mapping in the device described by the given devhandle,
+ * for the given real address and attributes.  Return the IO address in RET1
+ * if successful.
+ */
+#define HV_FAST_PCI_IOMMU_GETBYPASS	0xb3
+
+/* pci_config_get()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_CONFIG_GET
+ * ARG0:	devhandle
+ * ARG1:	pci_device
+ * ARG2:	pci_config_offset
+ * ARG3:	size
+ * RET0:	status
+ * RET1:	error_flag
+ * RET2:	data
+ * ERRORS:	EINVAL		Invalid devhandle/pci_device/offset/size
+ *		EBADALIGN	pci_config_offset not size aligned
+ *		ENOACCESS	Access to this offset is not permitted
+ *
+ * Read PCI configuration space for the adapter described by the given
+ * devhandle.  Read size (1, 2, or 4) bytes of data from the given
+ * pci_device, at pci_config_offset from the beginning of the device's
+ * configuration space.  If there was no error, RET1 is set to zero and
+ * RET2 is set to the data read.  Insignificant bits in RET2 are not
+ * guaranteed to have any specific value and therefore must be ignored.
+ *
+ * The data returned in RET2 is size based byte swapped.
+ *
+ * If an error occurs during the read, set RET1 to a non-zero value.  The
+ * given pci_config_offset must be 'size' aligned.
+ */
+#define HV_FAST_PCI_CONFIG_GET		0xb4
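
For illustration, a 16-bit config-space read could look like the sketch below.
hv_pci_config_get() is a hypothetical wrapper (status in the return value,
RET1/RET2 stored through pointers), not an interface this header declares;
note that both the trap status and the error_flag must be checked before the
data is trusted.

static int cfg_read16(unsigned long devhandle, unsigned long pci_device,
		      unsigned long off, u16 *val)
{
	unsigned long error_flag, data;

	if (hv_pci_config_get(devhandle, pci_device, off, 2,
			      &error_flag, &data))
		return -1;		/* trap-level failure */
	if (error_flag)
		return -1;		/* the config read itself failed */
	*val = data & 0xffff;		/* mask off insignificant bits */
	return 0;
}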
+
+/* pci_config_put()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_CONFIG_PUT
+ * ARG0:	devhandle
+ * ARG1:	pci_device
+ * ARG2:	pci_config_offset
+ * ARG3:	size
+ * ARG4:	data
+ * RET0:	status
+ * RET1:	error_flag
+ * ERRORS:	EINVAL		Invalid devhandle/pci_device/offset/size
+ *		EBADALIGN	pci_config_offset not size aligned
+ *		ENOACCESS	Access to this offset is not permitted
+ *
+ * Write PCI configuration space for the adapter described by the given
+ * devhandle.  Write size (1, 2, or 4) bytes of data in a single operation,
+ * at pci_config_offset from the beginning of the device's configuration
+ * space.  The data argument contains the data to be written to configuration
+ * space.  Prior to writing, the data is size based byte swapped.
+ *
+ * If an error occurs during the write access, do not generate an error
+ * report, but set RET1 to a non-zero value.  Otherwise RET1 is zero.
+ * The given pci_config_offset must be 'size' aligned.
+ *
+ * This function is permitted to read from offset zero in the configuration
+ * space described by the given pci_device if necessary to ensure that the
+ * write access to config space completes.
+ */
+#define HV_FAST_PCI_CONFIG_PUT		0xb5
+
+/* pci_peek()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_PEEK
+ * ARG0:	devhandle
+ * ARG1:	real address
+ * ARG2:	size
+ * RET0:	status
+ * RET1:	error_flag
+ * RET2:	data
+ * ERRORS:	EINVAL		Invalid devhandle or size
+ *		EBADALIGN	Improperly aligned real address
+ *		ENORADDR	Bad real address
+ *		ENOACCESS	Guest access prohibited
+ *
+ * Attempt to read the IO address given by the given devhandle, real address,
+ * and size.  Size must be 1, 2, 4, or 8.  The read is performed as a single
+ * access operation using the given size.  If an error occurs when reading
+ * from the given location, do not generate an error report, but return a
+ * non-zero value in RET1.  If the read was successful, return zero in RET1
+ * and return the actual data read in RET2.  The data returned is size based
+ * byte swapped.
+ *
+ * Non-significant bits in RET2 are not guaranteed to have any specific value
+ * and therefore must be ignored.  If RET1 is returned as non-zero, the data
+ * value is not guaranteed to have any specific value and should be ignored.
+ *
+ * The caller must have permission to read from the given devhandle, real
+ * address, which must be an IO address.  The argument real address must be a
+ * size aligned address.
+ *
+ * The hypervisor implementation of this function must block access to any
+ * IO address that the guest does not have explicit permission to access.
+ */
+#define HV_FAST_PCI_PEEK		0xb6
+
+/* pci_poke()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_POKE
+ * ARG0:	devhandle
+ * ARG1:	real address
+ * ARG2:	size
+ * ARG3:	data
+ * ARG4:	pci_device
+ * RET0:	status
+ * RET1:	error_flag
+ * ERRORS:	EINVAL		Invalid devhandle, size, or pci_device
+ *		EBADALIGN	Improperly aligned real address
+ *		ENORADDR	Bad real address
+ *		ENOACCESS	Guest access prohibited
+ *		ENOTSUPPORTED	Function is not supported by implementation
+ *
+ * Attempt to write data to the IO address given by the given devhandle,
+ * real address, and size.  Size must be 1, 2, 4, or 8.  The write is
+ * performed as a single access operation using the given size.  Prior to
+ * writing, the data is size based byte swapped.
+ *
+ * If an error occurs when writing to the given location, do not generate an
+ * error report, but return a non-zero value in RET1.  If the write was
+ * successful, return zero in RET1.
+ *
+ * pci_device describes the configuration address of the device being
+ * written to.  The implementation may safely read from offset 0 within
+ * the configuration space of the device described by devhandle and
+ * pci_device in order to guarantee that the write portion of the operation
+ * completes.
+ *
+ * Any error that occurs due to the read shall be reported using the normal
+ * error reporting mechanisms; the read error is not suppressed.
+ *
+ * The caller must have permission to write to the given devhandle, real
+ * address, which must be an IO address.  The argument real address must be a
+ * size aligned address.  The caller must have permission to read from
+ * the given devhandle, pci_device configuration space offset 0.
+ *
+ * The hypervisor implementation of this function must block access to any
+ * IO address that the guest does not have explicit permission to access.
+ */
+#define HV_FAST_PCI_POKE		0xb7
+
+/* pci_dma_sync()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_DMA_SYNC
+ * ARG0:	devhandle
+ * ARG1:	real address
+ * ARG2:	size
+ * ARG3:	io_sync_direction
+ * RET0:	status
+ * RET1:	#synced
+ * ERRORS:	EINVAL		Invalid devhandle or io_sync_direction
+ *		ENORADDR	Bad real address
+ *
+ * Synchronize a memory region described by the given real address and size,
+ * for the device defined by the given devhandle using the direction(s)
+ * defined by the given io_sync_direction.  The argument size is the size of
+ * the memory region in bytes.
+ *
+ * Return the actual number of bytes synchronized in the return value #synced,
+ * which may be less than or equal to the argument size.  If the return
+ * value #synced is less than size, the caller must continue to call this
+ * function with updated real address and size arguments until the entire
+ * memory region is synchronized.
+ */
+#define HV_FAST_PCI_DMA_SYNC		0xb8
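
The partial-completion contract above implies the usual loop.  In this sketch
hv_pci_dma_sync() is a hypothetical wrapper returning the status and storing
#synced through its last argument.

static int dma_sync_region(unsigned long devhandle, unsigned long ra,
			   unsigned long size, unsigned long direction)
{
	while (size) {
		unsigned long synced;

		if (hv_pci_dma_sync(devhandle, ra, size, direction, &synced))
			return -1;
		ra += synced;		/* advance past what was synced */
		size -= synced;
	}
	return 0;
}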
+
+/* PCI MSI services.  */
+
+#define HV_MSITYPE_MSI32		0x00
+#define HV_MSITYPE_MSI64		0x01
+
+#define HV_MSIQSTATE_IDLE		0x00
+#define HV_MSIQSTATE_ERROR		0x01
+
+#define HV_MSIQ_INVALID			0x00
+#define HV_MSIQ_VALID			0x01
+
+#define HV_MSISTATE_IDLE		0x00
+#define HV_MSISTATE_DELIVERED		0x01
+
+#define HV_MSIVALID_INVALID		0x00
+#define HV_MSIVALID_VALID		0x01
+
+#define HV_PCIE_MSGTYPE_PME_MSG		0x18
+#define HV_PCIE_MSGTYPE_PME_ACK_MSG	0x1b
+#define HV_PCIE_MSGTYPE_CORR_MSG	0x30
+#define HV_PCIE_MSGTYPE_NONFATAL_MSG	0x31
+#define HV_PCIE_MSGTYPE_FATAL_MSG	0x33
+
+#define HV_MSG_INVALID			0x00
+#define HV_MSG_VALID			0x01
+
+/* pci_msiq_conf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_CONF
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * ARG2:	real address
+ * ARG3:	number of entries
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle, msiqid or nentries
+ *		EBADALIGN	Improperly aligned real address
+ *		ENORADDR	Bad real address
+ *
+ * Configure the MSI queue given by the devhandle and msiqid arguments,
+ * and to be placed at the given real address and be of the given
+ * number of entries.  The real address must be aligned exactly to match
+ * the queue size.  Each queue entry is 64 bytes long, so for example a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary.  The MSI-EQ
+ * Head and Tail are initialized so that the MSI-EQ is 'empty'.
+ *
+ * Implementation Note: Certain implementations have fixed sized queues.  In
+ *                      that case, number of entries must contain the correct
+ *                      value.
+ */
+#define HV_FAST_PCI_MSIQ_CONF		0xc0
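
The alignment rule reduces to simple arithmetic.  A sketch of the check,
assuming a power-of-two entry count as in the 32-entry example above:

static int msiq_base_aligned(unsigned long q_ra, unsigned long nentries)
{
	unsigned long qbytes = nentries * 64;	/* 64 bytes per entry */

	/* e.g. 32 entries -> 2048 bytes -> 2048-byte alignment */
	return (q_ra & (qbytes - 1)) == 0;
}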
+
+/* pci_msiq_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_INFO
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * RET0:	status
+ * RET1:	real address
+ * RET2:	number of entries
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid
+ *
+ * Return the configuration information for the MSI queue described
+ * by the given devhandle and msiqid.  The base address of the queue
+ * is returned in RET1 and the number of entries is returned in RET2.
+ * If the queue is unconfigured, the real address is undefined and the
+ * number of entries will be returned as zero.
+ */
+#define HV_FAST_PCI_MSIQ_INFO		0xc1
+
+/* pci_msiq_getvalid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_GETVALID
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * RET0:	status
+ * RET1:	msiqvalid	(HV_MSIQ_VALID or HV_MSIQ_INVALID)
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid
+ *
+ * Get the valid state of the MSI-EQ described by the given devhandle and
+ * msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETVALID	0xc2
+
+/* pci_msiq_setvalid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_SETVALID
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * ARG2:	msiqvalid	(HV_MSIQ_VALID or HV_MSIQ_INVALID)
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid or msiqvalid
+ *				value or MSI EQ is uninitialized
+ *
+ * Set the valid state of the MSI-EQ described by the given devhandle and
+ * msiqid to the given msiqvalid.
+ */
+#define HV_FAST_PCI_MSIQ_SETVALID	0xc3
+
+/* pci_msiq_getstate()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_GETSTATE
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * RET0:	status
+ * RET1:	msiqstate	(HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR)
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid
+ *
+ * Get the state of the MSI-EQ described by the given devhandle and
+ * msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETSTATE	0xc4
+
+/* pci_msiq_setstate()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_SETSTATE
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * ARG2:	msiqstate	(HV_MSIQSTATE_IDLE or HV_MSIQSTATE_ERROR)
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid or msiqstate
+ *				value or MSI EQ is uninitialized
+ *
+ * Set the state of the MSI-EQ described by the given devhandle and
+ * msiqid to the given msiqstate.
+ */
+#define HV_FAST_PCI_MSIQ_SETSTATE	0xc5
+
+/* pci_msiq_gethead()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_GETHEAD
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * RET0:	status
+ * RET1:	msiqhead
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid
+ *
+ * Get the current MSI EQ queue head for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETHEAD	0xc6
+
+/* pci_msiq_sethead()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_SETHEAD
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * ARG2:	msiqhead
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid or msiqhead,
+ *				or MSI EQ is uninitialized
+ *
+ * Set the current MSI EQ queue head for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_SETHEAD	0xc7
+
+/* pci_msiq_gettail()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSIQ_GETTAIL
+ * ARG0:	devhandle
+ * ARG1:	msiqid
+ * RET0:	status
+ * RET1:	msiqtail
+ * ERRORS:	EINVAL		Invalid devhandle or msiqid
+ *
+ * Get the current MSI EQ queue tail for the MSI-EQ described by the
+ * given devhandle and msiqid.
+ */
+#define HV_FAST_PCI_MSIQ_GETTAIL	0xc8
+
+/* pci_msi_getvalid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSI_GETVALID
+ * ARG0:	devhandle
+ * ARG1:	msinum
+ * RET0:	status
+ * RET1:	msivalidstate
+ * ERRORS:	EINVAL		Invalid devhandle or msinum
+ *
+ * Get the current valid/enabled state for the MSI defined by the
+ * given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_GETVALID	0xc9
+
+/* pci_msi_setvalid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSI_SETVALID
+ * ARG0:	devhandle
+ * ARG1:	msinum
+ * ARG2:	msivalidstate
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msinum or msivalidstate
+ *
+ * Set the current valid/enabled state for the MSI defined by the
+ * given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_SETVALID	0xca
+
+/* pci_msi_getmsiq()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSI_GETMSIQ
+ * ARG0:	devhandle
+ * ARG1:	msinum
+ * RET0:	status
+ * RET1:	msiqid
+ * ERRORS:	EINVAL		Invalid devhandle or msinum or MSI is unbound
+ *
+ * Get the MSI EQ that the MSI defined by the given devhandle and
+ * msinum is bound to.
+ */
+#define HV_FAST_PCI_MSI_GETMSIQ		0xcb
+
+/* pci_msi_setmsiq()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSI_SETMSIQ
+ * ARG0:	devhandle
+ * ARG1:	msinum
+ * ARG2:	msitype
+ * ARG3:	msiqid
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msinum or msiqid
+ *
+ * Set the MSI EQ that the MSI defined by the given devhandle and
+ * msinum is bound to.
+ */
+#define HV_FAST_PCI_MSI_SETMSIQ		0xcc
+
+/* pci_msi_getstate()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSI_GETSTATE
+ * ARG0:	devhandle
+ * ARG1:	msinum
+ * RET0:	status
+ * RET1:	msistate
+ * ERRORS:	EINVAL		Invalid devhandle or msinum
+ *
+ * Get the state of the MSI defined by the given devhandle and msinum.
+ * If not initialized, return HV_MSISTATE_IDLE.
+ */
+#define HV_FAST_PCI_MSI_GETSTATE	0xcd
+
+/* pci_msi_setstate()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSI_SETSTATE
+ * ARG0:	devhandle
+ * ARG1:	msinum
+ * ARG2:	msistate
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msinum or msistate
+ *
+ * Set the state of the MSI defined by the given devhandle and msinum.
+ */
+#define HV_FAST_PCI_MSI_SETSTATE	0xce
+
+/* pci_msg_getmsiq()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSG_GETMSIQ
+ * ARG0:	devhandle
+ * ARG1:	msgtype
+ * RET0:	status
+ * RET1:	msiqid
+ * ERRORS:	EINVAL		Invalid devhandle or msgtype
+ *
+ * Get the MSI EQ of the MSG defined by the given devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_GETMSIQ		0xd0
+
+/* pci_msg_setmsiq()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSG_SETMSIQ
+ * ARG0:	devhandle
+ * ARG1:	msgtype
+ * ARG2:	msiqid
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle, msgtype, or msiqid
+ *
+ * Set the MSI EQ of the MSG defined by the given devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_SETMSIQ		0xd1
+
+/* pci_msg_getvalid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSG_GETVALID
+ * ARG0:	devhandle
+ * ARG1:	msgtype
+ * RET0:	status
+ * RET1:	msgvalidstate
+ * ERRORS:	EINVAL		Invalid devhandle or msgtype
+ *
+ * Get the valid/enabled state of the MSG defined by the given
+ * devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_GETVALID	0xd2
+
+/* pci_msg_setvalid()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_MSG_SETVALID
+ * ARG0:	devhandle
+ * ARG1:	msgtype
+ * ARG2:	msgvalidstate
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle or msgtype or msgvalidstate
+ *
+ * Set the valid/enabled state of the MSG defined by the given
+ * devhandle and msgtype.
+ */
+#define HV_FAST_PCI_MSG_SETVALID	0xd3
+
+/* PCI IOMMU v2 definitions and services
+ *
+ * While the PCI IO definitions above remain valid, IOMMU v2 adds new
+ * PCI IO definitions and services.
+ *
+ *	CTE		Clump Table Entry. First level table entry in the ATU.
+ *
+ *	pci_device_list
+ *			A 32-bit aligned list of pci_devices.
+ *
+ *	pci_device_listp
+ *			real address of a pci_device_list. 32-bit aligned.
+ *
+ *	iotte		IOMMU translation table entry.
+ *
+ *	iotte_attributes
+ *			IO Attributes for IOMMU v2 mappings.  In addition to
+ *			read and write, IOMMU v2 supports relaxed ordering.
+ *
+ *	io_page_list	A 64-bit aligned list of real addresses. Each real
+ *			address in an io_page_list must be properly aligned
+ *			to the pagesize of the given IOTSB.
+ *
+ *	io_page_list_p	Real address of an io_page_list, 64-bit aligned.
+ *
+ *	IOTSB		IO Translation Storage Buffer. An aligned table of
+ *			IOTTEs. Each IOTSB has a pagesize, table size, and
+ *			virtual address associated with it that must match
+ *			a pagesize and table size supported by the underlying
+ *			hardware implementation. The alignment requirements
+ *			for an IOTSB depend on the pagesize used for that IOTSB.
+ *			Each IOTTE in an IOTSB maps one pagesize-sized page.
+ *			The size of the IOTSB dictates how large of a virtual
+ *			address space the IOTSB is capable of mapping.
+ *
+ *	iotsb_handle	An opaque identifier for an IOTSB. A devhandle plus
+ *			iotsb_handle represents a binding of an IOTSB to a
+ *			PCI root complex.
+ *
+ *	iotsb_index	Zero-based IOTTE number within an IOTSB.
+ */
+
+/* The index_count argument consists of two fields:
+ * bits 63:48 #iottes and bits 47:0 iotsb_index
+ */
+#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \
+	(((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index)))
+
+/* pci_iotsb_conf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_CONF
+ * ARG0:	devhandle
+ * ARG1:	r_addr
+ * ARG2:	size
+ * ARG3:	pagesize
+ * ARG4:	iova
+ * RET0:	status
+ * RET1:	iotsb_handle
+ * ERRORS:	EINVAL		Invalid devhandle, size, iova, or pagesize
+ *		EBADALIGN	r_addr is not properly aligned
+ *		ENORADDR	r_addr is not a valid real address
+ *		ETOOMANY	No further IOTSBs may be configured
+ *		EBUSY		Duplicate devhandle, r_addr, iova combination
+ *
+ * Create an IOTSB suitable for the PCI root complex identified by devhandle,
+ * for the DMA virtual address defined by the argument iova.
+ *
+ * r_addr is the properly aligned base address of the IOTSB and size is the
+ * IOTSB (table) size in bytes.  The IOTSB is required to be zeroed prior to
+ * being configured. If it contains any values other than zeros then the
+ * behavior is undefined.
+ *
+ * pagesize is the size of each page in the IOTSB. Note that the combination of
+ * size (table size) and pagesize must be valid.
+ *
+ * iova is the DMA virtual address this IOTSB will map.
+ *
+ * If successful, the opaque 64-bit handle iotsb_handle is returned in RET1.
+ * Once configured, privileged access to the IOTSB memory is prohibited and
+ * creates undefined behavior. The only permitted access is indirect via these
+ * services.
+ */
+#define HV_FAST_PCI_IOTSB_CONF		0x190
+
+/* pci_iotsb_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_INFO
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * RET0:	status
+ * RET1:	r_addr
+ * RET2:	size
+ * RET3:	pagesize
+ * RET4:	iova
+ * RET5:	#bound
+ * ERRORS:	EINVAL	Invalid devhandle or iotsb_handle
+ *
+ * This service returns configuration information about an IOTSB previously
+ * created with pci_iotsb_conf.
+ *
+ * iotsb_handle value 0 may be used with this service to inquire about the
+ * legacy IOTSB that may or may not exist. If the service succeeds, the return
+ * values describe the legacy IOTSB and I/O virtual addresses mapped by that
+ * table.  However, the table base address r_addr may contain the value -1,
+ * which indicates a memory range that cannot be accessed or reclaimed.
+ *
+ * The return value #bound contains the number of PCI devices that iotsb_handle
+ * is currently bound to.
+ */
+#define HV_FAST_PCI_IOTSB_INFO		0x191
+
+/* pci_iotsb_unconf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_UNCONF
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * RET0:	status
+ * ERRORS:	EINVAL	Invalid devhandle or iotsb_handle
+ *		EBUSY	The IOTSB is bound and may not be unconfigured
+ *
+ * This service unconfigures the IOTSB identified by the devhandle and
+ * iotsb_handle arguments, previously created with pci_iotsb_conf.
+ * The IOTSB must not be currently bound to any device or the service will fail.
+ *
+ * If the call succeeds, iotsb_handle is no longer valid.
+ */
+#define HV_FAST_PCI_IOTSB_UNCONF	0x192
+
+/* pci_iotsb_bind()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_BIND
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	pci_device
+ * RET0:	status
+ * ERRORS:	EINVAL	Invalid devhandle, iotsb_handle, or pci_device
+ *		EBUSY	A PCI function is already bound to an IOTSB at the same
+ *			address range as specified by devhandle, iotsb_handle.
+ *
+ * This service binds the PCI function specified by the argument pci_device to
+ * the IOTSB specified by the arguments devhandle and iotsb_handle.
+ *
+ * The PCI device function is bound to the specified IOTSB with the IOVA range
+ * specified when the IOTSB was configured via pci_iotsb_conf. If the function
+ * is already bound then it is unbound first.
+ */
+#define HV_FAST_PCI_IOTSB_BIND		0x193
+
+/* pci_iotsb_unbind()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_UNBIND
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	pci_device
+ * RET0:	status
+ * ERRORS:	EINVAL	Invalid devhandle, iotsb_handle, or pci_device
+ *		ENOMAP	The PCI function was not bound to the specified IOTSB
+ *
+ * This service unbinds the PCI device specified by the argument pci_device
+ * from the IOTSB identified by the arguments devhandle and iotsb_handle.
+ *
+ * If the PCI device is not bound to the specified IOTSB then this service will
+ * fail with status ENOMAP.
+ */
+#define HV_FAST_PCI_IOTSB_UNBIND	0x194
+
+/* pci_iotsb_get_binding()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_GET_BINDING
+ * ARG0:	devhandle
+ * ARG1:	pci_device
+ * ARG2:	iova
+ * RET0:	status
+ * RET1:	iotsb_handle
+ * ERRORS:	EINVAL	Invalid devhandle, pci_device, or iova
+ *		ENOMAP	The PCI function is not bound to an IOTSB at iova
+ *
+ * This service returns the IOTSB binding, iotsb_handle, for a given pci_device
+ * and DMA virtual address, iova.
+ *
+ * iova must be the base address of a DMA virtual address range as defined by
+ * the iommu-address-ranges property in the root complex device node defined
+ * by the argument devhandle.
+ */
+#define HV_FAST_PCI_IOTSB_GET_BINDING	0x195
+
+/* pci_iotsb_map()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_MAP
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	index_count
+ * ARG3:	iotte_attributes
+ * ARG4:	io_page_list_p
+ * RET0:	status
+ * RET1:	#mapped
+ * ERRORS:	EINVAL		Invalid devhandle, iotsb_handle, #iottes,
+ *				iotsb_index or iotte_attributes
+ *		EBADALIGN	Improperly aligned io_page_list_p or I/O page
+ *				address in the I/O page list.
+ *		ENORADDR	Invalid io_page_list_p or I/O page address in
+ *				the I/O page list.
+ *
+ * This service creates and flushes mappings in the IOTSB defined by the
+ * arguments devhandle and iotsb_handle.
+ *
+ * The index_count argument consists of two fields.  Bits 63:48 contain #iottes
+ * and bits 47:0 contain iotsb_index.
+ *
+ * The first mapping is created in the IOTSB index specified by iotsb_index.
+ * Subsequent mappings are created at iotsb_index+1 and so on.
+ *
+ * The attributes of each mapping are defined by the argument iotte_attributes.
+ *
+ * The io_page_list_p specifies the real address of the 64-bit-aligned list of
+ * #iottes I/O page addresses. Each page address must be a properly aligned
+ * real address of a page to be mapped in the IOTSB. The first entry in the I/O
+ * page list contains the real address of the first page, the 2nd entry for the
+ * 2nd page, and so on.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The return value #mapped is the actual number of mappings created, which may
+ * be less than or equal to the argument #iottes. If the function returns
+ * successfully with a #mapped value less than the requested #iottes then the
+ * caller should continue to invoke the service with updated iotsb_index,
+ * #iottes, and io_page_list_p arguments until all pages are mapped.
+ *
+ * This service must not be used to demap a mapping. In other words, all
+ * mappings must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at the time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP		0x196
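
A sketch of the retry loop, re-packing index_count on each pass with the macro
defined above.  hv_pci_iotsb_map() is a hypothetical wrapper for this call
(status in the return value, #mapped stored through the last argument).

static int iotsb_map_all(unsigned long devhandle, unsigned long iotsb_handle,
			 unsigned long index, unsigned long niottes,
			 unsigned long attrs, unsigned long page_list_pa)
{
	while (niottes) {
		unsigned long mapped;
		unsigned long ic = HV_PCI_IOTSB_INDEX_COUNT(niottes, index);

		if (hv_pci_iotsb_map(devhandle, iotsb_handle, ic, attrs,
				     page_list_pa, &mapped))
			return -1;
		index += mapped;
		niottes -= mapped;
		page_list_pa += mapped * 8;	/* 64-bit page addresses */
	}
	return 0;
}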
+
+/* pci_iotsb_map_one()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_MAP_ONE
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	iotsb_index
+ * ARG3:	iotte_attributes
+ * ARG4:	r_addr
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid devhandle, iotsb_handle, iotsb_index
+ *				or iotte_attributes
+ *		EBADALIGN	Improperly aligned r_addr
+ *		ENORADDR	Invalid r_addr
+ *
+ * This service creates and flushes a single mapping in the IOTSB defined by the
+ * arguments devhandle and iotsb_handle.
+ *
+ * The mapping for the page at r_addr is created at the IOTSB index specified by
+ * iotsb_index with the attributes iotte_attributes.
+ *
+ * This service must not be used to demap a mapping. In other words, the mapping
+ * must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at the time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP_ONE	0x197
+
+/* pci_iotsb_demap()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_DEMAP
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	iotsb_index
+ * ARG3:	#iottes
+ * RET0:	status
+ * RET1:	#unmapped
+ * ERRORS:	EINVAL	Invalid devhandle, iotsb_handle, iotsb_index or #iottes
+ *
+ * This service unmaps and flushes up to #iottes mappings starting at index
+ * iotsb_index from the IOTSB defined by the arguments devhandle and iotsb_handle.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs unmapped is returned in #unmapped and may be less
+ * than or equal to the requested number of IOTTEs, #iottes.
+ *
+ * If #unmapped is less than #iottes, the caller should continue to invoke this
+ * service with updated iotsb_index and #iottes arguments until all pages are
+ * demapped.
+ */
+#define HV_FAST_PCI_IOTSB_DEMAP		0x198
+
+/* pci_iotsb_getmap()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_GETMAP
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	iotsb_index
+ * RET0:	status
+ * RET1:	r_addr
+ * RET2:	iotte_attributes
+ * ERRORS:	EINVAL	Invalid devhandle, iotsb_handle, or iotsb_index
+ *		ENOMAP	No mapping was found
+ *
+ * This service returns the mapping specified by index iotsb_index from the
+ * IOTSB defined by the arguments devhandle and iotsb_handle.
+ *
+ * Upon success, the real address of the mapping shall be returned in
+ * r_addr and the IOTTE mapping attributes shall be returned in
+ * iotte_attributes.
+ *
+ * The return value iotte_attributes may not include optional features used in
+ * the call to create the mapping.
+ */
+#define HV_FAST_PCI_IOTSB_GETMAP	0x199
+
+/* pci_iotsb_sync_mappings()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_PCI_IOTSB_SYNC_MAPPINGS
+ * ARG0:	devhandle
+ * ARG1:	iotsb_handle
+ * ARG2:	iotsb_index
+ * ARG3:	#iottes
+ * RET0:	status
+ * RET1:	#synced
+ * ERRORS:	EINVAL	Invalid devhandle, iotsb_handle, iotsb_index, or #iottes
+ *
+ * This service synchronizes #iottes mappings starting at index iotsb_index in
+ * the IOTSB defined by the arguments devhandle and iotsb_handle.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs synchronized is returned in #synced, which may
+ * be less than or equal to the requested number, #iottes.
+ *
+ * If, upon a successful return, #synced is less than #iottes, the caller
+ * should continue to invoke this service with updated iotsb_index and #iottes
+ * arguments until all pages are synchronized.
+ */
+#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS	0x19a
+
+/* Logical Domain Channel services.  */
+
+#define LDC_CHANNEL_DOWN		0
+#define LDC_CHANNEL_UP			1
+#define LDC_CHANNEL_RESETTING		2
+
+/* ldc_tx_qconf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_QCONF
+ * ARG0:	channel ID
+ * ARG1:	real address base of queue
+ * ARG2:	num entries in queue
+ * RET0:	status
+ *
+ * Configure transmit queue for the LDC endpoint specified by the
+ * given channel ID, to be placed at the given real address, and
+ * be of the given num entries.  Num entries must be a power of two.
+ * The real address base of the queue must be aligned on the queue
+ * size.  Each queue entry is 64 bytes, so for example, a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary.
+ *
+ * Upon configuration of a valid transmit queue the head and tail
+ * pointers are set to the same hypervisor-specific value, indicating
+ * that the queue is initially empty.
+ *
+ * The endpoint's transmit queue is un-configured if num entries is zero.
+ *
+ * The maximum number of entries for each queue for a specific cpu may be
+ * determined from the machine description.  A transmit queue may be
+ * specified even in the event that the LDC is down (peer endpoint has no
+ * receive queue specified).  Transmission will begin as soon as the peer
+ * endpoint defines a receive queue.
+ *
+ * It is recommended that a guest wait for a transmit queue to empty prior
+ * to reconfiguring it, or un-configuring it.  Re- or un-configuring a
+ * non-empty transmit queue behaves exactly as defined above, however it
+ * is undefined as to how many of the pending entries in the original queue
+ * will be delivered prior to the re-configuration taking effect.
+ * Furthermore, as the queue configuration causes a reset of the head and
+ * tail pointers there is no way for a guest to determine how many entries
+ * have been sent after the configuration operation.
+ */
+#define HV_FAST_LDC_TX_QCONF		0xe0
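
The two constraints above (power-of-two entry count, base aligned on the queue
size) can be checked before issuing the call.  A sketch using the
sun4v_ldc_tx_qconf() wrapper declared later in this header; the caller-supplied
real address and entry count, and the use of HV_EINVAL as the local failure
code, are assumptions of the sketch.

static unsigned long tx_qconf_checked(unsigned long channel,
				      unsigned long q_ra,
				      unsigned long nentries)
{
	if (nentries) {
		/* num entries must be a power of two... */
		if (nentries & (nentries - 1))
			return HV_EINVAL;
		/* ...and the base aligned on the queue size (64B/entry) */
		if (q_ra & (nentries * 64 - 1))
			return HV_EINVAL;
	}
	/* nentries == 0 un-configures the queue */
	return sun4v_ldc_tx_qconf(channel, q_ra, nentries);
}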
+
+/* ldc_tx_qinfo()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_QINFO
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	real address base of queue
+ * RET2:	num entries in queue
+ *
+ * Return the configuration info for the transmit queue of LDC endpoint
+ * defined by the given channel ID.  The real address is the currently
+ * defined real address base of the defined queue, and num entries is the
+ * size of the queue in terms of number of entries.
+ *
+ * If the specified channel ID is a valid endpoint number, but no transmit
+ * queue has been defined, this service will return success, but with num
+ * entries set to zero and the real address will have an undefined value.
+ */
+#define HV_FAST_LDC_TX_QINFO		0xe1
+
+/* ldc_tx_get_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_GET_STATE
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	head offset
+ * RET2:	tail offset
+ * RET3:	channel state
+ *
+ * Return the transmit state, and the head and tail queue pointers, for
+ * the transmit queue of the LDC endpoint defined by the given channel ID.
+ * The head and tail values are the byte offset of the head and tail
+ * positions of the transmit queue for the specified endpoint.
+ */
+#define HV_FAST_LDC_TX_GET_STATE	0xe2
+
+/* ldc_tx_set_qtail()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_TX_SET_QTAIL
+ * ARG0:	channel ID
+ * ARG1:	tail offset
+ * RET0:	status
+ *
+ * Update the tail pointer for the transmit queue associated with the LDC
+ * endpoint defined by the given channel ID.  The tail offset specified
+ * must be aligned on a 64 byte boundary, and calculated so as to increase
+ * the number of pending entries on the transmit queue.  Any attempt to
+ * decrease the number of pending transmit queue entries is considered
+ * an invalid tail offset and will result in an EINVAL error.
+ *
+ * Since the tail of the transmit queue may not be moved backwards, the
+ * transmit queue may be flushed by configuring a new transmit queue,
+ * whereupon the hypervisor will configure the initial transmit head and
+ * tail pointers to be equal.
+ */
+#define HV_FAST_LDC_TX_SET_QTAIL	0xe3
+
+/* ldc_rx_qconf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_QCONF
+ * ARG0:	channel ID
+ * ARG1:	real address base of queue
+ * ARG2:	num entries in queue
+ * RET0:	status
+ *
+ * Configure receive queue for the LDC endpoint specified by the
+ * given channel ID, to be placed at the given real address, and
+ * be of the given num entries.  Num entries must be a power of two.
+ * The real address base of the queue must be aligned on the queue
+ * size.  Each queue entry is 64 bytes, so for example, a 32 entry
+ * queue must be aligned on a 2048 byte real address boundary.
+ *
+ * The endpoint's receive queue is un-configured if num entries is zero.
+ *
+ * If a valid receive queue is specified for a local endpoint the LDC is
+ * in the up state for the purpose of transmission to this endpoint.
+ *
+ * The maximum number of entries for each queue for a specific cpu may be
+ * determined from the machine description.
+ *
+ * As receive queue configuration causes a reset of the queue's head and
+ * tail pointers, there is no way for a guest to determine how many entries
+ * have been received between a preceding ldc_rx_get_state() API call
+ * and the completion of the configuration operation.  It should be noted
+ * that datagram delivery is not guaranteed via domain channels anyway,
+ * and therefore any higher protocol should be resilient to datagram
+ * loss if necessary.  However, to overcome this specific race potential
+ * it is recommended, for example, that a higher level protocol be employed
+ * to ensure either retransmission, or ensure that no datagrams are pending
+ * on the peer endpoint's transmit queue prior to the configuration process.
+ */
+#define HV_FAST_LDC_RX_QCONF		0xe4
+
+/* ldc_rx_qinfo()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_QINFO
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	real address base of queue
+ * RET2:	num entries in queue
+ *
+ * Return the configuration info for the receive queue of LDC endpoint
+ * defined by the given channel ID.  The real address is the currently
+ * defined real address base of the defined queue, and num entries is the
+ * size of the queue in terms of number of entries.
+ *
+ * If the specified channel ID is a valid endpoint number, but no receive
+ * queue has been defined, this service will return success, but with num
+ * entries set to zero and the real address will have an undefined value.
+ */
+#define HV_FAST_LDC_RX_QINFO		0xe5
+
+/* ldc_rx_get_state()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_GET_STATE
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	head offset
+ * RET2:	tail offset
+ * RET3:	channel state
+ *
+ * Return the receive state, and the head and tail queue pointers, for
+ * the receive queue of the LDC endpoint defined by the given channel ID.
+ * The head and tail values are the byte offset of the head and tail
+ * positions of the receive queue for the specified endpoint.
+ */
+#define HV_FAST_LDC_RX_GET_STATE	0xe6
+
+/* ldc_rx_set_qhead()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_RX_SET_QHEAD
+ * ARG0:	channel ID
+ * ARG1:	head offset
+ * RET0:	status
+ *
+ * Update the head pointer for the receive queue associated with the LDC
+ * endpoint defined by the given channel ID.  The head offset specified
+ * must be aligned on a 64 byte boundary, and calculated so as to decrease
+ * the number of pending entries on the receive queue.  Any attempt to
+ * increase the number of pending receive queue entires is considered
+ * an invalid head offset and will result in an EINVAL error.
+ *
+ * The receive queue may be flushed by setting the head offset equal
+ * to the current tail offset.
+ */
+#define HV_FAST_LDC_RX_SET_QHEAD	0xe7
+
+/* LDC Map Table Entry.  Each slot is defined by a translation table
+ * entry, as specified by the LDC_MTE_* bits below, and a 64-bit
+ * hypervisor invalidation cookie.
+ */
+#define LDC_MTE_PADDR	0x0fffffffffffe000 /* pa[55:13]          */
+#define LDC_MTE_COPY_W	0x0000000000000400 /* copy write access  */
+#define LDC_MTE_COPY_R	0x0000000000000200 /* copy read access   */
+#define LDC_MTE_IOMMU_W	0x0000000000000100 /* IOMMU write access */
+#define LDC_MTE_IOMMU_R	0x0000000000000080 /* IOMMU read access  */
+#define LDC_MTE_EXEC	0x0000000000000040 /* execute            */
+#define LDC_MTE_WRITE	0x0000000000000020 /* write              */
+#define LDC_MTE_READ	0x0000000000000010 /* read               */
+#define LDC_MTE_SZALL	0x000000000000000f /* page size bits     */
+#define LDC_MTE_SZ16GB	0x0000000000000007 /* 16GB page          */
+#define LDC_MTE_SZ2GB	0x0000000000000006 /* 2GB page           */
+#define LDC_MTE_SZ256MB	0x0000000000000005 /* 256MB page         */
+#define LDC_MTE_SZ32MB	0x0000000000000004 /* 32MB page          */
+#define LDC_MTE_SZ4MB	0x0000000000000003 /* 4MB page           */
+#define LDC_MTE_SZ512K	0x0000000000000002 /* 512K page          */
+#define LDC_MTE_SZ64K	0x0000000000000001 /* 64K page           */
+#define LDC_MTE_SZ8K	0x0000000000000000 /* 8K page            */
+
+#ifndef __ASSEMBLY__
+struct ldc_mtable_entry {
+	unsigned long	mte;
+	unsigned long	cookie;
+};
+#endif
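
A sketch of filling one map table slot using only the LDC_MTE_* bits above,
granting IOMMU read/write access to a single 8K page.  The physical address is
assumed to be 8K aligned, and the cookie value comes from the caller's own
invalidation scheme.

static void mte_fill_8k_rw(struct ldc_mtable_entry *ent,
			   unsigned long pa, unsigned long cookie)
{
	ent->mte = (pa & LDC_MTE_PADDR) |
		   LDC_MTE_IOMMU_R | LDC_MTE_IOMMU_W |
		   LDC_MTE_SZ8K;
	ent->cookie = cookie;
}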
+
+/* ldc_set_map_table()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_SET_MAP_TABLE
+ * ARG0:	channel ID
+ * ARG1:	table real address
+ * ARG2:	num entries
+ * RET0:	status
+ *
+ * Register the MTE table at the given table real address, with the
+ * specified num entries, for the LDC indicated by the given channel
+ * ID.
+ */
+#define HV_FAST_LDC_SET_MAP_TABLE	0xea
+
+/* ldc_get_map_table()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_GET_MAP_TABLE
+ * ARG0:	channel ID
+ * RET0:	status
+ * RET1:	table real address
+ * RET2:	num entries
+ *
+ * Return the configuration of the current mapping table registered
+ * for the given channel ID.
+ */
+#define HV_FAST_LDC_GET_MAP_TABLE	0xeb
+
+#define LDC_COPY_IN	0
+#define LDC_COPY_OUT	1
+
+/* ldc_copy()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_COPY
+ * ARG0:	channel ID
+ * ARG1:	LDC_COPY_* direction code
+ * ARG2:	target real address
+ * ARG3:	local real address
+ * ARG4:	length in bytes
+ * RET0:	status
+ * RET1:	actual length in bytes
+ */
+#define HV_FAST_LDC_COPY		0xec
+
+#define LDC_MEM_READ	1
+#define LDC_MEM_WRITE	2
+#define LDC_MEM_EXEC	4
+
+/* ldc_mapin()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_MAPIN
+ * ARG0:	channel ID
+ * ARG1:	cookie
+ * RET0:	status
+ * RET1:	real address
+ * RET2:	LDC_MEM_* permissions
+ */
+#define HV_FAST_LDC_MAPIN		0xed
+
+/* ldc_unmap()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_UNMAP
+ * ARG0:	real address
+ * RET0:	status
+ */
+#define HV_FAST_LDC_UNMAP		0xee
+
+/* ldc_revoke()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_LDC_REVOKE
+ * ARG0:	channel ID
+ * ARG1:	cookie
+ * ARG2:	ldc_mtable_entry cookie
+ * RET0:	status
+ */
+#define HV_FAST_LDC_REVOKE		0xef
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_ldc_tx_qconf(unsigned long channel,
+				 unsigned long ra,
+				 unsigned long num_entries);
+unsigned long sun4v_ldc_tx_qinfo(unsigned long channel,
+				 unsigned long *ra,
+				 unsigned long *num_entries);
+unsigned long sun4v_ldc_tx_get_state(unsigned long channel,
+				     unsigned long *head_off,
+				     unsigned long *tail_off,
+				     unsigned long *chan_state);
+unsigned long sun4v_ldc_tx_set_qtail(unsigned long channel,
+				     unsigned long tail_off);
+unsigned long sun4v_ldc_rx_qconf(unsigned long channel,
+				 unsigned long ra,
+				 unsigned long num_entries);
+unsigned long sun4v_ldc_rx_qinfo(unsigned long channel,
+				 unsigned long *ra,
+				 unsigned long *num_entries);
+unsigned long sun4v_ldc_rx_get_state(unsigned long channel,
+				     unsigned long *head_off,
+				     unsigned long *tail_off,
+				     unsigned long *chan_state);
+unsigned long sun4v_ldc_rx_set_qhead(unsigned long channel,
+				     unsigned long head_off);
+unsigned long sun4v_ldc_set_map_table(unsigned long channel,
+				      unsigned long ra,
+				      unsigned long num_entries);
+unsigned long sun4v_ldc_get_map_table(unsigned long channel,
+				      unsigned long *ra,
+				      unsigned long *num_entries);
+unsigned long sun4v_ldc_copy(unsigned long channel,
+			     unsigned long dir_code,
+			     unsigned long tgt_raddr,
+			     unsigned long lcl_raddr,
+			     unsigned long len,
+			     unsigned long *actual_len);
+unsigned long sun4v_ldc_mapin(unsigned long channel,
+			      unsigned long cookie,
+			      unsigned long *ra,
+			      unsigned long *perm);
+unsigned long sun4v_ldc_unmap(unsigned long ra);
+unsigned long sun4v_ldc_revoke(unsigned long channel,
+			       unsigned long cookie,
+			       unsigned long mte_cookie);
+#endif
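
For example, a copy-in of len bytes via sun4v_ldc_copy() (declared above)
might look like the sketch below; which of the two real addresses denotes the
peer side for a given direction code is an assumption here.

static long ldc_copy_in(unsigned long channel, unsigned long tgt_ra,
			unsigned long lcl_ra, unsigned long len)
{
	unsigned long actual;

	if (sun4v_ldc_copy(channel, LDC_COPY_IN, tgt_ra, lcl_ra,
			   len, &actual))
		return -1;
	return actual;	/* may be less than len */
}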
+
+/* Performance counter services.  */
+
+#define HV_PERF_JBUS_PERF_CTRL_REG	0x00
+#define HV_PERF_JBUS_PERF_CNT_REG	0x01
+#define HV_PERF_DRAM_PERF_CTRL_REG_0	0x02
+#define HV_PERF_DRAM_PERF_CNT_REG_0	0x03
+#define HV_PERF_DRAM_PERF_CTRL_REG_1	0x04
+#define HV_PERF_DRAM_PERF_CNT_REG_1	0x05
+#define HV_PERF_DRAM_PERF_CTRL_REG_2	0x06
+#define HV_PERF_DRAM_PERF_CNT_REG_2	0x07
+#define HV_PERF_DRAM_PERF_CTRL_REG_3	0x08
+#define HV_PERF_DRAM_PERF_CNT_REG_3	0x09
+
+/* get_perfreg()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_GET_PERFREG
+ * ARG0:	performance reg number
+ * RET0:	status
+ * RET1:	performance reg value
+ * ERRORS:	EINVAL		Invalid performance register number
+ *		ENOACCESS	No access allowed to performance counters
+ *
+ * Read the value of the given DRAM/JBUS performance counter/control register.
+ */
+#define HV_FAST_GET_PERFREG		0x100
+
+/* set_perfreg()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_SET_PERFREG
+ * ARG0:	performance reg number
+ * ARG1:	performance reg value
+ * RET0:	status
+ * ERRORS:	EINVAL		Invalid performance register number
+ *		ENOACCESS	No access allowed to performance counters
+ *
+ * Write the given performance reg value to the given DRAM/JBUS
+ * performance counter/control register.
+ */
+#define HV_FAST_SET_PERFREG		0x101
+
+#define HV_N2_PERF_SPARC_CTL		0x0
+#define HV_N2_PERF_DRAM_CTL0		0x1
+#define HV_N2_PERF_DRAM_CNT0		0x2
+#define HV_N2_PERF_DRAM_CTL1		0x3
+#define HV_N2_PERF_DRAM_CNT1		0x4
+#define HV_N2_PERF_DRAM_CTL2		0x5
+#define HV_N2_PERF_DRAM_CNT2		0x6
+#define HV_N2_PERF_DRAM_CTL3		0x7
+#define HV_N2_PERF_DRAM_CNT3		0x8
+
+#define HV_FAST_N2_GET_PERFREG		0x104
+#define HV_FAST_N2_SET_PERFREG		0x105
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_niagara_getperf(unsigned long reg,
+				    unsigned long *val);
+unsigned long sun4v_niagara_setperf(unsigned long reg,
+				    unsigned long val);
+unsigned long sun4v_niagara2_getperf(unsigned long reg,
+				     unsigned long *val);
+unsigned long sun4v_niagara2_setperf(unsigned long reg,
+				     unsigned long val);
+#endif
+
+/* MMU statistics services.
+ *
+ * The hypervisor maintains MMU statistics and privileged code provides
+ * a buffer where these statistics can be collected.  It is continually
+ * updated once configured.  The layout is as follows:
+ */
+#ifndef __ASSEMBLY__
+struct hv_mmu_statistics {
+	unsigned long immu_tsb_hits_ctx0_8k_tte;
+	unsigned long immu_tsb_ticks_ctx0_8k_tte;
+	unsigned long immu_tsb_hits_ctx0_64k_tte;
+	unsigned long immu_tsb_ticks_ctx0_64k_tte;
+	unsigned long __reserved1[2];
+	unsigned long immu_tsb_hits_ctx0_4mb_tte;
+	unsigned long immu_tsb_ticks_ctx0_4mb_tte;
+	unsigned long __reserved2[2];
+	unsigned long immu_tsb_hits_ctx0_256mb_tte;
+	unsigned long immu_tsb_ticks_ctx0_256mb_tte;
+	unsigned long __reserved3[4];
+	unsigned long immu_tsb_hits_ctxnon0_8k_tte;
+	unsigned long immu_tsb_ticks_ctxnon0_8k_tte;
+	unsigned long immu_tsb_hits_ctxnon0_64k_tte;
+	unsigned long immu_tsb_ticks_ctxnon0_64k_tte;
+	unsigned long __reserved4[2];
+	unsigned long immu_tsb_hits_ctxnon0_4mb_tte;
+	unsigned long immu_tsb_ticks_ctxnon0_4mb_tte;
+	unsigned long __reserved5[2];
+	unsigned long immu_tsb_hits_ctxnon0_256mb_tte;
+	unsigned long immu_tsb_ticks_ctxnon0_256mb_tte;
+	unsigned long __reserved6[4];
+	unsigned long dmmu_tsb_hits_ctx0_8k_tte;
+	unsigned long dmmu_tsb_ticks_ctx0_8k_tte;
+	unsigned long dmmu_tsb_hits_ctx0_64k_tte;
+	unsigned long dmmu_tsb_ticks_ctx0_64k_tte;
+	unsigned long __reserved7[2];
+	unsigned long dmmu_tsb_hits_ctx0_4mb_tte;
+	unsigned long dmmu_tsb_ticks_ctx0_4mb_tte;
+	unsigned long __reserved8[2];
+	unsigned long dmmu_tsb_hits_ctx0_256mb_tte;
+	unsigned long dmmu_tsb_ticks_ctx0_256mb_tte;
+	unsigned long __reserved9[4];
+	unsigned long dmmu_tsb_hits_ctxnon0_8k_tte;
+	unsigned long dmmu_tsb_ticks_ctxnon0_8k_tte;
+	unsigned long dmmu_tsb_hits_ctxnon0_64k_tte;
+	unsigned long dmmu_tsb_ticks_ctxnon0_64k_tte;
+	unsigned long __reserved10[2];
+	unsigned long dmmu_tsb_hits_ctxnon0_4mb_tte;
+	unsigned long dmmu_tsb_ticks_ctxnon0_4mb_tte;
+	unsigned long __reserved11[2];
+	unsigned long dmmu_tsb_hits_ctxnon0_256mb_tte;
+	unsigned long dmmu_tsb_ticks_ctxnon0_256mb_tte;
+	unsigned long __reserved12[4];
+};
+#endif
+
+/* mmustat_conf()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMUSTAT_CONF
+ * ARG0:	real address
+ * RET0:	status
+ * RET1:	real address
+ * ERRORS:	ENORADDR	Invalid real address
+ *		EBADALIGN	Real address not aligned on 64-byte boundary
+ *		EBADTRAP	API not supported on this processor
+ *
+ * Enable MMU statistic gathering using the buffer at the given real
+ * address on the current virtual CPU.  The new buffer real address
+ * is given in ARG1, and the previously specified buffer real address
+ * is returned in RET1, or is returned as zero for the first invocation.
+ *
+ * If the passed in real address argument is zero, this will disable
+ * MMU statistic collection on the current virtual CPU.  If an error is
+ * returned then no statistics are collected.
+ *
+ * The buffer contents should be initialized to all zeros before being
+ * given to the hypervisor or else the statistics will be meaningless.
+ */
+#define HV_FAST_MMUSTAT_CONF		0x102
+
+/* mmustat_info()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_MMUSTAT_INFO
+ * RET0:	status
+ * RET1:	real address
+ * ERRORS:	EBADTRAP	API not supported on this processor
+ *
+ * Return the current state and real address of the currently configured
+ * MMU statistics buffer on the current virtual CPU.
+ */
+#define HV_FAST_MMUSTAT_INFO		0x103
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_mmustat_conf(unsigned long ra, unsigned long *orig_ra);
+unsigned long sun4v_mmustat_info(unsigned long *ra);
+#endif
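
A minimal enable/disable sketch using the wrappers above.  stats_ra is assumed
to be the 64-byte-aligned real address of a zeroed struct hv_mmu_statistics
buffer.

static unsigned long mmustat_enable(unsigned long stats_ra)
{
	unsigned long prev_ra;	/* previously configured buffer, or 0 */

	return sun4v_mmustat_conf(stats_ra, &prev_ra);
}

static unsigned long mmustat_disable(void)
{
	unsigned long prev_ra;

	return sun4v_mmustat_conf(0, &prev_ra);	/* RA 0 disables */
}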
+
+/* NCS crypto services  */
+
+/* ncs_request() sub-function numbers */
+#define HV_NCS_QCONF			0x01
+#define HV_NCS_QTAIL_UPDATE		0x02
+
+#ifndef __ASSEMBLY__
+struct hv_ncs_queue_entry {
+	/* MAU Control Register */
+	unsigned long	mau_control;
+#define MAU_CONTROL_INV_PARITY	0x0000000000002000
+#define MAU_CONTROL_STRAND	0x0000000000001800
+#define MAU_CONTROL_BUSY	0x0000000000000400
+#define MAU_CONTROL_INT		0x0000000000000200
+#define MAU_CONTROL_OP		0x00000000000001c0
+#define MAU_CONTROL_OP_SHIFT	6
+#define MAU_OP_LOAD_MA_MEMORY	0x0
+#define MAU_OP_STORE_MA_MEMORY	0x1
+#define MAU_OP_MODULAR_MULT	0x2
+#define MAU_OP_MODULAR_REDUCE	0x3
+#define MAU_OP_MODULAR_EXP_LOOP	0x4
+#define MAU_CONTROL_LEN		0x000000000000003f
+#define MAU_CONTROL_LEN_SHIFT	0
+
+	/* Real address of the bytes to load into, or store
+	 * out of, the MAU.
+	 */
+	unsigned long	mau_mpa;
+
+	/* Modular Arithmetic MA Offset Register.  */
+	unsigned long	mau_ma;
+
+	/* Modular Arithmetic N Prime Register.  */
+	unsigned long	mau_np;
+};
+
+struct hv_ncs_qconf_arg {
+	unsigned long	mid;      /* MAU ID, 1 per core on Niagara */
+	unsigned long	base;     /* Real address base of queue */
+	unsigned long	end;	  /* Real address end of queue */
+	unsigned long	num_ents; /* Number of entries in queue */
+};
+
+struct hv_ncs_qtail_update_arg {
+	unsigned long	mid;      /* MAU ID, 1 per core on Niagara */
+	unsigned long	tail;     /* New tail index to use */
+	unsigned long	syncflag; /* only SYNCFLAG_SYNC is implemented */
+#define HV_NCS_SYNCFLAG_SYNC	0x00
+#define HV_NCS_SYNCFLAG_ASYNC	0x01
+};
+#endif
+
+/* ncs_request()
+ * TRAP:	HV_FAST_TRAP
+ * FUNCTION:	HV_FAST_NCS_REQUEST
+ * ARG0:	NCS sub-function
+ * ARG1:	sub-function argument real address
+ * ARG2:	size in bytes of sub-function argument
+ * RET0:	status
+ *
+ * The MAU chip of the Niagara processor is not directly accessible
+ * to privileged code, instead it is programmed indirectly via this
+ * hypervisor API.
+ *
+ * The interface defines a queue of MAU operations to perform.
+ * Privileged code registers a queue with the hypervisor by invoking
+ * this HVAPI with the HV_NCS_QCONF sub-function, which defines the
+ * base, end, and number of entries of the queue.  Each queue entry
+ * contains a MAU register struct block.
+ *
+ * The privileged code then proceeds to add entries to the queue and
+ * then invoke the HV_NCS_QTAIL_UPDATE sub-function.  Since only
+ * synchronous operations are supported by the current hypervisor,
+ * HV_NCS_QTAIL_UPDATE will run all the pending queue entries to
+ * completion and return HV_EOK, or return an error code.
+ *
+ * The real address of the sub-function argument must be aligned on at
+ * least an 8-byte boundary.
+ *
+ * The tail argument of HV_NCS_QTAIL_UPDATE is an index, not a byte
+ * offset, into the queue and must be less than or equal to the 'num_ents'
+ * argument given in the HV_NCS_QCONF call.
+ */
+#define HV_FAST_NCS_REQUEST		0x110
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_ncs_request(unsigned long request,
+			        unsigned long arg_ra,
+			        unsigned long arg_size);
+#endif
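
A sketch of queue registration via the HV_NCS_QCONF sub-function, using the
types and wrapper declared above.  arg_ra is assumed to be the (at least
8-byte-aligned) real address of *arg; how the caller obtains it is elided.

static unsigned long ncs_queue_register(struct hv_ncs_qconf_arg *arg,
					unsigned long arg_ra,
					unsigned long mau_id,
					unsigned long base_ra,
					unsigned long end_ra,
					unsigned long nents)
{
	arg->mid = mau_id;
	arg->base = base_ra;
	arg->end = end_ra;
	arg->num_ents = nents;

	return sun4v_ncs_request(HV_NCS_QCONF, arg_ra, sizeof(*arg));
}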
+
+#define HV_FAST_FIRE_GET_PERFREG	0x120
+#define HV_FAST_FIRE_SET_PERFREG	0x121
+
+#define HV_FAST_REBOOT_DATA_SET		0x172
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_reboot_data_set(unsigned long ra,
+				    unsigned long len);
+#endif
+
+#define HV_FAST_VT_GET_PERFREG		0x184
+#define HV_FAST_VT_SET_PERFREG		0x185
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_vt_get_perfreg(unsigned long reg_num,
+				   unsigned long *reg_val);
+unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
+				   unsigned long reg_val);
+#endif
+
+#define	HV_FAST_T5_GET_PERFREG		0x1a8
+#define	HV_FAST_T5_SET_PERFREG		0x1a9
+
+#ifndef	__ASSEMBLY__
+unsigned long sun4v_t5_get_perfreg(unsigned long reg_num,
+				   unsigned long *reg_val);
+unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
+				   unsigned long reg_val);
+#endif
+
+
+#define HV_FAST_M7_GET_PERFREG	0x43
+#define HV_FAST_M7_SET_PERFREG	0x44
+
+#ifndef	__ASSEMBLY__
+unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
+				      unsigned long *reg_val);
+unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
+				      unsigned long reg_val);
+#endif
+
+/* Function numbers for HV_CORE_TRAP.  */
+#define HV_CORE_SET_VER			0x00
+#define HV_CORE_PUTCHAR			0x01
+#define HV_CORE_EXIT			0x02
+#define HV_CORE_GET_VER			0x03
+
+/* Hypervisor API groups for use with HV_CORE_SET_VER and
+ * HV_CORE_GET_VER.
+ */
+#define HV_GRP_SUN4V			0x0000
+#define HV_GRP_CORE			0x0001
+#define HV_GRP_INTR			0x0002
+#define HV_GRP_SOFT_STATE		0x0003
+#define HV_GRP_TM			0x0080
+#define HV_GRP_PCI			0x0100
+#define HV_GRP_LDOM			0x0101
+#define HV_GRP_SVC_CHAN			0x0102
+#define HV_GRP_NCS			0x0103
+#define HV_GRP_RNG			0x0104
+#define HV_GRP_PBOOT			0x0105
+#define HV_GRP_TPM			0x0107
+#define HV_GRP_SDIO			0x0108
+#define HV_GRP_SDIO_ERR			0x0109
+#define HV_GRP_REBOOT_DATA		0x0110
+#define HV_GRP_ATU			0x0111
+#define HV_GRP_DAX			0x0113
+#define HV_GRP_M7_PERF			0x0114
+#define HV_GRP_NIAG_PERF		0x0200
+#define HV_GRP_FIRE_PERF		0x0201
+#define HV_GRP_N2_CPU			0x0202
+#define HV_GRP_NIU			0x0204
+#define HV_GRP_VF_CPU			0x0205
+#define HV_GRP_KT_CPU			0x0209
+#define HV_GRP_VT_CPU			0x020c
+#define HV_GRP_T5_CPU			0x0211
+#define HV_GRP_DIAG			0x0300
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_get_version(unsigned long group,
+			        unsigned long *major,
+			        unsigned long *minor);
+unsigned long sun4v_set_version(unsigned long group,
+			        unsigned long major,
+			        unsigned long minor,
+			        unsigned long *actual_minor);
+
+int sun4v_hvapi_register(unsigned long group, unsigned long major,
+			 unsigned long *minor);
+void sun4v_hvapi_unregister(unsigned long group);
+int sun4v_hvapi_get(unsigned long group,
+		    unsigned long *major,
+		    unsigned long *minor);
+void sun4v_hvapi_init(void);
+#endif
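
Typical use negotiates an API group once at boot before issuing any calls from
that group; requesting major 1 for the PCI group here is an assumption of the
sketch.

static int pci_hvapi_negotiate(void)
{
	unsigned long minor;

	/* The hypervisor fills in the highest minor it supports
	 * for the requested major.
	 */
	return sun4v_hvapi_register(HV_GRP_PCI, 1, &minor);
}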
+
+#endif /* !(_SPARC64_HYPERVISOR_H) */
diff --git a/arch/sparc/include/asm/ide.h b/arch/sparc/include/asm/ide.h
new file mode 100644
index 0000000..09f0265
--- /dev/null
+++ b/arch/sparc/include/asm/ide.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ide.h: SPARC PCI specific IDE glue.
+ *
+ * Copyright (C) 1997  David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998  Eddie C. Dost   (ecd@skynet.be)
+ * Adaptation from sparc64 version to sparc by Pete Zaitcev.
+ */
+
+#ifndef _SPARC_IDE_H
+#define _SPARC_IDE_H
+
+#ifdef __KERNEL__
+
+#include <asm/io.h>
+#ifdef CONFIG_SPARC64
+#include <asm/pgalloc.h>
+#include <asm/spitfire.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+#else
+#include <asm/pgtable.h>
+#include <asm/psr.h>
+#endif
+
+#define __ide_insl(data_reg, buffer, wcount) \
+	__ide_insw(data_reg, buffer, (wcount)<<1)
+#define __ide_outsl(data_reg, buffer, wcount) \
+	__ide_outsw(data_reg, buffer, (wcount)<<1)
+
+/* On sparc, I/O ports and MMIO registers are accessed identically.  */
+#define __ide_mm_insw	__ide_insw
+#define __ide_mm_insl	__ide_insl
+#define __ide_mm_outsw	__ide_outsw
+#define __ide_mm_outsl	__ide_outsl
+
+static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
+{
+#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
+	unsigned long end = (unsigned long)dst + (count << 1);
+#endif
+	u16 *ps = dst;
+	u32 *pi;
+
+	if(((unsigned long)ps) & 0x2) {
+		*ps++ = __raw_readw(port);
+		count--;
+	}
+	pi = (u32 *)ps;
+	while(count >= 2) {
+		u32 w;
+
+		w  = __raw_readw(port) << 16;
+		w |= __raw_readw(port);
+		*pi++ = w;
+		count -= 2;
+	}
+	ps = (u16 *)pi;
+	if(count)
+		*ps++ = __raw_readw(port);
+
+#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
+	__flush_dcache_range((unsigned long)dst, end);
+#endif
+}
+
+static inline void __ide_outsw(void __iomem *port, const void *src, u32 count)
+{
+#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
+	unsigned long end = (unsigned long)src + (count << 1);
+#endif
+	const u16 *ps = src;
+	const u32 *pi;
+
+	if(((unsigned long)src) & 0x2) {
+		__raw_writew(*ps++, port);
+		count--;
+	}
+	pi = (const u32 *)ps;
+	while(count >= 2) {
+		u32 w;
+
+		w = *pi++;
+		__raw_writew((w >> 16), port);
+		__raw_writew(w, port);
+		count -= 2;
+	}
+	ps = (const u16 *)pi;
+	if(count)
+		__raw_writew(*ps, port);
+
+#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
+	__flush_dcache_range((unsigned long)src, end);
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _SPARC_IDE_H */
diff --git a/arch/sparc/include/asm/idprom.h b/arch/sparc/include/asm/idprom.h
new file mode 100644
index 0000000..4c37219
--- /dev/null
+++ b/arch/sparc/include/asm/idprom.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * idprom.h: Macros and defines for idprom routines
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_IDPROM_H
+#define _SPARC_IDPROM_H
+
+#include <linux/types.h>
+
+struct idprom {
+	u8		id_format;	/* Format identifier (always 0x01) */
+	u8		id_machtype;	/* Machine type */
+	u8		id_ethaddr[6];	/* Hardware ethernet address */
+	s32		id_date;	/* Date of manufacture */
+	u32		id_sernum:24;	/* Unique serial number */
+	u8		id_cksum;	/* Checksum - xor of the data bytes */
+	u8		reserved[16];
+};
+
+extern struct idprom *idprom;
+void idprom_init(void);
+
+#endif /* !(_SPARC_IDPROM_H) */
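
The checksum field invites a quick integrity check.  A sketch, assuming the
XOR covers the 15 data bytes that precede id_cksum in the structure above:

static int idprom_cksum_ok(const struct idprom *ip)
{
	const u8 *p = (const u8 *)ip;
	u8 sum = 0;
	int i;

	for (i = 0; i < 15; i++)	/* bytes before id_cksum */
		sum ^= p[i];
	return sum == ip->id_cksum;
}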
diff --git a/arch/sparc/include/asm/intr_queue.h b/arch/sparc/include/asm/intr_queue.h
new file mode 100644
index 0000000..d61be2a
--- /dev/null
+++ b/arch/sparc/include/asm/intr_queue.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_INTR_QUEUE_H
+#define _SPARC64_INTR_QUEUE_H
+
+/* Sun4v interrupt queue registers, accessed via ASI_QUEUE.  */
+
+#define INTRQ_CPU_MONDO_HEAD	  0x3c0 /* CPU mondo head	          */
+#define INTRQ_CPU_MONDO_TAIL	  0x3c8 /* CPU mondo tail	          */
+#define INTRQ_DEVICE_MONDO_HEAD	  0x3d0 /* Device mondo head	          */
+#define INTRQ_DEVICE_MONDO_TAIL	  0x3d8 /* Device mondo tail	          */
+#define INTRQ_RESUM_MONDO_HEAD	  0x3e0 /* Resumable error mondo head     */
+#define INTRQ_RESUM_MONDO_TAIL	  0x3e8 /* Resumable error mondo tail     */
+#define INTRQ_NONRESUM_MONDO_HEAD 0x3f0 /* Non-resumable error mondo head */
+#define INTRQ_NONRESUM_MONDO_TAIL 0x3f8 /* Non-resumable error mondo tail */
+
+#endif /* !(_SPARC64_INTR_QUEUE_H) */
diff --git a/arch/sparc/include/asm/io-unit.h b/arch/sparc/include/asm/io-unit.h
new file mode 100644
index 0000000..3ce96e8
--- /dev/null
+++ b/arch/sparc/include/asm/io-unit.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* io-unit.h: Definitions for the sun4d IO-UNIT.
+ *
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+#ifndef _SPARC_IO_UNIT_H
+#define _SPARC_IO_UNIT_H
+
+#include <linux/spinlock.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+/* The io-unit handles all virtual to physical address translations
+ * that occur between the SBUS and physical memory.  Accesses by
+ * the cpu to IO registers and the like go over the xdbus and so are
+ * translated by the on-chip SRMMU.  The io-unit and the srmmu do
+ * not need to have the same translations at all; in fact, most
+ * of the time the translations they handle are a disjoint set.
+ * Basically the io-unit handles all dvma sbus activity.
+ */
+
+/* AIEEE, unlike the nice sun4m, these monsters have a
+   fixed 64M DMA range. */
+
+#define IOUNIT_DMA_BASE	    0xfc000000 /* TOP - 64M */
+#define IOUNIT_DMA_SIZE	    0x04000000 /* 64M */
+/* We use the last 1M for sparc_dvma_malloc */
+#define IOUNIT_DVMA_SIZE    0x00100000 /* 1M */
+
+/* The format of an iopte in the external page tables */
+#define IOUPTE_PAGE          0xffffff00 /* Physical page number (PA[35:12])	*/
+#define IOUPTE_CACHE         0x00000080 /* Cached (in Viking/MXCC)		*/
+/* XXX Jakub, find out how to program SBUS streaming cache on XDBUS/sun4d.
+ * XXX Actually, all you should need to do is find out where the registers
+ * XXX are and copy over the sparc64 implementation I wrote.  There may be
+ * XXX some horrible hwbugs though, so be careful.  -DaveM
+ */
+#define IOUPTE_STREAM        0x00000040 /* Translation can use streaming cache	*/
+#define IOUPTE_INTRA	     0x00000008 /* SBUS direct slot->slot transfer	*/
+#define IOUPTE_WRITE         0x00000004 /* Writeable				*/
+#define IOUPTE_VALID         0x00000002 /* IOPTE is valid			*/
+#define IOUPTE_PARITY        0x00000001 /* Parity is checked during DVMA	*/
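+
+/* Illustrative sketch (not part of the original header): the sun4d
+ * io-unit code composes an iopte for a DVMA page from its physical
+ * address roughly as
+ *
+ *	#define IOPERM	(IOUPTE_CACHE | IOUPTE_WRITE | IOUPTE_VALID)
+ *	#define MKIOPTE(phys)	((((phys) >> 4) & IOUPTE_PAGE) | IOPERM)
+ *
+ * where the >> 4 shift places PA[35:12] into bits 31:8 (IOUPTE_PAGE).
+ */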
+
+struct iounit_struct {
+	unsigned long		bmap[(IOUNIT_DMA_SIZE >> (PAGE_SHIFT + 3)) / sizeof(unsigned long)];
+	spinlock_t		lock;
+	iopte_t __iomem		*page_table;
+	unsigned long		rotor[3];
+	unsigned long		limit[4];
+};
+
+#define IOUNIT_BMAP1_START	0x00000000
+#define IOUNIT_BMAP1_END	(IOUNIT_DMA_SIZE >> (PAGE_SHIFT + 1))
+#define IOUNIT_BMAP2_START	IOUNIT_BMAP1_END
+#define IOUNIT_BMAP2_END	(IOUNIT_BMAP2_START + (IOUNIT_DMA_SIZE >> (PAGE_SHIFT + 2)))
+#define IOUNIT_BMAPM_START	IOUNIT_BMAP2_END
+#define IOUNIT_BMAPM_END	((IOUNIT_DMA_SIZE - IOUNIT_DVMA_SIZE) >> PAGE_SHIFT)
+
+#endif /* !(_SPARC_IO_UNIT_H) */
diff --git a/arch/sparc/include/asm/io.h b/arch/sparc/include/asm/io.h
new file mode 100644
index 0000000..2eefa52
--- /dev/null
+++ b/arch/sparc/include/asm/io.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_IO_H
+#define ___ASM_SPARC_IO_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/io_64.h>
+#else
+#include <asm/io_32.h>
+#endif
+
+/*
+ * Defines used for both SPARC32 and SPARC64
+ */
+
+/* Big endian versions of memory read/write routines */
+#define readb_be(__addr)	__raw_readb(__addr)
+#define readw_be(__addr)	__raw_readw(__addr)
+#define readl_be(__addr)	__raw_readl(__addr)
+#define writeb_be(__b, __addr)	__raw_writeb(__b, __addr)
+#define writew_be(__w, __addr)	__raw_writew(__w, __addr)
+#define writel_be(__l, __addr)	__raw_writel(__l, __addr)
+
+#endif
diff --git a/arch/sparc/include/asm/io_32.h b/arch/sparc/include/asm/io_32.h
new file mode 100644
index 0000000..df2dc17
--- /dev/null
+++ b/arch/sparc/include/asm/io_32.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_IO_H
+#define __SPARC_IO_H
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>  /* struct resource */
+
+#define IO_SPACE_LIMIT 0xffffffff
+
+#define memset_io(d,c,sz)     _memset_io(d,c,sz)
+#define memcpy_fromio(d,s,sz) _memcpy_fromio(d,s,sz)
+#define memcpy_toio(d,s,sz)   _memcpy_toio(d,s,sz)
+
+#include <asm-generic/io.h>
+
+static inline void _memset_io(volatile void __iomem *dst,
+                              int c, __kernel_size_t n)
+{
+	volatile void __iomem *d = dst;
+
+	while (n--) {
+		writeb(c, d);
+		d++;
+	}
+}
+
+static inline void _memcpy_fromio(void *dst, const volatile void __iomem *src,
+                                  __kernel_size_t n)
+{
+	char *d = dst;
+
+	while (n--) {
+		char tmp = readb(src);
+		*d++ = tmp;
+		src++;
+	}
+}
+
+static inline void _memcpy_toio(volatile void __iomem *dst, const void *src,
+                                __kernel_size_t n)
+{
+	const char *s = src;
+	volatile void __iomem *d = dst;
+
+	while (n--) {
+		char tmp = *s++;
+		writeb(tmp, d);
+		d++;
+	}
+}
+
+/*
+ * SBus accessors.
+ *
+ * SBus has only one, memory-mapped, I/O space.
+ * We do not need to flip bytes for SBus, of course.
+ */
+static inline u8 sbus_readb(const volatile void __iomem *addr)
+{
+	return *(__force volatile u8 *)addr;
+}
+
+static inline u16 sbus_readw(const volatile void __iomem *addr)
+{
+	return *(__force volatile u16 *)addr;
+}
+
+static inline u32 sbus_readl(const volatile void __iomem *addr)
+{
+	return *(__force volatile u32 *)addr;
+}
+
+static inline void sbus_writeb(u8 b, volatile void __iomem *addr)
+{
+	*(__force volatile u8 *)addr = b;
+}
+
+static inline void sbus_writew(u16 w, volatile void __iomem *addr)
+{
+	*(__force volatile u16 *)addr = w;
+}
+
+static inline void sbus_writel(u32 l, volatile void __iomem *addr)
+{
+	*(__force volatile u32 *)addr = l;
+}
+
+static inline void sbus_memset_io(volatile void __iomem *__dst, int c,
+                                  __kernel_size_t n)
+{
+	while (n--) {
+		sbus_writeb(c, __dst);
+		__dst++;
+	}
+}
+
+static inline void sbus_memcpy_fromio(void *dst,
+                                      const volatile void __iomem *src,
+                                      __kernel_size_t n)
+{
+	char *d = dst;
+
+	while (n--) {
+		char tmp = sbus_readb(src);
+		*d++ = tmp;
+		src++;
+	}
+}
+
+static inline void sbus_memcpy_toio(volatile void __iomem *dst,
+                                    const void *src,
+                                    __kernel_size_t n)
+{
+	const char *s = src;
+	volatile void __iomem *d = dst;
+
+	while (n--) {
+		char tmp = *s++;
+		sbus_writeb(tmp, d);
+		d++;
+	}
+}
+
+#ifdef __KERNEL__
+
+/*
+ * Bus number may be embedded in the higher bits of the physical address.
+ * This is why we have no bus number argument to ioremap().
+ */
+void __iomem *ioremap(unsigned long offset, unsigned long size);
+void iounmap(volatile void __iomem *addr);
+/* Create a virtual mapping cookie for an IO port range */
+void __iomem *ioport_map(unsigned long port, unsigned int nr);
+void ioport_unmap(void __iomem *);
+
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+struct pci_dev;
+void pci_iounmap(struct pci_dev *dev, void __iomem *);
+
+static inline int sbus_can_dma_64bit(void)
+{
+	return 0; /* actually, sparc_cpu_model==sun4d */
+}
+static inline int sbus_can_burst64(void)
+{
+	return 0; /* actually, sparc_cpu_model==sun4d */
+}
+struct device;
+void sbus_set_sbus64(struct device *, int);
+
+#endif
+
+#define __ARCH_HAS_NO_PAGE_ZERO_MAPPED		1
+
+
+#endif /* !(__SPARC_IO_H) */
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
new file mode 100644
index 0000000..b162c23
--- /dev/null
+++ b/arch/sparc/include/asm/io_64.h
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC64_IO_H
+#define __SPARC64_IO_H
+
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/page.h>      /* IO address mapping routines need this */
+#include <asm/asi.h>
+#include <asm-generic/pci_iomap.h>
+
+/* BIO layer definitions. */
+extern unsigned long kern_base, kern_size;
+
+/* __raw_{read,write}{b,w,l,q} use direct access.
+ * They access the memory as big endian, bypassing the cache,
+ * by using ASI_PHYS_BYPASS_EC_E.
+ */
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+	u8 ret;
+
+	__asm__ __volatile__("lduba\t[%1] %2, %0\t/* pci_raw_readb */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+	u16 ret;
+
+	__asm__ __volatile__("lduha\t[%1] %2, %0\t/* pci_raw_readw */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+	u32 ret;
+
+	__asm__ __volatile__("lduwa\t[%1] %2, %0\t/* pci_raw_readl */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	u64 ret;
+
+	__asm__ __volatile__("ldxa\t[%1] %2, %0\t/* pci_raw_readq */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, const volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stba\t%r0, [%1] %2\t/* pci_raw_writeb */"
+			     : /* no outputs */
+			     : "Jr" (b), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 w, const volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stha\t%r0, [%1] %2\t/* pci_raw_writew */"
+			     : /* no outputs */
+			     : "Jr" (w), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 l, const volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stwa\t%r0, [%1] %2\t/* pci_raw_writel */"
+			     : /* no outputs */
+			     : "Jr" (l), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 q, const volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stxa\t%r0, [%1] %2\t/* pci_raw_writeq */"
+			     : /* no outputs */
+			     : "Jr" (q), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+/* Memory functions, same as I/O accesses on Ultra.
+ * These access memory as little endian, bypassing
+ * the cache, by using ASI_PHYS_BYPASS_EC_E_L.
+ */
+#define readb readb
+#define readb_relaxed readb
+static inline u8 readb(const volatile void __iomem *addr)
+{
+	u8 ret;
+
+	__asm__ __volatile__("lduba\t[%1] %2, %0\t/* pci_readb */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	return ret;
+}
+
+#define readw readw
+#define readw_relaxed readw
+static inline u16 readw(const volatile void __iomem *addr)
+{
+	u16 ret;
+
+	__asm__ __volatile__("lduha\t[%1] %2, %0\t/* pci_readw */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+
+	return ret;
+}
+
+#define readl readl
+#define readl_relaxed readl
+static inline u32 readl(const volatile void __iomem *addr)
+{
+	u32 ret;
+
+	__asm__ __volatile__("lduwa\t[%1] %2, %0\t/* pci_readl */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+
+	return ret;
+}
+
+#define readq readq
+#define readq_relaxed readq
+static inline u64 readq(const volatile void __iomem *addr)
+{
+	u64 ret;
+
+	__asm__ __volatile__("ldxa\t[%1] %2, %0\t/* pci_readq */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+
+	return ret;
+}
+
+#define writeb writeb
+#define writeb_relaxed writeb
+static inline void writeb(u8 b, volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stba\t%r0, [%1] %2\t/* pci_writeb */"
+			     : /* no outputs */
+			     : "Jr" (b), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+}
+
+#define writew writew
+#define writew_relaxed writew
+static inline void writew(u16 w, volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stha\t%r0, [%1] %2\t/* pci_writew */"
+			     : /* no outputs */
+			     : "Jr" (w), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+}
+
+#define writel writel
+#define writel_relaxed writel
+static inline void writel(u32 l, volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stwa\t%r0, [%1] %2\t/* pci_writel */"
+			     : /* no outputs */
+			     : "Jr" (l), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+}
+
+#define writeq writeq
+#define writeq_relaxed writeq
+static inline void writeq(u64 q, volatile void __iomem *addr)
+{
+	__asm__ __volatile__("stxa\t%r0, [%1] %2\t/* pci_writeq */"
+			     : /* no outputs */
+			     : "Jr" (q), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+}
+
+#define inb inb
+static inline u8 inb(unsigned long addr)
+{
+	return readb((volatile void __iomem *)addr);
+}
+
+#define inw inw
+static inline u16 inw(unsigned long addr)
+{
+	return readw((volatile void __iomem *)addr);
+}
+
+#define inl inl
+static inline u32 inl(unsigned long addr)
+{
+	return readl((volatile void __iomem *)addr);
+}
+
+#define outb outb
+static inline void outb(u8 b, unsigned long addr)
+{
+	writeb(b, (volatile void __iomem *)addr);
+}
+
+#define outw outw
+static inline void outw(u16 w, unsigned long addr)
+{
+	writew(w, (volatile void __iomem *)addr);
+}
+
+#define outl outl
+static inline void outl(u32 l, unsigned long addr)
+{
+	writel(l, (volatile void __iomem *)addr);
+}
+
+
+#define inb_p(__addr) 		inb(__addr)
+#define outb_p(__b, __addr)	outb(__b, __addr)
+#define inw_p(__addr)		inw(__addr)
+#define outw_p(__w, __addr)	outw(__w, __addr)
+#define inl_p(__addr)		inl(__addr)
+#define outl_p(__l, __addr)	outl(__l, __addr)
+
+void outsb(unsigned long, const void *, unsigned long);
+void outsw(unsigned long, const void *, unsigned long);
+void outsl(unsigned long, const void *, unsigned long);
+void insb(unsigned long, void *, unsigned long);
+void insw(unsigned long, void *, unsigned long);
+void insl(unsigned long, void *, unsigned long);
+
+static inline void readsb(void __iomem *port, void *buf, unsigned long count)
+{
+	insb((unsigned long __force)port, buf, count);
+}
+static inline void readsw(void __iomem *port, void *buf, unsigned long count)
+{
+	insw((unsigned long __force)port, buf, count);
+}
+
+static inline void readsl(void __iomem *port, void *buf, unsigned long count)
+{
+	insl((unsigned long __force)port, buf, count);
+}
+
+static inline void writesb(void __iomem *port, const void *buf, unsigned long count)
+{
+	outsb((unsigned long __force)port, buf, count);
+}
+
+static inline void writesw(void __iomem *port, const void *buf, unsigned long count)
+{
+	outsw((unsigned long __force)port, buf, count);
+}
+
+static inline void writesl(void __iomem *port, const void *buf, unsigned long count)
+{
+	outsl((unsigned long __force)port, buf, count);
+}
+
+#define ioread8_rep(p,d,l)	readsb(p,d,l)
+#define ioread16_rep(p,d,l)	readsw(p,d,l)
+#define ioread32_rep(p,d,l)	readsl(p,d,l)
+#define iowrite8_rep(p,d,l)	writesb(p,d,l)
+#define iowrite16_rep(p,d,l)	writesw(p,d,l)
+#define iowrite32_rep(p,d,l)	writesl(p,d,l)
+
+/* Valid I/O Space regions are anywhere, because each PCI bus supported
+ * can live in an arbitrary area of the physical address range.
+ */
+#define IO_SPACE_LIMIT 0xffffffffffffffffUL
+
+/* Now the SBUS variants; the only difference from PCI is that we do
+ * not use little-endian ASIs.
+ */
+static inline u8 sbus_readb(const volatile void __iomem *addr)
+{
+	return __raw_readb(addr);
+}
+
+static inline u16 sbus_readw(const volatile void __iomem *addr)
+{
+	return __raw_readw(addr);
+}
+
+static inline u32 sbus_readl(const volatile void __iomem *addr)
+{
+	return __raw_readl(addr);
+}
+
+static inline u64 sbus_readq(const volatile void __iomem *addr)
+{
+	return __raw_readq(addr);
+}
+
+static inline void sbus_writeb(u8 b, volatile void __iomem *addr)
+{
+	__raw_writeb(b, addr);
+}
+
+static inline void sbus_writew(u16 w, volatile void __iomem *addr)
+{
+	__raw_writew(w, addr);
+}
+
+static inline void sbus_writel(u32 l, volatile void __iomem *addr)
+{
+	__raw_writel(l, addr);
+}
+
+static inline void sbus_writeq(u64 q, volatile void __iomem *addr)
+{
+	__raw_writeq(q, addr);
+}
+
+static inline void sbus_memset_io(volatile void __iomem *dst, int c, __kernel_size_t n)
+{
+	while (n--) {
+		sbus_writeb(c, dst);
+		dst++;
+	}
+}
+
+static inline void memset_io(volatile void __iomem *dst, int c, __kernel_size_t n)
+{
+	volatile void __iomem *d = dst;
+
+	while (n--) {
+		writeb(c, d);
+		d++;
+	}
+}
+
+static inline void sbus_memcpy_fromio(void *dst, const volatile void __iomem *src,
+				      __kernel_size_t n)
+{
+	char *d = dst;
+
+	while (n--) {
+		char tmp = sbus_readb(src);
+		*d++ = tmp;
+		src++;
+	}
+}
+
+
+static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
+				 __kernel_size_t n)
+{
+	char *d = dst;
+
+	while (n--) {
+		char tmp = readb(src);
+		*d++ = tmp;
+		src++;
+	}
+}
+
+static inline void sbus_memcpy_toio(volatile void __iomem *dst, const void *src,
+				    __kernel_size_t n)
+{
+	const char *s = src;
+	volatile void __iomem *d = dst;
+
+	while (n--) {
+		char tmp = *s++;
+		sbus_writeb(tmp, d);
+		d++;
+	}
+}
+
+static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
+			       __kernel_size_t n)
+{
+	const char *s = src;
+	volatile void __iomem *d = dst;
+
+	while (n--) {
+		char tmp = *s++;
+		writeb(tmp, d);
+		d++;
+	}
+}
+
+#define mmiowb()
+
+#ifdef __KERNEL__
+
+/* On sparc64 we have the whole physical IO address space accessible
+ * using physically addressed loads and stores, so this does nothing.
+ */
+static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
+{
+	return (void __iomem *)offset;
+}
+
+#define ioremap_nocache(X,Y)		ioremap((X),(Y))
+#define ioremap_wc(X,Y)			ioremap((X),(Y))
+#define ioremap_wt(X,Y)			ioremap((X),(Y))
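+
+/* Illustrative only: the usual driver idiom is unchanged even though
+ * the mapping is an identity no-op here (REG_OFF is hypothetical):
+ *
+ *	void __iomem *regs = ioremap(res->start, resource_size(res));
+ *	u32 val = readl(regs + REG_OFF);
+ */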
+
+static inline void iounmap(volatile void __iomem *addr)
+{
+}
+
+#define ioread8			readb
+#define ioread16		readw
+#define ioread16be		__raw_readw
+#define ioread32		readl
+#define ioread32be		__raw_readl
+#define iowrite8		writeb
+#define iowrite16		writew
+#define iowrite16be		__raw_writew
+#define iowrite32		writel
+#define iowrite32be		__raw_writel
+
+/* Create a virtual mapping cookie for an IO port range */
+void __iomem *ioport_map(unsigned long port, unsigned int nr);
+void ioport_unmap(void __iomem *);
+
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+struct pci_dev;
+void pci_iounmap(struct pci_dev *dev, void __iomem *);
+
+static inline int sbus_can_dma_64bit(void)
+{
+	return 1;
+}
+static inline int sbus_can_burst64(void)
+{
+	return 1;
+}
+struct device;
+void sbus_set_sbus64(struct device *, int);
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p)	__va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p)	p
+
+#endif
+
+#endif /* !(__SPARC64_IO_H) */
diff --git a/arch/sparc/include/asm/ioctls.h b/arch/sparc/include/asm/ioctls.h
new file mode 100644
index 0000000..c92ac78
--- /dev/null
+++ b/arch/sparc/include/asm/ioctls.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_IOCTLS_H
+#define _ASM_SPARC_IOCTLS_H
+
+#include <uapi/asm/ioctls.h>
+
+#define TIOCGETC __TIOCGETC
+#define TIOCGETP __TIOCGETP
+#define TIOCGLTC __TIOCGLTC
+#define TIOCSLTC __TIOCSLTC
+#define TIOCSETP __TIOCSETP
+#define TIOCSETN __TIOCSETN
+#define TIOCSETC __TIOCSETC
+#endif /* !(_ASM_SPARC_IOCTLS_H) */
diff --git a/arch/sparc/include/asm/iommu-common.h b/arch/sparc/include/asm/iommu-common.h
new file mode 100644
index 0000000..802c90c
--- /dev/null
+++ b/arch/sparc/include/asm/iommu-common.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_IOMMU_COMMON_H
+#define _LINUX_IOMMU_COMMON_H
+
+#include <linux/spinlock_types.h>
+#include <linux/device.h>
+#include <asm/page.h>
+
+#define IOMMU_POOL_HASHBITS     4
+#define IOMMU_NR_POOLS          (1 << IOMMU_POOL_HASHBITS)
+#define IOMMU_ERROR_CODE	(~(unsigned long) 0)
+
+struct iommu_pool {
+	unsigned long	start;
+	unsigned long	end;
+	unsigned long	hint;
+	spinlock_t	lock;
+};
+
+struct iommu_map_table {
+	unsigned long		table_map_base;
+	unsigned long		table_shift;
+	unsigned long		nr_pools;
+	void			(*lazy_flush)(struct iommu_map_table *);
+	unsigned long		poolsize;
+	struct iommu_pool	pools[IOMMU_NR_POOLS];
+	u32			flags;
+#define	IOMMU_HAS_LARGE_POOL	0x00000001
+#define	IOMMU_NO_SPAN_BOUND	0x00000002
+#define	IOMMU_NEED_FLUSH	0x00000004
+	struct iommu_pool	large_pool;
+	unsigned long		*map;
+};
+
+extern void iommu_tbl_pool_init(struct iommu_map_table *iommu,
+				unsigned long num_entries,
+				u32 table_shift,
+				void (*lazy_flush)(struct iommu_map_table *),
+				bool large_pool, u32 npools,
+				bool skip_span_boundary_check);
+
+extern unsigned long iommu_tbl_range_alloc(struct device *dev,
+					   struct iommu_map_table *iommu,
+					   unsigned long npages,
+					   unsigned long *handle,
+					   unsigned long mask,
+					   unsigned int align_order);
+
+extern void iommu_tbl_range_free(struct iommu_map_table *iommu,
+				 u64 dma_addr, unsigned long npages,
+				 unsigned long entry);
+
+#endif
diff --git a/arch/sparc/include/asm/iommu.h b/arch/sparc/include/asm/iommu.h
new file mode 100644
index 0000000..37935cb
--- /dev/null
+++ b/arch/sparc/include/asm/iommu.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_IOMMU_H
+#define ___ASM_SPARC_IOMMU_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/iommu_64.h>
+#else
+#include <asm/iommu_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/iommu_32.h b/arch/sparc/include/asm/iommu_32.h
new file mode 100644
index 0000000..af51cd5
--- /dev/null
+++ b/arch/sparc/include/asm/iommu_32.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* iommu.h: Definitions for the sun4m IOMMU.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_IOMMU_H
+#define _SPARC_IOMMU_H
+
+#include <asm/page.h>
+#include <asm/bitext.h>
+
+/* The iommu handles all virtual to physical address translations
+ * that occur between the SBUS and physical memory.  Accesses by
+ * the cpu to IO registers and the like go over the mbus and so are
+ * translated by the on-chip SRMMU.  The iommu and the srmmu do
+ * not need to have the same translations at all; in fact, most
+ * of the time the translations they handle are a disjoint set.
+ * Basically the iommu handles all dvma sbus activity.
+ */
+
+/* The IOMMU registers occupy three pages in IO space. */
+struct iommu_regs {
+	/* First page */
+	volatile unsigned long control;    /* IOMMU control */
+	volatile unsigned long base;       /* Physical base of iopte page table */
+	volatile unsigned long _unused1[3];
+	volatile unsigned long tlbflush;   /* write only */
+	volatile unsigned long pageflush;  /* write only */
+	volatile unsigned long _unused2[1017];
+	/* Second page */
+	volatile unsigned long afsr;       /* Async-fault status register */
+	volatile unsigned long afar;       /* Async-fault physical address */
+	volatile unsigned long _unused3[2];
+	volatile unsigned long sbuscfg0;   /* SBUS configuration registers, per-slot */
+	volatile unsigned long sbuscfg1;
+	volatile unsigned long sbuscfg2;
+	volatile unsigned long sbuscfg3;
+	volatile unsigned long mfsr;       /* Memory-fault status register */
+	volatile unsigned long mfar;       /* Memory-fault physical address */
+	volatile unsigned long _unused4[1014];
+	/* Third page */
+	volatile unsigned long mid;        /* IOMMU module-id */
+};
+
+#define IOMMU_CTRL_IMPL     0xf0000000 /* Implementation */
+#define IOMMU_CTRL_VERS     0x0f000000 /* Version */
+#define IOMMU_CTRL_RNGE     0x0000001c /* Mapping RANGE */
+#define IOMMU_RNGE_16MB     0x00000000 /* 0xff000000 -> 0xffffffff */
+#define IOMMU_RNGE_32MB     0x00000004 /* 0xfe000000 -> 0xffffffff */
+#define IOMMU_RNGE_64MB     0x00000008 /* 0xfc000000 -> 0xffffffff */
+#define IOMMU_RNGE_128MB    0x0000000c /* 0xf8000000 -> 0xffffffff */
+#define IOMMU_RNGE_256MB    0x00000010 /* 0xf0000000 -> 0xffffffff */
+#define IOMMU_RNGE_512MB    0x00000014 /* 0xe0000000 -> 0xffffffff */
+#define IOMMU_RNGE_1GB      0x00000018 /* 0xc0000000 -> 0xffffffff */
+#define IOMMU_RNGE_2GB      0x0000001c /* 0x80000000 -> 0xffffffff */
+#define IOMMU_CTRL_ENAB     0x00000001 /* IOMMU Enable */
+
+#define IOMMU_AFSR_ERR      0x80000000 /* LE, TO, or BE asserted */
+#define IOMMU_AFSR_LE       0x40000000 /* SBUS reports error after transaction */
+#define IOMMU_AFSR_TO       0x20000000 /* Write access took more than 12.8 us. */
+#define IOMMU_AFSR_BE       0x10000000 /* Write access received error acknowledge */
+#define IOMMU_AFSR_SIZE     0x0e000000 /* Size of transaction causing error */
+#define IOMMU_AFSR_S        0x01000000 /* Sparc was in supervisor mode */
+#define IOMMU_AFSR_RESV     0x00f00000 /* Reserved, forced to 0x8 by hardware */
+#define IOMMU_AFSR_ME       0x00080000 /* Multiple errors occurred */
+#define IOMMU_AFSR_RD       0x00040000 /* A read operation was in progress */
+#define IOMMU_AFSR_FAV      0x00020000 /* IOMMU afar has valid contents */
+
+#define IOMMU_SBCFG_SAB30   0x00010000 /* Phys-address bit 30 when bypass enabled */
+#define IOMMU_SBCFG_BA16    0x00000004 /* Slave supports 16 byte bursts */
+#define IOMMU_SBCFG_BA8     0x00000002 /* Slave supports 8 byte bursts */
+#define IOMMU_SBCFG_BYPASS  0x00000001 /* Bypass IOMMU, treat all addresses
+					  produced by this device as pure
+					  physical. */
+
+#define IOMMU_MFSR_ERR      0x80000000 /* One or more of PERR1 or PERR0 */
+#define IOMMU_MFSR_S        0x01000000 /* Sparc was in supervisor mode */
+#define IOMMU_MFSR_CPU      0x00800000 /* CPU transaction caused parity error */
+#define IOMMU_MFSR_ME       0x00080000 /* Multiple parity errors occurred */
+#define IOMMU_MFSR_PERR     0x00006000 /* High bit indicates a parity error occurred
+					  on the even word of the access; low bit
+					  indicates the odd word caused the parity error */
+#define IOMMU_MFSR_BM       0x00001000 /* Error occurred while in boot mode */
+#define IOMMU_MFSR_C        0x00000800 /* Address causing error was marked cacheable */
+#define IOMMU_MFSR_RTYP     0x000000f0 /* Memory request transaction type */
+
+#define IOMMU_MID_SBAE      0x001f0000 /* SBus arbitration enable */
+#define IOMMU_MID_SE        0x00100000 /* Enables SCSI/ETHERNET arbitration */
+#define IOMMU_MID_SB3       0x00080000 /* Enable SBUS device 3 arbitration */
+#define IOMMU_MID_SB2       0x00040000 /* Enable SBUS device 2 arbitration */
+#define IOMMU_MID_SB1       0x00020000 /* Enable SBUS device 1 arbitration */
+#define IOMMU_MID_SB0       0x00010000 /* Enable SBUS device 0 arbitration */
+#define IOMMU_MID_MID       0x0000000f /* Module-id, hardcoded to 0x8 */
+
+/* The format of an iopte in the page tables */
+#define IOPTE_PAGE          0x07ffff00 /* Physical page number (PA[30:12]) */
+#define IOPTE_CACHE         0x00000080 /* Cached (in vme IOCACHE or Viking/MXCC) */
+#define IOPTE_WRITE         0x00000004 /* Writeable */
+#define IOPTE_VALID         0x00000002 /* IOPTE is valid */
+#define IOPTE_WAZ           0x00000001 /* Write as zeros */
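+
+/* Illustrative sketch (not part of the original header): the sun4m
+ * iommu code builds an iopte from a page frame number roughly as
+ *
+ *	#define MKIOPTE(pfn, perm) \
+ *		(((((pfn) << 8) & IOPTE_PAGE) | (perm)) & ~IOPTE_WAZ)
+ *
+ * i.e. the pfn lands in the IOPTE_PAGE field and IOPTE_WAZ stays clear.
+ */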
+
+struct iommu_struct {
+	struct iommu_regs __iomem *regs;
+	iopte_t *page_table;
+	/* For convenience */
+	unsigned long start; /* First managed virtual address */
+	unsigned long end;   /* Last managed virtual address */
+
+	struct bit_map usemap;
+};
+
+static inline void iommu_invalidate(struct iommu_regs __iomem *regs)
+{
+	sbus_writel(0, &regs->tlbflush);
+}
+
+static inline void iommu_invalidate_page(struct iommu_regs __iomem *regs, unsigned long ba)
+{
+	sbus_writel(ba & PAGE_MASK, &regs->pageflush);
+}
+
+#endif /* !(_SPARC_IOMMU_H) */
diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h
new file mode 100644
index 0000000..0ef6ded
--- /dev/null
+++ b/arch/sparc/include/asm/iommu_64.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* iommu.h: Definitions for the sun5 IOMMU.
+ *
+ * Copyright (C) 1996, 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+#ifndef _SPARC64_IOMMU_H
+#define _SPARC64_IOMMU_H
+
+/* The format of an iopte in the page tables. */
+#define IOPTE_VALID   0x8000000000000000UL
+#define IOPTE_64K     0x2000000000000000UL
+#define IOPTE_STBUF   0x1000000000000000UL
+#define IOPTE_INTRA   0x0800000000000000UL
+#define IOPTE_CONTEXT 0x07ff800000000000UL
+#define IOPTE_PAGE    0x00007fffffffe000UL
+#define IOPTE_CACHE   0x0000000000000010UL
+#define IOPTE_WRITE   0x0000000000000002UL
+
+#define IOMMU_NUM_CTXS	4096
+#include <asm/iommu-common.h>
+
+struct iommu_arena {
+	unsigned long	*map;
+	unsigned int	hint;
+	unsigned int	limit;
+};
+
+#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
+
+/* Data structures for SPARC ATU architecture */
+struct atu_iotsb {
+	void	*table;		/* IOTSB table base virtual addr */
+	u64	ra;		/* IOTSB table real addr */
+	u64	dvma_size;	/* ranges[3].size or OS selected 32G size */
+	u64	dvma_base;	/* ranges[3].base */
+	u64	table_size;	/* IOTSB table size */
+	u64	page_size;	/* IO PAGE size for IOTSB */
+	u32	iotsb_num;	/* tsbnum is same as iotsb_handle */
+};
+
+struct atu_ranges {
+	u64	base;
+	u64	size;
+};
+
+struct atu {
+	struct	atu_ranges	*ranges;
+	struct	atu_iotsb	*iotsb;
+	struct	iommu_map_table	tbl;
+	u64			base;
+	u64			size;
+	u64			dma_addr_mask;
+};
+
+struct iommu {
+	struct iommu_map_table	tbl;
+	struct atu		*atu;
+	spinlock_t		lock;
+	u32			dma_addr_mask;
+	iopte_t			*page_table;
+	unsigned long		iommu_control;
+	unsigned long		iommu_tsbbase;
+	unsigned long		iommu_flush;
+	unsigned long		iommu_flushinv;
+	unsigned long		iommu_tags;
+	unsigned long		iommu_ctxflush;
+	unsigned long		write_complete_reg;
+	unsigned long		dummy_page;
+	unsigned long		dummy_page_pa;
+	unsigned long		ctx_lowest_free;
+	DECLARE_BITMAP(ctx_bitmap, IOMMU_NUM_CTXS);
+};
+
+struct strbuf {
+	int			strbuf_enabled;
+	unsigned long		strbuf_control;
+	unsigned long		strbuf_pflush;
+	unsigned long		strbuf_fsync;
+	unsigned long		strbuf_err_stat;
+	unsigned long		strbuf_tag_diag;
+	unsigned long		strbuf_line_diag;
+	unsigned long		strbuf_ctxflush;
+	unsigned long		strbuf_ctxmatch_base;
+	unsigned long		strbuf_flushflag_pa;
+	volatile unsigned long *strbuf_flushflag;
+	volatile unsigned long	__flushflag_buf[(64+(64-1)) / sizeof(long)];
+};
+
+int iommu_table_init(struct iommu *iommu, int tsbsize,
+		     u32 dma_offset, u32 dma_addr_mask,
+		     int numa_node);
+
+#endif /* !(_SPARC64_IOMMU_H) */
diff --git a/arch/sparc/include/asm/irq.h b/arch/sparc/include/asm/irq.h
new file mode 100644
index 0000000..a2efc27
--- /dev/null
+++ b/arch/sparc/include/asm/irq.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_IRQ_H
+#define ___ASM_SPARC_IRQ_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/irq_64.h>
+#else
+#include <asm/irq_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/irq_32.h b/arch/sparc/include/asm/irq_32.h
new file mode 100644
index 0000000..43ec260
--- /dev/null
+++ b/arch/sparc/include/asm/irq_32.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* irq.h: IRQ registers on the Sparc.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _SPARC_IRQ_H
+#define _SPARC_IRQ_H
+
+/* Allocated number of logical irq numbers.
+ * sun4d boxes (ss2000e) should be OK with ~32.
+ * Be on the safe side and make room for 64.
+ */
+#define NR_IRQS    64
+
+#include <linux/interrupt.h>
+
+#define irq_canonicalize(irq)	(irq)
+
+void __init init_IRQ(void);
+void __init sun4d_init_sbi_irq(void);
+
+#define NO_IRQ		0xffffffff
+
+#endif
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
new file mode 100644
index 0000000..4d748e9
--- /dev/null
+++ b/arch/sparc/include/asm/irq_64.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* irq.h: IRQ registers on the 64-bit Sparc.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#ifndef _SPARC64_IRQ_H
+#define _SPARC64_IRQ_H
+
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <asm/pil.h>
+#include <asm/ptrace.h>
+
+/* IMAP/ICLR register defines */
+#define IMAP_VALID		0x80000000UL	/* IRQ Enabled		*/
+#define IMAP_TID_UPA		0x7c000000UL	/* UPA TargetID		*/
+#define IMAP_TID_JBUS		0x7c000000UL	/* JBUS TargetID	*/
+#define IMAP_TID_SHIFT		26
+#define IMAP_AID_SAFARI		0x7c000000UL	/* Safari AgentID	*/
+#define IMAP_AID_SHIFT		26
+#define IMAP_NID_SAFARI		0x03e00000UL	/* Safari NodeID	*/
+#define IMAP_NID_SHIFT		21
+#define IMAP_IGN		0x000007c0UL	/* IRQ Group Number	*/
+#define IMAP_INO		0x0000003fUL	/* IRQ Number		*/
+#define IMAP_INR		0x000007ffUL	/* Full interrupt number*/
+
+#define ICLR_IDLE		0x00000000UL	/* Idle state		*/
+#define ICLR_TRANSMIT		0x00000001UL	/* Transmit state	*/
+#define ICLR_PENDING		0x00000003UL	/* Pending state	*/
+
+/* The largest number of unique interrupt sources we support.
+ * If this ever needs to be larger than 255, you need to change
+ * the type of ino_bucket->irq as appropriate.
+ *
+ * ino_bucket->irq allocation is made during {sun4v_,}build_irq().
+ */
+#define NR_IRQS		(2048)
+
+void irq_install_pre_handler(int irq,
+			     void (*func)(unsigned int, void *, void *),
+			     void *arg1, void *arg2);
+#define irq_canonicalize(irq)	(irq)
+unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap);
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino);
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino);
+unsigned int sun4v_build_msi(u32 devhandle, unsigned int *irq_p,
+			     unsigned int msi_devino_start,
+			     unsigned int msi_devino_end);
+void sun4v_destroy_msi(unsigned int irq);
+unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p,
+			     unsigned int msi_devino_start,
+			     unsigned int msi_devino_end,
+			     unsigned long imap_base,
+			     unsigned long iclr_base);
+void sun4u_destroy_msi(unsigned int irq);
+
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
+void irq_free(unsigned int irq);
+
+void __init init_IRQ(void);
+void fixup_irqs(void);
+
+static inline void set_softint(unsigned long bits)
+{
+	__asm__ __volatile__("wr	%0, 0x0, %%set_softint"
+			     : /* No outputs */
+			     : "r" (bits));
+}
+
+static inline void clear_softint(unsigned long bits)
+{
+	__asm__ __volatile__("wr	%0, 0x0, %%clear_softint"
+			     : /* No outputs */
+			     : "r" (bits));
+}
+
+static inline unsigned long get_softint(void)
+{
+	unsigned long retval;
+
+	__asm__ __volatile__("rd	%%softint, %0"
+			     : "=r" (retval));
+	return retval;
+}
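+
+/* Illustrative only (not part of the original header): a software
+ * interrupt at %pil level "pil" (a value from asm/pil.h) is posted
+ * with set_softint() and acknowledged from its handler:
+ *
+ *	set_softint(1UL << pil);
+ *	...
+ *	clear_softint(1UL << pil);
+ */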
+
+void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
+				    bool exclude_self);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+
+extern void *hardirq_stack[NR_CPUS];
+extern void *softirq_stack[NR_CPUS];
+#define __ARCH_HAS_DO_SOFTIRQ
+
+#define NO_IRQ		0xffffffff
+
+#endif
diff --git a/arch/sparc/include/asm/irqflags.h b/arch/sparc/include/asm/irqflags.h
new file mode 100644
index 0000000..d1bdf6c
--- /dev/null
+++ b/arch/sparc/include/asm/irqflags.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_IRQFLAGS_H
+#define ___ASM_SPARC_IRQFLAGS_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/irqflags_64.h>
+#else
+#include <asm/irqflags_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/irqflags_32.h b/arch/sparc/include/asm/irqflags_32.h
new file mode 100644
index 0000000..7ca3eaf
--- /dev/null
+++ b/arch/sparc/include/asm/irqflags_32.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * include/asm/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * arch_local_irq_*() functions from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/psr.h>
+
+void arch_local_irq_restore(unsigned long);
+unsigned long arch_local_irq_save(void);
+void arch_local_irq_enable(void);
+
+static inline notrace unsigned long arch_local_save_flags(void)
+{
+	unsigned long flags;
+
+	asm volatile("rd        %%psr, %0" : "=r" (flags));
+	return flags;
+}
+
+static inline notrace void arch_local_irq_disable(void)
+{
+	arch_local_irq_save();
+}
+
+static inline notrace bool arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags & PSR_PIL) != 0;
+}
+
+static inline notrace bool arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+#endif /* (__ASSEMBLY__) */
+
+#endif /* !(_ASM_IRQFLAGS_H) */
diff --git a/arch/sparc/include/asm/irqflags_64.h b/arch/sparc/include/asm/irqflags_64.h
new file mode 100644
index 0000000..c29ed57
--- /dev/null
+++ b/arch/sparc/include/asm/irqflags_64.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * include/asm/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * arch_local_irq_*() functions from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#include <asm/pil.h>
+
+#ifndef __ASSEMBLY__
+
+static inline notrace unsigned long arch_local_save_flags(void)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__(
+		"rdpr	%%pil, %0"
+		: "=r" (flags)
+	);
+
+	return flags;
+}
+
+static inline notrace void arch_local_irq_restore(unsigned long flags)
+{
+	__asm__ __volatile__(
+		"wrpr	%0, %%pil"
+		: /* no output */
+		: "r" (flags)
+		: "memory"
+	);
+}
+
+static inline notrace void arch_local_irq_disable(void)
+{
+	__asm__ __volatile__(
+		"wrpr	%0, %%pil"
+		: /* no outputs */
+		: "i" (PIL_NORMAL_MAX)
+		: "memory"
+	);
+}
+
+static inline notrace void arch_local_irq_enable(void)
+{
+	__asm__ __volatile__(
+		"wrpr	0, %%pil"
+		: /* no outputs */
+		: /* no inputs */
+		: "memory"
+	);
+}
+
+static inline notrace int arch_irqs_disabled_flags(unsigned long flags)
+{
+	return (flags > 0);
+}
+
+static inline notrace int arch_irqs_disabled(void)
+{
+	return arch_irqs_disabled_flags(arch_local_save_flags());
+}
+
+static inline notrace unsigned long arch_local_irq_save(void)
+{
+	unsigned long flags, tmp;
+
+	/* Disable interrupts to PIL_NORMAL_MAX unless we already
+	 * are using PIL_NMI, in which case PIL_NMI is retained.
+	 *
+	 * The only values we ever program into the %pil are 0,
+	 * PIL_NORMAL_MAX and PIL_NMI.
+	 *
+	 * Since PIL_NMI is the largest %pil value and all bits are
+	 * set in it (0xf), it doesn't matter what PIL_NORMAL_MAX
+	 * actually is.
+	 */
+	__asm__ __volatile__(
+		"rdpr	%%pil, %0\n\t"
+		"or	%0, %2, %1\n\t"
+		"wrpr	%1, 0x0, %%pil"
+		: "=r" (flags), "=r" (tmp)
+		: "i" (PIL_NORMAL_MAX)
+		: "memory"
+	);
+
+	return flags;
+}
+
+#endif /* (__ASSEMBLY__) */
+
+#endif /* !(_ASM_IRQFLAGS_H) */
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
new file mode 100644
index 0000000..94eb529
--- /dev/null
+++ b/arch/sparc/include/asm/jump_label.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_JUMP_LABEL_H
+#define _ASM_SPARC_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "nop\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:\n\t"
+		 "b %l[l_yes]\n\t"
+		 "nop\n\t"
+		 ".pushsection __jump_table,  \"aw\"\n\t"
+		 ".align 4\n\t"
+		 ".word 1b, %l[l_yes], %c0\n\t"
+		 ".popsection \n\t"
+		 : :  "i" (&((char *)key)[branch]) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+typedef u32 jump_label_t;
+
+struct jump_entry {
+	jump_label_t code;
+	jump_label_t target;
+	jump_label_t key;
+};
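+
+/* Each ".word 1b, %l[l_yes], %c0" triple emitted above becomes one of
+ * these entries: "code" is the address of the nop/branch site, "target"
+ * the l_yes label, and "key" the associated static_key, with the branch
+ * polarity folded into the low bit via &((char *)key)[branch].
+ */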
+
+#endif  /* __ASSEMBLY__ */
+#endif
diff --git a/arch/sparc/include/asm/kdebug.h b/arch/sparc/include/asm/kdebug.h
new file mode 100644
index 0000000..830053f
--- /dev/null
+++ b/arch/sparc/include/asm/kdebug.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_KDEBUG_H
+#define ___ASM_SPARC_KDEBUG_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/kdebug_64.h>
+#else
+#include <asm/kdebug_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/kdebug_32.h b/arch/sparc/include/asm/kdebug_32.h
new file mode 100644
index 0000000..763d423
--- /dev/null
+++ b/arch/sparc/include/asm/kdebug_32.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kdebug.h:  Defines and definitions for debugging the Linux kernel
+ *            under various kernel debuggers.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_KDEBUG_H
+#define _SPARC_KDEBUG_H
+
+#include <asm/openprom.h>
+#include <asm/vaddrs.h>
+
+/* Breakpoints are entered through trap table entry 126.  So in sparc assembly,
+ * if you want to drop into the debugger, you do:
+ *
+ * t DEBUG_BP_TRAP
+ */
+
+#define DEBUG_BP_TRAP     126
+
+#ifndef __ASSEMBLY__
+/* The debug vector is passed in %o1 at boot time.  It is a pointer to
+ * a structure in the debugger's address space.  Here is its format.
+ */
+
+typedef unsigned int (*debugger_funct)(void);
+
+struct kernel_debug {
+	/* First the entry point into the debugger.  You jump here
+	 * to give control over to the debugger.
+	 */
+	unsigned long kdebug_entry;
+	unsigned long kdebug_trapme;   /* Figure out later... */
+	/* The following is the number of pages that the debugger has
+	 * taken from the total pool.
+	 */
+	unsigned long *kdebug_stolen_pages;
+	/* Ok, after you remap yourself and/or change the trap table
+	 * from what you were left with at boot time you have to call
+	 * this synchronization function so the debugger can check out
+	 * what you have done.
+	 */
+	debugger_funct teach_debugger;
+}; /* I think that is it... */
+
+extern struct kernel_debug *linux_dbvec;
+
+/* Use this macro in C-code to enter the debugger. */
+static inline void sp_enter_debugger(void)
+{
+	__asm__ __volatile__("jmpl %0, %%o7\n\t"
+			     "nop\n\t" : :
+			     "r" (linux_dbvec) : "o7", "memory");
+}
+
+#define SP_ENTER_DEBUGGER do { \
+	if ((linux_dbvec != 0) && ((*(short *)linux_dbvec) != -1)) \
+		sp_enter_debugger(); \
+} while (0)
+
+enum die_val {
+	DIE_UNUSED,
+	DIE_OOPS,
+};
+
+#endif /* !(__ASSEMBLY__) */
+
+/* Some nice offset defines for assembler code. */
+#define KDEBUG_ENTRY_OFF    0x0
+#define KDEBUG_DUNNO_OFF    0x4
+#define KDEBUG_DUNNO2_OFF   0x8
+#define KDEBUG_TEACH_OFF    0xc
+
+#endif /* !(_SPARC_KDEBUG_H) */
diff --git a/arch/sparc/include/asm/kdebug_64.h b/arch/sparc/include/asm/kdebug_64.h
new file mode 100644
index 0000000..89428bd
--- /dev/null
+++ b/arch/sparc/include/asm/kdebug_64.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_KDEBUG_H
+#define _SPARC64_KDEBUG_H
+
+struct pt_regs;
+
+void bad_trap(struct pt_regs *, long);
+
+/* Grossly misnamed. */
+enum die_val {
+	DIE_OOPS = 1,
+	DIE_DEBUG,	/* ta 0x70 */
+	DIE_DEBUG_2,	/* ta 0x71 */
+	DIE_BPT,	/* ta 0x73 */
+	DIE_SSTEP,	/* ta 0x74 */
+	DIE_DIE,
+	DIE_TRAP,
+	DIE_TRAP_TL1,
+	DIE_CALL,
+	DIE_NMI,
+	DIE_NMIWATCHDOG,
+};
+
+#endif
diff --git a/arch/sparc/include/asm/kgdb.h b/arch/sparc/include/asm/kgdb.h
new file mode 100644
index 0000000..deabe02
--- /dev/null
+++ b/arch/sparc/include/asm/kgdb.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_KGDB_H
+#define _SPARC_KGDB_H
+
+#ifdef CONFIG_SPARC32
+#define BUFMAX			2048
+#else
+#define BUFMAX			4096
+#endif
+
+enum regnames {
+	GDB_G0, GDB_G1, GDB_G2, GDB_G3, GDB_G4, GDB_G5, GDB_G6, GDB_G7,
+	GDB_O0, GDB_O1, GDB_O2, GDB_O3, GDB_O4, GDB_O5, GDB_SP, GDB_O7,
+	GDB_L0, GDB_L1, GDB_L2, GDB_L3, GDB_L4, GDB_L5, GDB_L6, GDB_L7,
+	GDB_I0, GDB_I1, GDB_I2, GDB_I3, GDB_I4, GDB_I5, GDB_FP, GDB_I7,
+	GDB_F0,
+	GDB_F31 = GDB_F0 + 31,
+#ifdef CONFIG_SPARC32
+	GDB_Y, GDB_PSR, GDB_WIM, GDB_TBR, GDB_PC, GDB_NPC,
+	GDB_FSR, GDB_CSR,
+#else
+	GDB_F32 = GDB_F0 + 32,
+	GDB_F62 = GDB_F32 + 15,
+	GDB_PC, GDB_NPC, GDB_STATE, GDB_FSR, GDB_FPRS, GDB_Y,
+#endif
+};
+
+#ifdef CONFIG_SPARC32
+#define NUMREGBYTES		((GDB_CSR + 1) * 4)
+#else
+#define NUMREGBYTES		((GDB_Y + 1) * 8)
+#endif
+
+struct pt_regs;
+asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs);
+
+void arch_kgdb_breakpoint(void);
+
+#define BREAK_INSTR_SIZE	4
+#define CACHE_FLUSH_IS_SAFE	1
+
+#endif /* _SPARC_KGDB_H */
diff --git a/arch/sparc/include/asm/kmap_types.h b/arch/sparc/include/asm/kmap_types.h
new file mode 100644
index 0000000..55a99b6
--- /dev/null
+++ b/arch/sparc/include/asm/kmap_types.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+/* Dummy header just to define km_type.  None of this
+ * is actually used on sparc.  -DaveM
+ */
+
+#include <asm-generic/kmap_types.h>
+
+#endif
diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h
new file mode 100644
index 0000000..bfcaa63
--- /dev/null
+++ b/arch/sparc/include/asm/kprobes.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_KPROBES_H
+#define _SPARC64_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
+#define BREAKPOINT_INSTRUCTION   0x91d02070 /* ta 0x70 */
+#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */
+
+#ifdef CONFIG_KPROBES
+#include <linux/types.h>
+#include <linux/percpu.h>
+
+typedef u32 kprobe_opcode_t;
+
+#define MAX_INSN_SIZE 2
+
+#define kretprobe_blacklist_size 0
+
+#define arch_remove_kprobe(p)	do {} while (0)
+
+#define flush_insn_slot(p)		\
+do { 	flushi(&(p)->ainsn.insn[0]);	\
+	flushi(&(p)->ainsn.insn[1]);	\
+} while (0)
+
+void kretprobe_trampoline(void);
+
+/* Architecture specific copy of original instruction*/
+struct arch_specific_insn {
+	/* copy of the original instruction */
+	kprobe_opcode_t insn[MAX_INSN_SIZE];
+};
+
+struct prev_kprobe {
+	struct kprobe *kp;
+	unsigned long status;
+	unsigned long orig_tnpc;
+	unsigned long orig_tstate_pil;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+	unsigned long kprobe_status;
+	unsigned long kprobe_orig_tnpc;
+	unsigned long kprobe_orig_tstate_pil;
+	struct prev_kprobe prev_kprobe;
+};
+
+int kprobe_exceptions_notify(struct notifier_block *self,
+			     unsigned long val, void *data);
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
+				      struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
+#endif /* _SPARC64_KPROBES_H */
diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h
new file mode 100644
index 0000000..ca97395
--- /dev/null
+++ b/arch/sparc/include/asm/ldc.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_LDC_H
+#define _SPARC64_LDC_H
+
+#include <asm/hypervisor.h>
+
+extern int ldom_domaining_enabled;
+void ldom_set_var(const char *var, const char *value);
+void ldom_reboot(const char *boot_command);
+void ldom_power_off(void);
+
+/* The event handler will be invoked when the link state changes
+ * or data becomes available on the receive side.
+ *
+ * For non-RAW links, if the LDC_EVENT_RESET event arrives the
+ * driver should reset all of its internal state and reinvoke
+ * ldc_connect() to try and bring the link up again.  (A sketch of
+ * such a callback follows the LDC_EVENT_* definitions below.)
+ *
+ * For RAW links, ldc_connect() is not used.  Instead the driver
+ * just waits for the LDC_EVENT_UP event.
+ */
+struct ldc_channel_config {
+	void (*event)(void *arg, int event);
+
+	u32			mtu;
+	unsigned int		rx_irq;
+	unsigned int		tx_irq;
+	u8			mode;
+#define LDC_MODE_RAW		0x00
+#define LDC_MODE_UNRELIABLE	0x01
+#define LDC_MODE_RESERVED	0x02
+#define LDC_MODE_STREAM		0x03
+
+	u8			debug;
+#define LDC_DEBUG_HS		0x01
+#define LDC_DEBUG_STATE		0x02
+#define LDC_DEBUG_RX		0x04
+#define LDC_DEBUG_TX		0x08
+#define LDC_DEBUG_DATA		0x10
+};
+
+#define LDC_EVENT_RESET		0x01
+#define LDC_EVENT_UP		0x02
+#define LDC_EVENT_DATA_READY	0x04
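+
+/* Illustrative sketch (not part of the original header): a minimal
+ * event callback of the shape struct ldc_channel_config expects.  The
+ * names my_ldc_event and my_rx_work are hypothetical; ldc_connect() is
+ * declared further down in this header.
+ *
+ *	static void my_ldc_event(void *arg, int event)
+ *	{
+ *		struct ldc_channel *lp = arg;
+ *
+ *		if (event == LDC_EVENT_RESET)
+ *			ldc_connect(lp);		// non-RAW links only
+ *		else if (event == LDC_EVENT_DATA_READY)
+ *			schedule_work(&my_rx_work);	// drain via ldc_read()
+ *	}
+ */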
+
+#define LDC_STATE_INVALID	0x00
+#define LDC_STATE_INIT		0x01
+#define LDC_STATE_BOUND		0x02
+#define LDC_STATE_READY		0x03
+#define LDC_STATE_CONNECTED	0x04
+
+#define	LDC_PACKET_SIZE		64
+
+struct ldc_channel;
+
+/* Allocate state for a channel.  */
+struct ldc_channel *ldc_alloc(unsigned long id,
+			      const struct ldc_channel_config *cfgp,
+			      void *event_arg,
+			      const char *name);
+
+/* Shut down and free state for a channel.  */
+void ldc_free(struct ldc_channel *lp);
+
+/* Register TX and RX queues of the link with the hypervisor.  */
+int ldc_bind(struct ldc_channel *lp);
+void ldc_unbind(struct ldc_channel *lp);
+
+/* For non-RAW protocols we need to complete a handshake before
+ * communication can proceed.  ldc_connect() does that, if the
+ * handshake completes successfully, an LDC_EVENT_UP event will
+ * be sent up to the driver.
+ */
+int ldc_connect(struct ldc_channel *lp);
+int ldc_disconnect(struct ldc_channel *lp);
+
+int ldc_state(struct ldc_channel *lp);
+void ldc_set_state(struct ldc_channel *lp, u8 state);
+int ldc_mode(struct ldc_channel *lp);
+void __ldc_print(struct ldc_channel *lp, const char *caller);
+int ldc_rx_reset(struct ldc_channel *lp);
+
+#define	ldc_print(chan)	__ldc_print(chan, __func__)
+
+/* Read and write operations.  Only valid when the link is up.  */
+int ldc_write(struct ldc_channel *lp, const void *buf,
+	      unsigned int size);
+int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size);
+
+#define LDC_MAP_SHADOW	0x01
+#define LDC_MAP_DIRECT	0x02
+#define LDC_MAP_IO	0x04
+#define LDC_MAP_R	0x08
+#define LDC_MAP_W	0x10
+#define LDC_MAP_X	0x20
+#define LDC_MAP_RW	(LDC_MAP_R | LDC_MAP_W)
+#define LDC_MAP_RWX	(LDC_MAP_R | LDC_MAP_W | LDC_MAP_X)
+#define LDC_MAP_ALL	0x03f
+
+struct ldc_trans_cookie {
+	u64			cookie_addr;
+	u64			cookie_size;
+};
+
+struct scatterlist;
+int ldc_map_sg(struct ldc_channel *lp,
+	       struct scatterlist *sg, int num_sg,
+	       struct ldc_trans_cookie *cookies, int ncookies,
+	       unsigned int map_perm);
+
+int ldc_map_single(struct ldc_channel *lp,
+		   void *buf, unsigned int len,
+		   struct ldc_trans_cookie *cookies, int ncookies,
+		   unsigned int map_perm);
+
+void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
+	       int ncookies);
+
+int ldc_copy(struct ldc_channel *lp, int copy_dir,
+	     void *buf, unsigned int len, unsigned long offset,
+	     struct ldc_trans_cookie *cookies, int ncookies);
+
+static inline int ldc_get_dring_entry(struct ldc_channel *lp,
+				      void *buf, unsigned int len,
+				      unsigned long offset,
+				      struct ldc_trans_cookie *cookies,
+				      int ncookies)
+{
+	return ldc_copy(lp, LDC_COPY_IN, buf, len, offset, cookies, ncookies);
+}
+
+static inline int ldc_put_dring_entry(struct ldc_channel *lp,
+				      void *buf, unsigned int len,
+				      unsigned long offset,
+				      struct ldc_trans_cookie *cookies,
+				      int ncookies)
+{
+	return ldc_copy(lp, LDC_COPY_OUT, buf, len, offset, cookies, ncookies);
+}
+
+void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
+			  struct ldc_trans_cookie *cookies,
+			  int *ncookies, unsigned int map_perm);
+
+void ldc_free_exp_dring(struct ldc_channel *lp, void *buf,
+		        unsigned int len,
+		        struct ldc_trans_cookie *cookies, int ncookies);
+
+#endif /* _SPARC64_LDC_H */
diff --git a/arch/sparc/include/asm/leon.h b/arch/sparc/include/asm/leon.h
new file mode 100644
index 0000000..c68bb5b
--- /dev/null
+++ b/arch/sparc/include/asm/leon.h
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2004 Konrad Eisele (eiselekd@web.de,konrad@gaisler.com) Gaisler Research
+ * Copyright (C) 2004 Stefan Holst (mail@s-holst.de) Uni-Stuttgart
+ * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB
+ * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB
+ */
+
+#ifndef LEON_H_INCLUDE
+#define LEON_H_INCLUDE
+
+/* mmu register access, ASI_LEON_MMUREGS */
+#define LEON_CNR_CTRL		0x000
+#define LEON_CNR_CTXP		0x100
+#define LEON_CNR_CTX		0x200
+#define LEON_CNR_F		0x300
+#define LEON_CNR_FADDR		0x400
+
+#define LEON_CNR_CTX_NCTX	256	/* number of MMU contexts */
+
+#define LEON_CNR_CTRL_TLBDIS	0x80000000
+
+#define LEON_MMUTLB_ENT_MAX	64
+
+/*
+ * diagnostic access from mmutlb.vhd:
+ * 0: pte address
+ * 4: pte
+ * 8: additional flags
+ */
+#define LEON_DIAGF_LVL		0x3
+#define LEON_DIAGF_WR		0x8
+#define LEON_DIAGF_WR_SHIFT	3
+#define LEON_DIAGF_HIT		0x10
+#define LEON_DIAGF_HIT_SHIFT	4
+#define LEON_DIAGF_CTX		0x1fe0
+#define LEON_DIAGF_CTX_SHIFT	5
+#define LEON_DIAGF_VALID	0x2000
+#define LEON_DIAGF_VALID_SHIFT	13
+
+/* irq masks */
+#define LEON_HARD_INT(x)	(1 << (x))	/* irq 0-15 */
+#define LEON_IRQMASK_R		0x0000fffe	/* bit 15- 1 of lregs.irqmask */
+#define LEON_IRQPRIO_R		0xfffe0000	/* bit 31-17 of lregs.irqmask */
+
+#define LEON_MCFG2_SRAMDIS		0x00002000
+#define LEON_MCFG2_SDRAMEN		0x00004000
+#define LEON_MCFG2_SRAMBANKSZ		0x00001e00	/* [12-9] */
+#define LEON_MCFG2_SRAMBANKSZ_SHIFT	9
+#define LEON_MCFG2_SDRAMBANKSZ		0x03800000	/* [25-23] */
+#define LEON_MCFG2_SDRAMBANKSZ_SHIFT	23
+
+#define LEON_TCNT0_MASK	0x7fffff
+
+
+#define ASI_LEON3_SYSCTRL		0x02
+#define ASI_LEON3_SYSCTRL_ICFG		0x08
+#define ASI_LEON3_SYSCTRL_DCFG		0x0c
+#define ASI_LEON3_SYSCTRL_CFG_SNOOPING (1 << 27)
+#define ASI_LEON3_SYSCTRL_CFG_SSIZE(c) (1 << (((c) >> 20) & 0xf))
+
+#ifndef __ASSEMBLY__
+
+/* do a physical address bypass write, e.g. to 0x80000000 */
+static inline void leon_store_reg(unsigned long paddr, unsigned long value)
+{
+	__asm__ __volatile__("sta %0, [%1] %2\n\t" : : "r"(value), "r"(paddr),
+			     "i"(ASI_LEON_BYPASS) : "memory");
+}
+
+/* do a physical address bypass load, e.g. from 0x80000000 */
+static inline unsigned long leon_load_reg(unsigned long paddr)
+{
+	unsigned long retval;
+	__asm__ __volatile__("lda [%1] %2, %0\n\t" :
+			     "=r"(retval) : "r"(paddr), "i"(ASI_LEON_BYPASS));
+	return retval;
+}
+
+/* macro access for leon_load_reg() and leon_store_reg() */
+#define LEON3_BYPASS_LOAD_PA(x)	    (leon_load_reg((unsigned long)(x)))
+#define LEON3_BYPASS_STORE_PA(x, v) (leon_store_reg((unsigned long)(x), (unsigned long)(v)))
+#define LEON_BYPASS_LOAD_PA(x)      leon_load_reg((unsigned long)(x))
+#define LEON_BYPASS_STORE_PA(x, v)  leon_store_reg((unsigned long)(x), (unsigned long)(v))
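+
+/* Illustrative only: reading a register in the LEON I/O area, which
+ * starts at physical address 0x80000000, without going through the MMU:
+ *
+ *	unsigned long v = LEON_BYPASS_LOAD_PA(0x80000000UL);
+ */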
+
+void leon_switch_mm(void);
+void leon_init_IRQ(void);
+
+static inline unsigned long sparc_leon3_get_dcachecfg(void)
+{
+	unsigned int retval;
+	__asm__ __volatile__("lda [%1] %2, %0\n\t" :
+			     "=r"(retval) :
+			     "r"(ASI_LEON3_SYSCTRL_DCFG),
+			     "i"(ASI_LEON3_SYSCTRL));
+	return retval;
+}
+
+/* enable snooping */
+static inline void sparc_leon3_enable_snooping(void)
+{
+	__asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
+			  "set 0x800000, %%l2\n\t"
+			  "or  %%l2, %%l1, %%l2\n\t"
+			  "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
+}
+
+static inline int sparc_leon3_snooping_enabled(void)
+{
+	u32 cctrl;
+	__asm__ __volatile__("lda [%%g0] 2, %0\n\t" : "=r"(cctrl));
+	return ((cctrl >> 23) & 1) && ((cctrl >> 17) & 1);
+}
+
+static inline void sparc_leon3_disable_cache(void)
+{
+	__asm__ __volatile__ ("lda [%%g0] 2, %%l1\n\t"
+			  "set 0x00000f, %%l2\n\t"
+			  "andn  %%l2, %%l1, %%l2\n\t"
+			  "sta %%l2, [%%g0] 2\n\t" : : : "l1", "l2");
+}
+
+static inline unsigned long sparc_leon3_asr17(void)
+{
+	u32 asr17;
+	__asm__ __volatile__ ("rd %%asr17, %0\n\t" : "=r"(asr17));
+	return asr17;
+}
+
+static inline int sparc_leon3_cpuid(void)
+{
+	return sparc_leon3_asr17() >> 28;
+}
+
+#endif /*!__ASSEMBLY__*/
+
+#ifdef CONFIG_SMP
+# define LEON3_IRQ_IPI_DEFAULT		13
+# define LEON3_IRQ_TICKER		(leon3_gptimer_irq)
+# define LEON3_IRQ_CROSS_CALL		15
+#endif
+
+#if defined(PAGE_SIZE_LEON_8K)
+#define LEON_PAGE_SIZE_LEON 1
+#elif defined(PAGE_SIZE_LEON_16K)
+#define LEON_PAGE_SIZE_LEON 2
+#else
+#define LEON_PAGE_SIZE_LEON 0
+#endif
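+
+/* In the "[ a, b, c ] + n" comments below, a/b/c are the index bits
+ * resolved at the pgd/pmd/pte levels and n is the page-offset width;
+ * together they always cover the 32-bit virtual address.
+ */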
+
+#if LEON_PAGE_SIZE_LEON == 0
+/* [ 8, 6, 6 ] + 12 */
+#define LEON_PGD_SH    24
+#define LEON_PGD_M     0xff
+#define LEON_PMD_SH    18
+#define LEON_PMD_SH_V  (LEON_PGD_SH-2)
+#define LEON_PMD_M     0x3f
+#define LEON_PTE_SH    12
+#define LEON_PTE_M     0x3f
+#elif LEON_PAGE_SIZE_LEON == 1
+/* [ 7, 6, 6 ] + 13 */
+#define LEON_PGD_SH    25
+#define LEON_PGD_M     0x7f
+#define LEON_PMD_SH    19
+#define LEON_PMD_SH_V  (LEON_PGD_SH-1)
+#define LEON_PMD_M     0x3f
+#define LEON_PTE_SH    13
+#define LEON_PTE_M     0x3f
+#elif LEON_PAGE_SIZE_LEON == 2
+/* [ 6, 6, 6 ] + 14 */
+#define LEON_PGD_SH    26
+#define LEON_PGD_M     0x3f
+#define LEON_PMD_SH    20
+#define LEON_PMD_SH_V  (LEON_PGD_SH-0)
+#define LEON_PMD_M     0x3f
+#define LEON_PTE_SH    14
+#define LEON_PTE_M     0x3f
+#elif LEON_PAGE_SIZE_LEON == 3
+/* [ 4, 7, 6 ] + 15 */
+#define LEON_PGD_SH    28
+#define LEON_PGD_M     0x0f
+#define LEON_PMD_SH    21
+#define LEON_PMD_SH_V  (LEON_PGD_SH-0)
+#define LEON_PMD_M     0x7f
+#define LEON_PTE_SH    15
+#define LEON_PTE_M     0x3f
+#else
+#error cannot determine LEON_PAGE_SIZE_LEON
+#endif
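+
+/* Editor's sketch: the shift/mask pairs above carve a virtual address into
+ * three table indices (plus the page offset below LEON_PTE_SH), e.g. for a
+ * software table walk.  These helper names are illustrative, not kernel API.
+ */
+#define EXAMPLE_LEON_PGD_IDX(va) (((va) >> LEON_PGD_SH) & LEON_PGD_M)
+#define EXAMPLE_LEON_PMD_IDX(va) (((va) >> LEON_PMD_SH) & LEON_PMD_M)
+#define EXAMPLE_LEON_PTE_IDX(va) (((va) >> LEON_PTE_SH) & LEON_PTE_M)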
+
+#define LEON3_XCCR_SETS_MASK  0x07000000UL
+#define LEON3_XCCR_SSIZE_MASK 0x00f00000UL
+
+#define LEON2_CCR_DSETS_MASK 0x03000000UL
+#define LEON2_CFG_SSIZE_MASK 0x00007000UL
+
+#ifndef __ASSEMBLY__
+struct vm_area_struct;
+
+unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr);
+void leon_flush_icache_all(void);
+void leon_flush_dcache_all(void);
+void leon_flush_cache_all(void);
+void leon_flush_tlb_all(void);
+extern int leon_flush_during_switch;
+int leon_flush_needed(void);
+void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page);
+
+/* struct that holds the LEON3 cache configuration registers */
+struct leon3_cacheregs {
+	unsigned long ccr;	/* 0x00 - Cache Control Register  */
+	unsigned long iccr;     /* 0x08 - Instruction Cache Configuration Register */
+	unsigned long dccr;	/* 0x0c - Data Cache Configuration Register */
+};
+
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+
+struct device_node;
+struct task_struct;
+unsigned int leon_build_device_irq(unsigned int real_irq,
+				   irq_flow_handler_t flow_handler,
+				   const char *name, int do_ack);
+void leon_update_virq_handling(unsigned int virq,
+			       irq_flow_handler_t flow_handler,
+			       const char *name, int do_ack);
+void leon_init_timers(void);
+void leon_trans_init(struct device_node *dp);
+void leon_node_init(struct device_node *dp, struct device_node ***nextp);
+void init_leon(void);
+void poke_leonsparc(void);
+void leon3_getCacheRegs(struct leon3_cacheregs *regs);
+extern int leon3_ticker_irq;
+
+#ifdef CONFIG_SMP
+int leon_smp_nrcpus(void);
+void leon_clear_profile_irq(int cpu);
+void leon_smp_done(void);
+void leon_boot_cpus(void);
+int leon_boot_one_cpu(int i, struct task_struct *);
+void leon_init_smp(void);
+void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu);
+irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused);
+
+extern unsigned int smpleon_ipi[];
+extern unsigned int linux_trap_ipi15_leon[];
+extern int leon_ipi_irq;
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */
+
+/* macros used in leon_mm.c */
+#define PFN(x)           ((x) >> PAGE_SHIFT)
+#define _pfn_valid(pfn)	 (((pfn) < last_valid_pfn) && ((pfn) >= PFN(phys_base)))
+#define _SRMMU_PTE_PMASK_LEON 0xffffffff
+
+#endif
diff --git a/arch/sparc/include/asm/leon_amba.h b/arch/sparc/include/asm/leon_amba.h
new file mode 100644
index 0000000..6433a93
--- /dev/null
+++ b/arch/sparc/include/asm/leon_amba.h
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2004 Konrad Eisele (eiselekd@web.de, konrad@gaisler.com), Gaisler Research
+ * Copyright (C) 2004 Stefan Holst (mail@s-holst.de), Uni-Stuttgart
+ * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com), Konrad Eisele (konrad@gaisler.com), Aeroflex Gaisler AB
+ */
+
+#ifndef LEON_AMBA_H_INCLUDE
+#define LEON_AMBA_H_INCLUDE
+
+#ifndef __ASSEMBLY__
+
+struct amba_prom_registers {
+	unsigned int phys_addr;	/* The physical address of this register */
+	unsigned int reg_size;	/* How many bytes does this register take up? */
+};
+
+#endif
+
+/*
+ *  The following defines the bits in the LEON UART Status Registers.
+ */
+
+#define LEON_REG_UART_STATUS_DR   0x00000001	/* Data Ready */
+#define LEON_REG_UART_STATUS_TSE  0x00000002	/* TX Send Register Empty */
+#define LEON_REG_UART_STATUS_THE  0x00000004	/* TX Hold Register Empty */
+#define LEON_REG_UART_STATUS_BR   0x00000008	/* Break Error */
+#define LEON_REG_UART_STATUS_OE   0x00000010	/* RX Overrun Error */
+#define LEON_REG_UART_STATUS_PE   0x00000020	/* RX Parity Error */
+#define LEON_REG_UART_STATUS_FE   0x00000040	/* RX Framing Error */
+#define LEON_REG_UART_STATUS_ERR  0x00000078	/* Error Mask */
+
+/*
+ *  The following defines the bits in the LEON UART Ctrl Registers.
+ */
+
+#define LEON_REG_UART_CTRL_RE     0x00000001	/* Receiver enable */
+#define LEON_REG_UART_CTRL_TE     0x00000002	/* Transmitter enable */
+#define LEON_REG_UART_CTRL_RI     0x00000004	/* Receiver interrupt enable */
+#define LEON_REG_UART_CTRL_TI     0x00000008	/* Transmitter irq */
+#define LEON_REG_UART_CTRL_PS     0x00000010	/* Parity select */
+#define LEON_REG_UART_CTRL_PE     0x00000020	/* Parity enable */
+#define LEON_REG_UART_CTRL_FL     0x00000040	/* Flow control enable */
+#define LEON_REG_UART_CTRL_LB     0x00000080	/* Loop Back enable */
+
+#define LEON3_GPTIMER_EN 1
+#define LEON3_GPTIMER_RL 2
+#define LEON3_GPTIMER_LD 4
+#define LEON3_GPTIMER_IRQEN 8
+#define LEON3_GPTIMER_SEPIRQ 8
+#define LEON3_GPTIMER_TIMERS 0x7
+
+#define LEON23_REG_TIMER_CONTROL_EN    0x00000001 /* 1 = enable counting */
+						  /* 0 = hold scaler and counter */
+#define LEON23_REG_TIMER_CONTROL_RL    0x00000002 /* 1 = reload at 0 */
+						  /* 0 = stop at 0 */
+#define LEON23_REG_TIMER_CONTROL_LD    0x00000004 /* 1 = load counter */
+						  /* 0 = no function */
+#define LEON23_REG_TIMER_CONTROL_IQ    0x00000008 /* 1 = irq enable */
+						  /* 0 = no function */
+
+/*
+ *  The following defines the bits in the LEON PS/2 Status Registers.
+ */
+
+#define LEON_REG_PS2_STATUS_DR   0x00000001	/* Data Ready */
+#define LEON_REG_PS2_STATUS_PE   0x00000002	/* Parity error */
+#define LEON_REG_PS2_STATUS_FE   0x00000004	/* Framing error */
+#define LEON_REG_PS2_STATUS_KI   0x00000008	/* Keyboard inhibit */
+#define LEON_REG_PS2_STATUS_RF   0x00000010	/* RX buffer full */
+#define LEON_REG_PS2_STATUS_TF   0x00000020	/* TX buffer full */
+
+/*
+ *  The following defines the bits in the LEON PS/2 Ctrl Registers.
+ */
+
+#define LEON_REG_PS2_CTRL_RE 0x00000001	/* Receiver enable */
+#define LEON_REG_PS2_CTRL_TE 0x00000002	/* Transmitter enable */
+#define LEON_REG_PS2_CTRL_RI 0x00000004	/* Keyboard receive irq  */
+#define LEON_REG_PS2_CTRL_TI 0x00000008	/* Keyboard transmit irq */
+
+#define LEON3_IRQMPSTATUS_CPUNR     28
+#define LEON3_IRQMPSTATUS_BROADCAST 27
+
+#define GPTIMER_CONFIG_IRQNT(a)          (((a) >> 3) & 0x1f)
+#define GPTIMER_CONFIG_ISSEP(a)          ((a) & (1 << 8))
+#define GPTIMER_CONFIG_NTIMERS(a)        ((a) & (0x7))
+#define LEON3_GPTIMER_CTRL_PENDING       0x10
+#define LEON3_GPTIMER_CONFIG_NRTIMERS(c) ((c)->config & 0x7)
+#define LEON3_GPTIMER_CTRL_ISPENDING(r)  (((r)&LEON3_GPTIMER_CTRL_PENDING) ? 1 : 0)
+
+#ifndef __ASSEMBLY__
+
+struct leon3_irqctrl_regs_map {
+	u32 ilevel;
+	u32 ipend;
+	u32 iforce;
+	u32 iclear;
+	u32 mpstatus;
+	u32 mpbroadcast;
+	u32 notused02;
+	u32 notused03;
+	u32 ampctrl;
+	u32 icsel[2];
+	u32 notused13;
+	u32 notused20;
+	u32 notused21;
+	u32 notused22;
+	u32 notused23;
+	u32 mask[16];
+	u32 force[16];
+	/* Extended IRQ registers */
+	u32 intid[16];	/* 0xc0 */
+	u32 unused[(0x1000-0x100)/4];
+};
+
+struct leon3_apbuart_regs_map {
+	u32 data;
+	u32 status;
+	u32 ctrl;
+	u32 scaler;
+};
+
+struct leon3_gptimerelem_regs_map {
+	u32 val;
+	u32 rld;
+	u32 ctrl;
+	u32 unused;
+};
+
+struct leon3_gptimer_regs_map {
+	u32 scalar;
+	u32 scalar_reload;
+	u32 config;
+	u32 unused;
+	struct leon3_gptimerelem_regs_map e[8];
+};
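+
+/* Editor's sketch: decoding the config word of a mapped timer unit with the
+ * GPTIMER_CONFIG_* helpers defined earlier; "regs" is assumed to be an
+ * already-mapped leon3_gptimer_regs_map (illustrative, not kernel API).
+ */
+static inline int example_gptimer_ntimers(struct leon3_gptimer_regs_map *regs)
+{
+	unsigned int cfg = regs->config;
+
+	return GPTIMER_CONFIG_NTIMERS(cfg);	/* timers implemented, 1-7 */
+}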
+
+/*
+ *  Types and structure used for AMBA Plug & Play bus scanning
+ */
+
+#define AMBA_MAXAPB_DEVS 64
+#define AMBA_MAXAPB_DEVS_PERBUS 16
+
+struct amba_device_table {
+	int devnr;		   /* number of devices on AHB or APB bus */
+	unsigned int *addr[16];    /* addresses of the devices' configuration tables */
+	unsigned int allocbits[1]; /* 0=unallocated, 1=allocated driver */
+};
+
+struct amba_apbslv_device_table {
+	int devnr;		                  /* number of devices on AHB or APB bus */
+	unsigned int *addr[AMBA_MAXAPB_DEVS];     /* addresses of the devices' configuration tables */
+	unsigned int apbmst[AMBA_MAXAPB_DEVS];    /* APB master if an entry is an APB slave */
+	unsigned int apbmstidx[AMBA_MAXAPB_DEVS]; /* APB master idx if an entry is an APB slave */
+	unsigned int allocbits[4];                /* 0=unallocated, 1=allocated driver */
+};
+
+struct amba_confarea_type {
+	struct amba_confarea_type *next;/* next bus in chain */
+	struct amba_device_table ahbmst;
+	struct amba_device_table ahbslv;
+	struct amba_apbslv_device_table apbslv;
+	unsigned int apbmst;
+};
+
+/* collect apb slaves */
+struct amba_apb_device {
+	unsigned int start, irq, bus_id;
+	struct amba_confarea_type *bus;
+};
+
+/* collect ahb slaves */
+struct amba_ahb_device {
+	unsigned int start[4], irq, bus_id;
+	struct amba_confarea_type *bus;
+};
+
+struct device_node;
+void _amba_init(struct device_node *dp, struct device_node ***nextp);
+
+extern unsigned long amba_system_id;
+extern struct leon3_irqctrl_regs_map *leon3_irqctrl_regs;
+extern struct leon3_gptimer_regs_map *leon3_gptimer_regs;
+extern struct amba_apb_device leon_percpu_timer_dev[16];
+extern int leondebug_irq_disable;
+extern int leon_debug_irqout;
+extern unsigned long leon3_gptimer_irq;
+extern unsigned int sparc_leon_eirq;
+
+#endif /* __ASSEMBLY__ */
+
+#define LEON3_IO_AREA 0xfff00000
+#define LEON3_CONF_AREA 0xff000
+#define LEON3_AHB_SLAVE_CONF_AREA (1 << 11)
+
+#define LEON3_AHB_CONF_WORDS 8
+#define LEON3_APB_CONF_WORDS 2
+#define LEON3_AHB_MASTERS 16
+#define LEON3_AHB_SLAVES 16
+#define LEON3_APB_SLAVES 16
+#define LEON3_APBUARTS 8
+
+/* Vendor codes */
+#define VENDOR_GAISLER   1
+#define VENDOR_PENDER    2
+#define VENDOR_ESA       4
+#define VENDOR_OPENCORES 8
+
+/* Gaisler Research device id's */
+#define GAISLER_LEON3    0x003
+#define GAISLER_LEON3DSU 0x004
+#define GAISLER_ETHAHB   0x005
+#define GAISLER_APBMST   0x006
+#define GAISLER_AHBUART  0x007
+#define GAISLER_SRCTRL   0x008
+#define GAISLER_SDCTRL   0x009
+#define GAISLER_APBUART  0x00C
+#define GAISLER_IRQMP    0x00D
+#define GAISLER_AHBRAM   0x00E
+#define GAISLER_GPTIMER  0x011
+#define GAISLER_PCITRG   0x012
+#define GAISLER_PCISBRG  0x013
+#define GAISLER_PCIFBRG  0x014
+#define GAISLER_PCITRACE 0x015
+#define GAISLER_PCIDMA   0x016
+#define GAISLER_AHBTRACE 0x017
+#define GAISLER_ETHDSU   0x018
+#define GAISLER_PIOPORT  0x01A
+#define GAISLER_GRGPIO   0x01A
+#define GAISLER_AHBJTAG  0x01c
+#define GAISLER_ETHMAC   0x01D
+#define GAISLER_AHB2AHB  0x020
+#define GAISLER_USBDC    0x021
+#define GAISLER_ATACTRL  0x024
+#define GAISLER_DDRSPA   0x025
+#define GAISLER_USBEHC   0x026
+#define GAISLER_USBUHC   0x027
+#define GAISLER_I2CMST   0x028
+#define GAISLER_SPICTRL  0x02D
+#define GAISLER_DDR2SPA  0x02E
+#define GAISLER_SPIMCTRL 0x045
+#define GAISLER_LEON4    0x048
+#define GAISLER_LEON4DSU 0x049
+#define GAISLER_AHBSTAT  0x052
+#define GAISLER_FTMCTRL  0x054
+#define GAISLER_KBD      0x060
+#define GAISLER_VGA      0x061
+#define GAISLER_SVGA     0x063
+#define GAISLER_GRSYSMON 0x066
+#define GAISLER_GRACECTRL 0x067
+
+#define GAISLER_L2TIME   0xffd	/* internal device: leon2 timer */
+#define GAISLER_L2C      0xffe	/* internal device: leon2compat */
+#define GAISLER_PLUGPLAY 0xfff	/* internal device: plug & play configarea */
+
+/* Chip IDs */
+#define AEROFLEX_UT699    0x0699
+#define LEON4_NEXTREME1   0x0102
+#define GAISLER_GR712RC   0x0712
+
+#define amba_vendor(x) (((x) >> 24) & 0xff)
+
+#define amba_device(x) (((x) >> 12) & 0xfff)
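+
+/* Editor's sketch: a plug & play identification word carries the vendor
+ * code in bits 31:24 and the device code in bits 23:12, so matching an
+ * APBUART reduces to the two macros above (helper name is illustrative).
+ */
+#define example_is_apbuart(id) \
+	(amba_vendor(id) == VENDOR_GAISLER && amba_device(id) == GAISLER_APBUART)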
+
+#endif
diff --git a/arch/sparc/include/asm/leon_pci.h b/arch/sparc/include/asm/leon_pci.h
new file mode 100644
index 0000000..484a56e
--- /dev/null
+++ b/arch/sparc/include/asm/leon_pci.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * asm/leon_pci.h
+ *
+ * Copyright (C) 2011 Aeroflex Gaisler AB, Daniel Hellstrom
+ */
+
+#ifndef _ASM_LEON_PCI_H_
+#define _ASM_LEON_PCI_H_
+
+/* PCI related definitions */
+struct leon_pci_info {
+	struct pci_ops *ops;
+	struct resource	io_space;
+	struct resource	mem_space;
+	struct resource	busn;
+	int (*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
+};
+
+void leon_pci_init(struct platform_device *ofdev,
+		   struct leon_pci_info *info);
+
+#endif /* _ASM_LEON_PCI_H_ */
diff --git a/arch/sparc/include/asm/lsu.h b/arch/sparc/include/asm/lsu.h
new file mode 100644
index 0000000..154ae76
--- /dev/null
+++ b/arch/sparc/include/asm/lsu.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_LSU_H
+#define _SPARC64_LSU_H
+
+#include <linux/const.h>
+
+/* LSU Control Register */
+#define LSU_CONTROL_PM _AC(0x000001fe00000000,UL) /* Phys-watchpoint byte mask*/
+#define LSU_CONTROL_VM _AC(0x00000001fe000000,UL) /* Virt-watchpoint byte mask*/
+#define LSU_CONTROL_PR _AC(0x0000000001000000,UL) /* Phys-rd watchpoint enable*/
+#define LSU_CONTROL_PW _AC(0x0000000000800000,UL) /* Phys-wr watchpoint enable*/
+#define LSU_CONTROL_VR _AC(0x0000000000400000,UL) /* Virt-rd watchpoint enable*/
+#define LSU_CONTROL_VW _AC(0x0000000000200000,UL) /* Virt-wr watchpoint enable*/
+#define LSU_CONTROL_FM _AC(0x00000000000ffff0,UL) /* Parity mask enables.     */
+#define LSU_CONTROL_DM _AC(0x0000000000000008,UL) /* Data MMU enable.         */
+#define LSU_CONTROL_IM _AC(0x0000000000000004,UL) /* Instruction MMU enable.  */
+#define LSU_CONTROL_DC _AC(0x0000000000000002,UL) /* Data cache enable.       */
+#define LSU_CONTROL_IC _AC(0x0000000000000001,UL) /* Instruction cache enable.*/
+
+#endif /* !(_SPARC64_LSU_H) */
diff --git a/arch/sparc/include/asm/machines.h b/arch/sparc/include/asm/machines.h
new file mode 100644
index 0000000..9f78f70
--- /dev/null
+++ b/arch/sparc/include/asm/machines.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * machines.h:  Defines for taking apart the machine type value in the
+ *              idprom and determining the kind of machine we are on.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_MACHINES_H
+#define _SPARC_MACHINES_H
+
+struct Sun_Machine_Models {
+	char *name;
+	unsigned char id_machtype;
+};
+
+/* The machine type in the idprom area looks like this:
+ *
+ * ---------------
+ * | ARCH | MACH |
+ * ---------------
+ *  7    4 3    0
+ *
+ * The ARCH field determines the architecture line (sun4m, etc).
+ * The MACH field determines the machine make within that architecture.
+ */
+
+#define SM_ARCH_MASK  0xf0
+#define  M_LEON       0x30
+#define SM_SUN4M      0x70
+#define SM_SUN4M_OBP  0x80
+
+#define SM_TYP_MASK   0x0f
+
+/* Leon machines */
+#define M_LEON3_SOC   0x02    /* Leon3 SoC */
+
+/* Sun4m machines, these predate OpenBoot.  These values only mean
+ * something if the value in the ARCH field is SM_SUN4M; if it is
+ * SM_SUN4M_OBP then you have the following situation:
+ * 1) You either have a sun4d, a sun4e, or a recently made sun4m.
+ * 2) You have to consult OpenBoot to determine which machine this is.
+ */
+#define SM_4M_SS60    0x01    /* Sun4m SparcSystem 600                  */
+#define SM_4M_SS50    0x02    /* Sun4m SparcStation 10                  */
+#define SM_4M_SS40    0x03    /* Sun4m SparcStation 5                   */
+
+/* Sun4d machines -- N/A */
+/* Sun4e machines -- N/A */
+/* Sun4u machines -- N/A */
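+
+/* Editor's sketch: splitting an idprom machine-type byte with the masks
+ * above; e.g. 0x73 decodes as ARCH = SM_SUN4M, MACH = SM_4M_SS40
+ * (a SparcStation 5).  The helper name is illustrative.
+ */
+static inline int example_is_sparcstation5(unsigned char id_machtype)
+{
+	return (id_machtype & SM_ARCH_MASK) == SM_SUN4M &&
+	       (id_machtype & SM_TYP_MASK) == SM_4M_SS40;
+}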
+
+#endif /* !(_SPARC_MACHINES_H) */
diff --git a/arch/sparc/include/asm/mbus.h b/arch/sparc/include/asm/mbus.h
new file mode 100644
index 0000000..8b6dbe7
--- /dev/null
+++ b/arch/sparc/include/asm/mbus.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mbus.h:  Various defines for MBUS modules.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_MBUS_H
+#define _SPARC_MBUS_H
+
+#include <asm/ross.h>    /* HyperSparc stuff */
+#include <asm/viking.h>  /* Ugh, bug city... */
+
+enum mbus_module {
+	HyperSparc        = 0,
+	Swift_ok          = 4,
+	Swift_bad_c       = 5,
+	Swift_lots_o_bugs = 6,
+	Tsunami           = 7,
+	Viking_12         = 8,
+	Viking_2x         = 9,
+	Viking_30         = 10,
+	Viking_35         = 11,
+	Viking_new        = 12,
+	TurboSparc	  = 13,
+	SRMMU_INVAL_MOD   = 14,
+};
+
+extern enum mbus_module srmmu_modtype;
+extern unsigned int viking_rev, swift_rev, cypress_rev;
+
+/* HW Mbus module bugs we have to deal with */
+#define HWBUG_COPYBACK_BROKEN        0x00000001
+#define HWBUG_ASIFLUSH_BROKEN        0x00000002
+#define HWBUG_VACFLUSH_BITROT        0x00000004
+#define HWBUG_KERN_ACCBROKEN         0x00000008
+#define HWBUG_KERN_CBITBROKEN        0x00000010
+#define HWBUG_MODIFIED_BITROT        0x00000020
+#define HWBUG_PC_BADFAULT_ADDR       0x00000040
+#define HWBUG_SUPERSCALAR_BAD        0x00000080
+#define HWBUG_PACINIT_BITROT         0x00000100
+
+/* First the module type values. To find out which you have, just load
+ * the mmu control register from ASI_M_MMUREG alternate address space and
+ * shift the value right 28 bits.
+ */
+/* IMPL field means the company which produced the chip. */
+#define MBUS_VIKING        0x4   /* bleech, Texas Instruments Module */
+#define MBUS_LSI           0x3   /* LSI Logics */
+#define MBUS_ROSS          0x1   /* Ross is nice */
+#define MBUS_FMI           0x0   /* Fujitsu Microelectronics/Swift */
+
+/* Ross Module versions */
+#define ROSS_604_REV_CDE        0x0   /* revisions c, d, and e */
+#define ROSS_604_REV_F          0x1   /* revision f */
+#define ROSS_605                0xf   /* revision a, a.1, and a.2 */
+#define ROSS_605_REV_B          0xe   /* revision b */
+
+/* TI Viking Module versions */
+#define VIKING_REV_12           0x1   /* Version 1.2 or SPARCclassic's CPU */
+#define VIKING_REV_2            0x2   /* Version 2.1, 2.2, 2.3, and 2.4 */
+#define VIKING_REV_30           0x3   /* Version 3.0 */
+#define VIKING_REV_35           0x4   /* Version 3.5 */
+
+/* LSI Logics. */
+#define LSI_L64815		0x0
+
+/* Fujitsu */
+#define FMI_AURORA		0x4   /* MB8690x, a Swift module... */
+#define FMI_TURBO		0x5   /* MB86907, a TurboSparc module... */
+
+/* For multiprocessor support we need to be able to obtain the CPU id and
+ * the MBUS Module id.
+ */
+
+/* The CPU ID is encoded in the trap base register, starting at bit 20,
+ * and is 2 bits wide.
+ */
+#define TBR_ID_SHIFT            20
+
+static inline int get_cpuid(void)
+{
+	register int retval;
+	__asm__ __volatile__("rd %%tbr, %0\n\t"
+			     "srl %0, %1, %0\n\t" :
+			     "=r" (retval) :
+			     "i" (TBR_ID_SHIFT));
+	return (retval & 3);
+}
+
+static inline int get_modid(void)
+{
+	return (get_cpuid() | 0x8);
+}
+
+#endif /* !(_SPARC_MBUS_H) */
diff --git a/arch/sparc/include/asm/mc146818rtc.h b/arch/sparc/include/asm/mc146818rtc.h
new file mode 100644
index 0000000..07faf75
--- /dev/null
+++ b/arch/sparc/include/asm/mc146818rtc.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_MC146818RTC_H
+#define ___ASM_SPARC_MC146818RTC_H
+
+#include <linux/spinlock.h>
+
+extern spinlock_t rtc_lock;
+
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/mc146818rtc_64.h>
+#else
+#include <asm/mc146818rtc_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/mc146818rtc_32.h b/arch/sparc/include/asm/mc146818rtc_32.h
new file mode 100644
index 0000000..d8fd75d
--- /dev/null
+++ b/arch/sparc/include/asm/mc146818rtc_32.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef __ASM_SPARC_MC146818RTC_H
+#define __ASM_SPARC_MC146818RTC_H
+
+#include <asm/io.h>
+
+#ifndef RTC_PORT
+#define RTC_PORT(x)	(0x70 + (x))
+#define RTC_ALWAYS_BCD	1	/* RTC register values are always BCD */
+#endif
+
+/*
+ * All machines supported so far access the RTC index register via
+ * an ISA port access, but the way the data register is accessed differs.
+ */
+#define CMOS_READ(addr) ({ \
+	outb_p((addr), RTC_PORT(0)); \
+	inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+	outb_p((addr), RTC_PORT(0)); \
+	outb_p((val), RTC_PORT(1)); \
+})
+
+#define RTC_IRQ 8
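+
+/* Editor's sketch: with the accessors above, reading the MC146818 seconds
+ * register (standard index 0, i.e. RTC_SECONDS) is a single CMOS_READ;
+ * real callers hold rtc_lock around register accesses.
+ */
+static inline unsigned char example_read_rtc_seconds(void)
+{
+	return CMOS_READ(0);	/* 0 == RTC_SECONDS */
+}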
+
+#endif /* __ASM_SPARC_MC146818RTC_H */
diff --git a/arch/sparc/include/asm/mc146818rtc_64.h b/arch/sparc/include/asm/mc146818rtc_64.h
new file mode 100644
index 0000000..b1708a7
--- /dev/null
+++ b/arch/sparc/include/asm/mc146818rtc_64.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef __ASM_SPARC64_MC146818RTC_H
+#define __ASM_SPARC64_MC146818RTC_H
+
+#include <asm/io.h>
+
+#ifndef RTC_PORT
+extern unsigned long cmos_regs;
+#define RTC_PORT(x)	(cmos_regs + (x))
+#define RTC_ALWAYS_BCD	0
+#endif
+
+/*
+ * All machines supported so far access the RTC index register via
+ * an ISA port access, but the way the data register is accessed differs.
+ */
+#define CMOS_READ(addr) ({ \
+	outb_p((addr), RTC_PORT(0)); \
+	inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+	outb_p((addr), RTC_PORT(0)); \
+	outb_p((val), RTC_PORT(1)); \
+})
+
+#endif /* __ASM_SPARC64_MC146818RTC_H */
diff --git a/arch/sparc/include/asm/mdesc.h b/arch/sparc/include/asm/mdesc.h
new file mode 100644
index 0000000..ec31a06
--- /dev/null
+++ b/arch/sparc/include/asm/mdesc.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_MDESC_H
+#define _SPARC64_MDESC_H
+
+#include <linux/types.h>
+#include <linux/cpumask.h>
+#include <asm/prom.h>
+
+struct mdesc_handle;
+
+/* Machine description operations are to be surrounded by grab and
+ * release calls.  The mdesc_handle returned from the grab is
+ * the first argument to all of the operational calls that work
+ * on mdescs.
+ */
+struct mdesc_handle *mdesc_grab(void);
+void mdesc_release(struct mdesc_handle *);
+
+#define MDESC_NODE_NULL		(~(u64)0)
+#define MDESC_MAX_STR_LEN	256
+
+u64 mdesc_node_by_name(struct mdesc_handle *handle,
+		       u64 from_node, const char *name);
+#define mdesc_for_each_node_by_name(__hdl, __node, __name) \
+	for (__node = mdesc_node_by_name(__hdl, MDESC_NODE_NULL, __name); \
+	     (__node) != MDESC_NODE_NULL; \
+	     __node = mdesc_node_by_name(__hdl, __node, __name))
+
+/* Property values returned from mdesc_get_property() are only
+ * valid inside of a mdesc_grab()/mdesc_release() sequence.
+ * Once mdesc_release() is called, the memory backing these
+ * pointers may have been freed.
+ *
+ * Therefore callers must make copies of any property values
+ * they need.
+ *
+ * These same rules apply to mdesc_node_name().
+ */
+const void *mdesc_get_property(struct mdesc_handle *handle,
+			       u64 node, const char *name, int *lenp);
+const char *mdesc_node_name(struct mdesc_handle *hp, u64 node);
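+
+/* Editor's sketch: a typical use of the iteration macro above; callers
+ * wrap this in mdesc_grab()/mdesc_release() per the lifetime rules just
+ * described (helper name is illustrative).
+ */
+static inline int example_count_nodes(struct mdesc_handle *hp,
+				      const char *name)
+{
+	u64 node;
+	int cnt = 0;
+
+	mdesc_for_each_node_by_name(hp, node, name)
+		cnt++;
+	return cnt;
+}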
+
+/* MD arc iteration, the standard sequence is:
+ *
+ *	unsigned long arc;
+ *	mdesc_for_each_arc(arc, handle, node, MDESC_ARC_TYPE_{FWD,BACK}) {
+ *		unsigned long target = mdesc_arc_target(handle, arc);
+ *		...
+ *	}
+ */
+
+#define MDESC_ARC_TYPE_FWD	"fwd"
+#define MDESC_ARC_TYPE_BACK	"back"
+
+u64 mdesc_next_arc(struct mdesc_handle *handle, u64 from,
+		   const char *arc_type);
+#define mdesc_for_each_arc(__arc, __hdl, __node, __type) \
+	for (__arc = mdesc_next_arc(__hdl, __node, __type); \
+	     (__arc) != MDESC_NODE_NULL; \
+	     __arc = mdesc_next_arc(__hdl, __arc, __type))
+
+u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc);
+
+void mdesc_update(void);
+
+struct mdesc_notifier_client {
+	void (*add)(struct mdesc_handle *handle, u64 node,
+		    const char *node_name);
+	void (*remove)(struct mdesc_handle *handle, u64 node,
+		       const char *node_name);
+	const char			*node_name;
+	struct mdesc_notifier_client	*next;
+};
+
+void mdesc_register_notifier(struct mdesc_notifier_client *client);
+
+union md_node_info {
+	struct vdev_port {
+		u64 id;				/* id */
+		u64 parent_cfg_hdl;		/* parent config handle */
+		const char *name;		/* name (property) */
+	} vdev_port;
+	struct ds_port {
+		u64 id;				/* id */
+	} ds_port;
+};
+
+u64 mdesc_get_node(struct mdesc_handle *hp, const char *node_name,
+		   union md_node_info *node_info);
+int mdesc_get_node_info(struct mdesc_handle *hp, u64 node,
+			const char *node_name, union md_node_info *node_info);
+
+void mdesc_fill_in_cpu_data(cpumask_t *mask);
+void mdesc_populate_present_mask(cpumask_t *mask);
+void mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask);
+
+void sun4v_mdesc_init(void);
+
+#endif
diff --git a/arch/sparc/include/asm/memctrl.h b/arch/sparc/include/asm/memctrl.h
new file mode 100644
index 0000000..6790ed6
--- /dev/null
+++ b/arch/sparc/include/asm/memctrl.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_MEMCTRL_H
+#define _SPARC_MEMCTRL_H
+
+typedef int (*dimm_printer_t)(int synd_code, unsigned long paddr, char *buf, int buflen);
+
+int register_dimm_printer(dimm_printer_t func);
+void unregister_dimm_printer(dimm_printer_t func);
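+
+/* Editor's sketch (hypothetical driver code, not in the snapshot): a memory
+ * controller driver registers a callback of the type above so that ECC trap
+ * handlers can turn a syndrome code and physical address into a DIMM label:
+ *
+ *	static int my_dimm_printer(int synd_code, unsigned long paddr,
+ *				   char *buf, int buflen)
+ *	{
+ *		return scnprintf(buf, buflen, "slot for pa 0x%lx", paddr);
+ *	}
+ *
+ *	register_dimm_printer(my_dimm_printer);
+ */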
+
+#endif /* _SPARC_MEMCTRL_H */
diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h
new file mode 100644
index 0000000..f94532f
--- /dev/null
+++ b/arch/sparc/include/asm/mman.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_MMAN_H__
+#define __SPARC_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+#define arch_mmap_check(addr,len,flags)	sparc_mmap_check(addr,len)
+int sparc_mmap_check(unsigned long addr, unsigned long len);
+
+#ifdef CONFIG_SPARC64
+#include <asm/adi_64.h>
+
+static inline void ipi_set_tstate_mcde(void *arg)
+{
+	struct mm_struct *mm = arg;
+
+	/* Set TSTATE_MCDE for the task using an address map that ADI has
+	 * been enabled on, if the task is currently running.  If not, the
+	 * flag is set automatically at its next context switch.
+	 */
+	if (current->mm == mm) {
+		struct pt_regs *regs;
+
+		regs = task_pt_regs(current);
+		regs->tstate |= TSTATE_MCDE;
+	}
+}
+
+#define arch_calc_vm_prot_bits(prot, pkey) sparc_calc_vm_prot_bits(prot)
+static inline unsigned long sparc_calc_vm_prot_bits(unsigned long prot)
+{
+	if (adi_capable() && (prot & PROT_ADI)) {
+		struct pt_regs *regs;
+
+		if (!current->mm->context.adi) {
+			regs = task_pt_regs(current);
+			regs->tstate |= TSTATE_MCDE;
+			current->mm->context.adi = true;
+			on_each_cpu_mask(mm_cpumask(current->mm),
+					 ipi_set_tstate_mcde, current->mm, 0);
+		}
+		return VM_SPARC_ADI;
+	} else {
+		return 0;
+	}
+}
+
+#define arch_vm_get_page_prot(vm_flags) sparc_vm_get_page_prot(vm_flags)
+static inline pgprot_t sparc_vm_get_page_prot(unsigned long vm_flags)
+{
+	return (vm_flags & VM_SPARC_ADI) ? __pgprot(_PAGE_MCD_4V) : __pgprot(0);
+}
+
+#define arch_validate_prot(prot, addr) sparc_validate_prot(prot, addr)
+static inline int sparc_validate_prot(unsigned long prot, unsigned long addr)
+{
+	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_ADI))
+		return 0;
+	if (prot & PROT_ADI) {
+		if (!adi_capable())
+			return 0;
+
+		if (addr) {
+			struct vm_area_struct *vma;
+
+			vma = find_vma(current->mm, addr);
+			if (vma) {
+				/* ADI can not be enabled on PFN
+				 * mapped pages
+				 */
+				if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+					return 0;
+
+				/* Mergeable pages can become unmergeable
+				 * if ADI is enabled on them even if they
+				 * have identical data on them. This can be
+				 * because ADI enabled pages with identical
+				 * data may still not have identical ADI
+				 * tags on them. Disallow ADI on mergeable
+				 * pages.
+				 */
+				if (vma->vm_flags & VM_MERGEABLE)
+					return 0;
+			}
+		}
+	}
+	return 1;
+}
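+
+/* Editor's sketch: the hook above is consulted on the mprotect() path, so
+ * the (simplified) caller contract looks like:
+ *
+ *	if (!arch_validate_prot(prot, addr))
+ *		return -EINVAL;
+ */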
+#endif /* CONFIG_SPARC64 */
+
+#endif /* __ASSEMBLY__ */
+#endif /* __SPARC_MMAN_H__ */
diff --git a/arch/sparc/include/asm/mmu.h b/arch/sparc/include/asm/mmu.h
new file mode 100644
index 0000000..286457e
--- /dev/null
+++ b/arch/sparc/include/asm/mmu.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_MMU_H
+#define ___ASM_SPARC_MMU_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/mmu_64.h>
+#else
+#include <asm/mmu_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/mmu_32.h b/arch/sparc/include/asm/mmu_32.h
new file mode 100644
index 0000000..d5b220c
--- /dev/null
+++ b/arch/sparc/include/asm/mmu_32.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MMU_H
+#define __MMU_H
+
+/* Default "unsigned long" context */
+typedef unsigned long mm_context_t;
+
+/* mm/srmmu.c */
+extern ctxd_t *srmmu_ctx_table_phys;
+
+#endif
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
new file mode 100644
index 0000000..7e2704c
--- /dev/null
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __MMU_H
+#define __MMU_H
+
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/hypervisor.h>
+
+#define CTX_NR_BITS		13
+
+#define TAG_CONTEXT_BITS	((_AC(1,UL) << CTX_NR_BITS) - _AC(1,UL))
+
+/* UltraSPARC-III+ and later have a feature whereby you can
+ * select what page size the various Data-TLB instances in the
+ * chip operate at.  In order to gracefully support this, we put
+ * the version field in a spot outside of the areas of the context
+ * register where this parameter is specified.
+ */
+#define CTX_VERSION_SHIFT	22
+#define CTX_VERSION_MASK	((~0UL) << CTX_VERSION_SHIFT)
+
+#define CTX_PGSZ_8KB		_AC(0x0,UL)
+#define CTX_PGSZ_64KB		_AC(0x1,UL)
+#define CTX_PGSZ_512KB		_AC(0x2,UL)
+#define CTX_PGSZ_4MB		_AC(0x3,UL)
+#define CTX_PGSZ_BITS		_AC(0x7,UL)
+#define CTX_PGSZ0_NUC_SHIFT	61
+#define CTX_PGSZ1_NUC_SHIFT	58
+#define CTX_PGSZ0_SHIFT		16
+#define CTX_PGSZ1_SHIFT		19
+#define CTX_PGSZ_MASK		((CTX_PGSZ_BITS << CTX_PGSZ0_SHIFT) | \
+				 (CTX_PGSZ_BITS << CTX_PGSZ1_SHIFT))
+
+#define CTX_PGSZ_BASE	CTX_PGSZ_8KB
+#define CTX_PGSZ_HUGE	CTX_PGSZ_4MB
+#define CTX_PGSZ_KERN	CTX_PGSZ_4MB
+
+/* Thus, when running on UltraSPARC-III+ and later, we use the following
+ * PRIMARY_CONTEXT register values for the kernel context.
+ */
+#define CTX_CHEETAH_PLUS_NUC \
+	((CTX_PGSZ_KERN << CTX_PGSZ0_NUC_SHIFT) | \
+	 (CTX_PGSZ_BASE << CTX_PGSZ1_NUC_SHIFT))
+
+#define CTX_CHEETAH_PLUS_CTX0 \
+	((CTX_PGSZ_KERN << CTX_PGSZ0_SHIFT) | \
+	 (CTX_PGSZ_BASE << CTX_PGSZ1_SHIFT))
+
+/* If you want "the TLB context number" use CTX_NR_MASK.  If you
+ * want "the bits I program into the context registers" use
+ * CTX_HW_MASK.
+ */
+#define CTX_NR_MASK		TAG_CONTEXT_BITS
+#define CTX_HW_MASK		(CTX_NR_MASK | CTX_PGSZ_MASK)
+
+#define CTX_FIRST_VERSION	BIT(CTX_VERSION_SHIFT)
+#define CTX_VALID(__ctx)	\
+	 (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
+#define CTX_HWBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
+#define CTX_NRBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_NR_MASK)
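+
+/* Editor's worked example: for sparc64_ctx_val = CTX_FIRST_VERSION | 5
+ * (no page-size bits set), CTX_NRBITS() yields 5, CTX_HWBITS() also
+ * yields 5, and the version bits above bit 22 are stripped by both.
+ */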
+
+#ifndef __ASSEMBLY__
+
+#define TSB_ENTRY_ALIGNMENT	16
+
+struct tsb {
+	unsigned long tag;
+	unsigned long pte;
+} __attribute__((aligned(TSB_ENTRY_ALIGNMENT)));
+
+void __tsb_insert(unsigned long ent, unsigned long tag, unsigned long pte);
+void tsb_flush(unsigned long ent, unsigned long tag);
+void tsb_init(struct tsb *tsb, unsigned long size);
+
+struct tsb_config {
+	struct tsb		*tsb;
+	unsigned long		tsb_rss_limit;
+	unsigned long		tsb_nentries;
+	unsigned long		tsb_reg_val;
+	unsigned long		tsb_map_vaddr;
+	unsigned long		tsb_map_pte;
+};
+
+#define MM_TSB_BASE	0
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define MM_TSB_HUGE	1
+#define MM_NUM_TSBS	2
+#else
+#define MM_NUM_TSBS	1
+#endif
+
+/* ADI tags are stored when a page is swapped out, and the storage for
+ * tags is allocated dynamically. There is a tag storage descriptor
+ * associated with each set of tag storage pages; the descriptors are
+ * also allocated dynamically. Since the kernel allocates a full page
+ * for each tag storage descriptor, up to
+ * PAGE_SIZE/sizeof(tag_storage_desc_t) descriptors fit on that page.
+ */
+typedef struct {
+	unsigned long	start;		/* Start address for this tag storage */
+	unsigned long	end;		/* Last address for tag storage */
+	unsigned char	*tags;		/* Where the tags are */
+	unsigned long	tag_users;	/* number of references to descriptor */
+} tag_storage_desc_t;
+
+typedef struct {
+	spinlock_t		lock;
+	unsigned long		sparc64_ctx_val;
+	unsigned long		hugetlb_pte_count;
+	unsigned long		thp_pte_count;
+	struct tsb_config	tsb_block[MM_NUM_TSBS];
+	struct hv_tsb_descr	tsb_descr[MM_NUM_TSBS];
+	void			*vdso;
+	bool			adi;
+	tag_storage_desc_t	*tag_store;
+	spinlock_t		tag_lock;
+} mm_context_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#define TSB_CONFIG_TSB		0x00
+#define TSB_CONFIG_RSS_LIMIT	0x08
+#define TSB_CONFIG_NENTRIES	0x10
+#define TSB_CONFIG_REG_VAL	0x18
+#define TSB_CONFIG_MAP_VADDR	0x20
+#define TSB_CONFIG_MAP_PTE	0x28
+
+#endif /* __MMU_H */
diff --git a/arch/sparc/include/asm/mmu_context.h b/arch/sparc/include/asm/mmu_context.h
new file mode 100644
index 0000000..5e1ea38
--- /dev/null
+++ b/arch/sparc/include/asm/mmu_context.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_MMU_CONTEXT_H
+#define ___ASM_SPARC_MMU_CONTEXT_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/mmu_context_64.h>
+#else
+#include <asm/mmu_context_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/mmu_context_32.h b/arch/sparc/include/asm/mmu_context_32.h
new file mode 100644
index 0000000..7ddcb8b
--- /dev/null
+++ b/arch/sparc/include/asm/mmu_context_32.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_MMU_CONTEXT_H
+#define __SPARC_MMU_CONTEXT_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm-generic/mm_hooks.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+/* Initialize a new mmu context.  This is invoked when a new
+ * address space instance (unique or shared) is instantiated.
+ */
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+
+/* Destroy a dead context.  This occurs when mmput drops the
+ * mm_users count to zero, the mmaps have been released, and
+ * all the page tables have been flushed.  Our job is to destroy
+ * any remaining processor-specific state.
+ */
+void destroy_context(struct mm_struct *mm);
+
+/* Switch the current MM context. */
+void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
+	       struct task_struct *tsk);
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+
+/* Activate a new MM instance for the current task. */
+#define activate_mm(active_mm, mm) switch_mm((active_mm), (mm), NULL)
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_MMU_CONTEXT_H) */
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
new file mode 100644
index 0000000..312fcee
--- /dev/null
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC64_MMU_CONTEXT_H
+#define __SPARC64_MMU_CONTEXT_H
+
+/* Derived heavily from Linus's Alpha/AXP ASN code... */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/spinlock.h>
+#include <linux/mm_types.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+
+#include <asm/spitfire.h>
+#include <asm/adi_64.h>
+#include <asm-generic/mm_hooks.h>
+#include <asm/percpu.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+}
+
+extern spinlock_t ctx_alloc_lock;
+extern unsigned long tlb_context_cache;
+extern unsigned long mmu_context_bmap[];
+
+DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
+void get_new_mmu_context(struct mm_struct *mm);
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+void destroy_context(struct mm_struct *mm);
+
+void __tsb_context_switch(unsigned long pgd_pa,
+			  struct tsb_config *tsb_base,
+			  struct tsb_config *tsb_huge,
+			  unsigned long tsb_descr_pa,
+			  unsigned long secondary_ctx);
+
+static inline void tsb_context_switch_ctx(struct mm_struct *mm,
+					  unsigned long ctx)
+{
+	__tsb_context_switch(__pa(mm->pgd),
+			     &mm->context.tsb_block[MM_TSB_BASE],
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+			     (mm->context.tsb_block[MM_TSB_HUGE].tsb ?
+			      &mm->context.tsb_block[MM_TSB_HUGE] :
+			      NULL)
+#else
+			     NULL
+#endif
+			     , __pa(&mm->context.tsb_descr[MM_TSB_BASE]),
+			     ctx);
+}
+
+#define tsb_context_switch(X) tsb_context_switch_ctx(X, 0)
+
+void tsb_grow(struct mm_struct *mm,
+	      unsigned long tsb_index,
+	      unsigned long mm_rss);
+#ifdef CONFIG_SMP
+void smp_tsb_sync(struct mm_struct *mm);
+#else
+#define smp_tsb_sync(__mm) do { } while (0)
+#endif
+
+/* Set MMU context in the actual hardware. */
+#define load_secondary_context(__mm) \
+	__asm__ __volatile__( \
+	"\n661:	stxa		%0, [%1] %2\n" \
+	"	.section	.sun4v_1insn_patch, \"ax\"\n" \
+	"	.word		661b\n" \
+	"	stxa		%0, [%1] %3\n" \
+	"	.previous\n" \
+	"	flush		%%g6\n" \
+	: /* No outputs */ \
+	: "r" (CTX_HWBITS((__mm)->context)), \
+	  "r" (SECONDARY_CONTEXT), "i" (ASI_DMMU), "i" (ASI_MMU))
+
+void __flush_tlb_mm(unsigned long, unsigned long);
+
+/* Switch the current MM context. */
+static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
+{
+	unsigned long ctx_valid, flags;
+	int cpu = smp_processor_id();
+
+	per_cpu(per_cpu_secondary_mm, cpu) = mm;
+	if (unlikely(mm == &init_mm))
+		return;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+	ctx_valid = CTX_VALID(mm->context);
+	if (!ctx_valid)
+		get_new_mmu_context(mm);
+
+	/* We have to be extremely careful here or else we will miss
+	 * a TSB grow if we switch back and forth between a kernel
+	 * thread and an address space which has its TSB size increased
+	 * on another processor.
+	 *
+	 * It is possible to play some games in order to optimize the
+	 * switch, but the safest thing to do is to unconditionally
+	 * perform the secondary context load and the TSB context switch.
+	 *
+	 * For reference the bad case is, for address space "A":
+	 *
+	 *		CPU 0			CPU 1
+	 *	run address space A
+	 *	set cpu0's bits in cpu_vm_mask
+	 *	switch to kernel thread, borrow
+	 *	address space A via enter_lazy_tlb
+	 *					run address space A
+	 *					set cpu1's bit in cpu_vm_mask
+	 *					flush_tlb_pending()
+	 *					reset cpu_vm_mask to just cpu1
+	 *					TSB grow
+	 *	run address space A
+	 *	context was valid, so skip
+	 *	TSB context switch
+	 *
+	 * At that point cpu0 continues to use a stale TSB, the one from
+	 * before the TSB grow performed on cpu1.  cpu1 did not cross-call
+	 * cpu0 to update its TSB because at that point the cpu_vm_mask
+	 * only had cpu1 set in it.
+	 */
+	tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+
+	/* Any time a processor runs a context on an address space
+	 * for the first time, we must flush that context out of the
+	 * local TLB.
+	 */
+	if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+		cpumask_set_cpu(cpu, mm_cpumask(mm));
+		__flush_tlb_mm(CTX_HWBITS(mm->context),
+			       SECONDARY_CONTEXT);
+	}
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+
+#define deactivate_mm(tsk,mm)	do { } while (0)
+#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL)
+
+#define  __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(struct task_struct *prev)
+{
+	/* Save the current state of MCDPER register for the process
+	 * we are switching from
+	 */
+	if (adi_capable()) {
+		register unsigned long tmp_mcdper;
+
+		__asm__ __volatile__(
+			".word 0x83438000\n\t"	/* rd  %mcdper, %g1 */
+			"mov %%g1, %0\n\t"
+			: "=r" (tmp_mcdper)
+			:
+			: "g1");
+		if (tmp_mcdper)
+			set_tsk_thread_flag(prev, TIF_MCDPER);
+		else
+			clear_tsk_thread_flag(prev, TIF_MCDPER);
+	}
+}
+
+#define finish_arch_post_lock_switch	finish_arch_post_lock_switch
+static inline void finish_arch_post_lock_switch(void)
+{
+	/* Restore the state of MCDPER register for the new process
+	 * just switched to.
+	 */
+	if (adi_capable()) {
+		register unsigned long tmp_mcdper;
+
+		tmp_mcdper = test_thread_flag(TIF_MCDPER);
+		__asm__ __volatile__(
+			"mov %0, %%g1\n\t"
+			".word 0x9d800001\n\t"	/* wr %g0, %g1, %mcdper */
+			".word 0xaf902001\n\t"	/* wrpr %g0, 1, %pmcdper */
+			:
+			: "ir" (tmp_mcdper)
+			: "g1");
+		if (current && current->mm && current->mm->context.adi) {
+			struct pt_regs *regs;
+
+			regs = task_pt_regs(current);
+			regs->tstate |= TSTATE_MCDE;
+		}
+	}
+}
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC64_MMU_CONTEXT_H) */
diff --git a/arch/sparc/include/asm/mmzone.h b/arch/sparc/include/asm/mmzone.h
new file mode 100644
index 0000000..6543fb9
--- /dev/null
+++ b/arch/sparc/include/asm/mmzone.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_MMZONE_H
+#define _SPARC64_MMZONE_H
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+
+#include <linux/cpumask.h>
+
+extern struct pglist_data *node_data[];
+
+#define NODE_DATA(nid)		(node_data[nid])
+
+extern int numa_cpu_lookup_table[];
+extern cpumask_t numa_cpumask_lookup_table[];
+
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#endif /* _SPARC64_MMZONE_H */
diff --git a/arch/sparc/include/asm/mxcc.h b/arch/sparc/include/asm/mxcc.h
new file mode 100644
index 0000000..3a2561b
--- /dev/null
+++ b/arch/sparc/include/asm/mxcc.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mxcc.h:  Definitions of the Viking MXCC registers
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_MXCC_H
+#define _SPARC_MXCC_H
+
+/* These registers are accessed through ASI 0x2. */
+#define MXCC_DATSTREAM       0x1C00000  /* Data stream register */
+#define MXCC_SRCSTREAM       0x1C00100  /* Source stream register */
+#define MXCC_DESSTREAM       0x1C00200  /* Destination stream register */
+#define MXCC_RMCOUNT         0x1C00300  /* Count of references and misses */
+#define MXCC_STEST           0x1C00804  /* Internal self-test */
+#define MXCC_CREG            0x1C00A04  /* Control register */
+#define MXCC_SREG            0x1C00B00  /* Status register */
+#define MXCC_RREG            0x1C00C04  /* Reset register */
+#define MXCC_EREG            0x1C00E00  /* Error code register */
+#define MXCC_PREG            0x1C00F04  /* Address port register */
+
+/* Some MXCC constants. */
+#define MXCC_STREAM_SIZE     0x20       /* Size in bytes of one stream r/w */
+
+/* The MXCC Control Register:
+ *
+ * ----------------------------------------------------------------------
+ * |                                   | RRC | RSV |PRE|MCE|PARE|ECE|RSV|
+ * ----------------------------------------------------------------------
+ *  31                              10    9    8-6   5   4    3   2  1-0
+ *
+ * RRC: Controls what you read from MXCC_RMCOUNT reg.
+ *      0=Misses 1=References
+ * PRE: Prefetch enable
+ * MCE: Multiple Command Enable
+ * PARE: Parity enable
+ * ECE: External cache enable
+ */
+
+#define MXCC_CTL_RRC   0x00000200
+#define MXCC_CTL_PRE   0x00000020
+#define MXCC_CTL_MCE   0x00000010
+#define MXCC_CTL_PARE  0x00000008
+#define MXCC_CTL_ECE   0x00000004
+
+/* The MXCC Error Register:
+ *
+ * --------------------------------------------------------
+ * |ME| RSV|CE|PEW|PEE|ASE|EIV| MOPC|ECODE|PRIV|RSV|HPADDR|
+ * --------------------------------------------------------
+ *  31   30 29  28  27  26  25 24-15  14-7   6  5-3   2-0
+ *
+ * ME: Multiple Errors have occurred
+ * CE: Cache consistency Error
+ * PEW: Parity Error during a Write operation
+ * PEE: Parity Error involving the External cache
+ * ASE: ASynchronous Error
+ * EIV: This register is toast
+ * MOPC: MXCC Operation Code for instance causing error
+ * ECODE: The Error CODE
+ * PRIV: A privileged mode error? 0=no 1=yes
+ * HPADDR: High PhysicalADDRess bits (35-32)
+ */
+
+#define MXCC_ERR_ME     0x80000000
+#define MXCC_ERR_CE     0x20000000
+#define MXCC_ERR_PEW    0x10000000
+#define MXCC_ERR_PEE    0x08000000
+#define MXCC_ERR_ASE    0x04000000
+#define MXCC_ERR_EIV    0x02000000
+#define MXCC_ERR_MOPC   0x01FF8000
+#define MXCC_ERR_ECODE  0x00007F80
+#define MXCC_ERR_PRIV   0x00000040
+#define MXCC_ERR_HPADDR 0x0000000f
+
+/* The MXCC Port register:
+ *
+ * -----------------------------------------------------
+ * |                | MID |                            |
+ * -----------------------------------------------------
+ *  31            21 20-18 17                         0
+ *
+ * MID: The module ID of the cpu you read this from.
+ */
+
+#ifndef __ASSEMBLY__
+
+static inline void mxcc_set_stream_src(unsigned long *paddr)
+{
+	unsigned long data0 = paddr[0];
+	unsigned long data1 = paddr[1];
+
+	__asm__ __volatile__ ("or %%g0, %0, %%g2\n\t"
+			      "or %%g0, %1, %%g3\n\t"
+			      "stda %%g2, [%2] %3\n\t" : :
+			      "r" (data0), "r" (data1),
+			      "r" (MXCC_SRCSTREAM),
+			      "i" (ASI_M_MXCC) : "g2", "g3");
+}
+
+static inline void mxcc_set_stream_dst(unsigned long *paddr)
+{
+	unsigned long data0 = paddr[0];
+	unsigned long data1 = paddr[1];
+
+	__asm__ __volatile__ ("or %%g0, %0, %%g2\n\t"
+			      "or %%g0, %1, %%g3\n\t"
+			      "stda %%g2, [%2] %3\n\t" : :
+			      "r" (data0), "r" (data1),
+			      "r" (MXCC_DESSTREAM),
+			      "i" (ASI_M_MXCC) : "g2", "g3");
+}
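+
+/* Editor's sketch: one MXCC_STREAM_SIZE (32-byte) block move is a load of
+ * the source stream followed by a store to the destination stream; each
+ * argument is a two-word physical-address descriptor (illustrative only).
+ */
+static inline void example_mxcc_copy_block(unsigned long *src_pa,
+					   unsigned long *dst_pa)
+{
+	mxcc_set_stream_src(src_pa);	/* fill the stream buffer */
+	mxcc_set_stream_dst(dst_pa);	/* drain it to the destination */
+}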
+
+static inline unsigned long mxcc_get_creg(void)
+{
+	unsigned long mxcc_control;
+
+	__asm__ __volatile__("set 0xffffffff, %%g2\n\t"
+			     "set 0xffffffff, %%g3\n\t"
+			     "stda %%g2, [%1] %2\n\t"
+			     "lda [%3] %2, %0\n\t" :
+			     "=r" (mxcc_control) :
+			     "r" (MXCC_EREG), "i" (ASI_M_MXCC),
+			     "r" (MXCC_CREG) : "g2", "g3");
+	return mxcc_control;
+}
+
+static inline void mxcc_set_creg(unsigned long mxcc_control)
+{
+	__asm__ __volatile__("sta %0, [%1] %2\n\t" : :
+			     "r" (mxcc_control), "r" (MXCC_CREG),
+			     "i" (ASI_M_MXCC));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !(_SPARC_MXCC_H) */
diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h
new file mode 100644
index 0000000..90ee786
--- /dev/null
+++ b/arch/sparc/include/asm/nmi.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NMI_H
+#define __NMI_H
+
+int __init nmi_init(void);
+void perfctr_irq(int irq, struct pt_regs *regs);
+void nmi_adjust_hz(unsigned int new_hz);
+
+extern atomic_t nmi_active;
+
+void arch_touch_nmi_watchdog(void);
+void start_nmi_watchdog(void *unused);
+void stop_nmi_watchdog(void *unused);
+
+#endif /* __NMI_H */
diff --git a/arch/sparc/include/asm/ns87303.h b/arch/sparc/include/asm/ns87303.h
new file mode 100644
index 0000000..5401894
--- /dev/null
+++ b/arch/sparc/include/asm/ns87303.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ns87303.h: Configuration Register Description for the
+ *            National Semiconductor PC87303 (SuperIO).
+ *
+ * Copyright (C) 1997  Eddie C. Dost  (ecd@skynet.be)
+ */
+
+#ifndef _SPARC_NS87303_H
+#define _SPARC_NS87303_H 1
+
+/*
+ * Control Register Index Values
+ */
+#define FER	0x00
+#define FAR	0x01
+#define PTR	0x02
+#define FCR	0x03
+#define PCR	0x04
+#define KRR	0x05
+#define PMC	0x06
+#define TUP	0x07
+#define SID	0x08
+#define ASC	0x09
+#define CS0CF0	0x0a
+#define CS0CF1	0x0b
+#define CS1CF0	0x0c
+#define CS1CF1	0x0d
+
+/* Function Enable Register (FER) bits */
+#define FER_EDM		0x10	/* Encoded Drive and Motor pin information   */
+
+/* Function Address Register (FAR) bits */
+#define FAR_LPT_MASK	0x03
+#define FAR_LPTB	0x00
+#define FAR_LPTA	0x01
+#define FAR_LPTC	0x02
+
+/* Power and Test Register (PTR) bits */
+#define PTR_LPTB_IRQ7	0x08
+#define PTR_LEVEL_IRQ	0x80	/* When not ECP/EPP: Use level IRQ           */
+#define PTR_LPT_REG_DIR	0x80	/* When ECP/EPP: LPT CTR controls direction */
+				/*               of the parallel port	     */
+
+/* Function Control Register (FCR) bits */
+#define FCR_LDE		0x10	/* Logical Drive Exchange                    */
+#define FCR_ZWS_ENA	0x20	/* Enable short host read/write in ECP/EPP   */
+
+/* Printer Control Register (PCR) bits */
+#define PCR_EPP_ENABLE	0x01
+#define PCR_EPP_IEEE	0x02	/* Enable EPP Version 1.9 (IEEE 1284)        */
+#define PCR_ECP_ENABLE	0x04
+#define PCR_ECP_CLK_ENA	0x08	/* If 0 ECP Clock is stopped on Power down   */
+#define PCR_IRQ_POLAR	0x20	/* If 0 IRQ is level high or negative pulse, */
+				/* if 1 polarity is inverted                 */
+#define PCR_IRQ_ODRAIN	0x40	/* If 1, IRQ is open drain                   */
+
+/* Tape UARTs and Parallel Port Config Register (TUP) bits */
+#define TUP_EPP_TIMO	0x02	/* Enable EPP timeout IRQ                    */
+
+/* Advanced SuperIO Config Register (ASC) bits */
+#define ASC_LPT_IRQ7	0x01	/* Always use IRQ7 for LPT                  */
+#define ASC_DRV2_SEL	0x02	/* Logical Drive Exchange controlled by TDR  */
+
+#define FER_RESERVED	0x00
+#define FAR_RESERVED	0x00
+#define PTR_RESERVED	0x73
+#define FCR_RESERVED	0xc4
+#define PCR_RESERVED	0x10
+#define KRR_RESERVED	0x00
+#define PMC_RESERVED	0x98
+#define TUP_RESERVED	0xfb
+#define SIP_RESERVED	0x00
+#define ASC_RESERVED	0x18
+#define CS0CF0_RESERVED	0x00
+#define CS0CF1_RESERVED	0x08
+#define CS1CF0_RESERVED	0x00
+#define CS1CF1_RESERVED	0x08
+
+#ifdef __KERNEL__
+
+#include <linux/spinlock.h>
+
+#include <asm/io.h>
+
+extern spinlock_t ns87303_lock;
+
+static inline int ns87303_modify(unsigned long port, unsigned int index,
+				     unsigned char clr, unsigned char set)
+{
+	static unsigned char reserved[] = {
+		FER_RESERVED, FAR_RESERVED, PTR_RESERVED, FCR_RESERVED,
+		PCR_RESERVED, KRR_RESERVED, PMC_RESERVED, TUP_RESERVED,
+		SIP_RESERVED, ASC_RESERVED, CS0CF0_RESERVED, CS0CF1_RESERVED,
+		CS1CF0_RESERVED, CS1CF1_RESERVED
+	};
+	unsigned long flags;
+	unsigned char value;
+
+	if (index > 0x0d)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ns87303_lock, flags);
+
+	outb(index, port);
+	value = inb(port + 1);
+	value &= ~(reserved[index] | clr);
+	value |= set;
+	outb(value, port + 1);
+	outb(value, port + 1);
+
+	spin_unlock_irqrestore(&ns87303_lock, flags);
+
+	return 0;
+}
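+
+/* Editor's sketch: enabling EPP 1.9 mode on a SuperIO at a (hypothetical)
+ * config port would clear nothing and set two PCR bits via the helper above:
+ *
+ *	ns87303_modify(port, PCR, 0, PCR_EPP_ENABLE | PCR_EPP_IEEE);
+ */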
+
+#endif /* __KERNEL__ */
+
+#endif /* !(_SPARC_NS87303_H) */
diff --git a/arch/sparc/include/asm/obio.h b/arch/sparc/include/asm/obio.h
new file mode 100644
index 0000000..1b151f7
--- /dev/null
+++ b/arch/sparc/include/asm/obio.h
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * obio.h:  Some useful locations in 0xFXXXXXXXX PA obio space on sun4d.
+ *
+ * Copyright (C) 1997 Jakub Jelinek <jj@sunsite.mff.cuni.cz>
+ */
+
+#ifndef _SPARC_OBIO_H
+#define _SPARC_OBIO_H
+
+#include <asm/asi.h>
+
+/* This weird monster likes to use the very upper parts of
+   36bit PA for these things :) */
+
+/* CSR space (for each XDBUS)
+ *  ------------------------------------------------------------------------
+ *  |   0xFE  |   DEVID    |                | XDBUS ID |                   |
+ *  ------------------------------------------------------------------------
+ *  35      28 27        20 19            10 9        8 7                 0
+ */
+
+#define CSR_BASE_ADDR		0xe0000000
+#define CSR_CPU_SHIFT		(32 - 4 - 5)
+#define CSR_XDBUS_SHIFT		8
+
+#define CSR_BASE(cpu) (((CSR_BASE_ADDR >> CSR_CPU_SHIFT) + (cpu)) << CSR_CPU_SHIFT)
+
+/* ECSR space (not for each XDBUS)
+ *  ------------------------------------------------------------------------
+ *  |   0xF  | DEVID[7:1] |                			           |
+ *  ------------------------------------------------------------------------
+ *  35     32 31        25 24                 				  0
+ */
+
+#define ECSR_BASE_ADDR		0x00000000
+#define ECSR_CPU_SHIFT		(32 - 5)
+#define ECSR_DEV_SHIFT		(32 - 8)
+
+#define ECSR_BASE(cpu) ((cpu) << ECSR_CPU_SHIFT)
+#define ECSR_DEV_BASE(devid) ((devid) << ECSR_DEV_SHIFT) 
+
+/* Bus Watcher */
+#define BW_LOCAL_BASE		0xfff00000
+
+#define BW_CID			0x00000000
+#define BW_DBUS_CTRL		0x00000008
+#define BW_DBUS_DATA		0x00000010
+#define BW_CTRL			0x00001000
+#define BW_INTR_TABLE		0x00001040
+#define BW_INTR_TABLE_CLEAR	0x00001080
+#define BW_PRESCALER		0x000010c0
+#define BW_PTIMER_LIMIT		0x00002000
+#define BW_PTIMER_COUNTER2	0x00002004
+#define BW_PTIMER_NDLIMIT	0x00002008
+#define BW_PTIMER_CTRL		0x0000200c
+#define BW_PTIMER_COUNTER	0x00002010
+#define BW_TIMER_LIMIT		0x00003000
+#define BW_TIMER_COUNTER2	0x00003004
+#define BW_TIMER_NDLIMIT	0x00003008
+#define BW_TIMER_CTRL		0x0000300c
+#define BW_TIMER_COUNTER	0x00003010
+
+/* BW Control */
+#define BW_CTRL_USER_TIMER	0x00000004	/* User Timer free-run enable */
+
+/* Boot Bus */
+#define BB_LOCAL_BASE		0xf0000000
+
+#define BB_STAT1		0x00100000
+#define BB_STAT2		0x00120000
+#define BB_STAT3		0x00140000
+#define BB_LEDS			0x002e0000
+
+/* Bits in BB_STAT2 */
+#define BB_STAT2_AC_INTR	0x04	/* Aiee! 5ms and power is gone... */
+#define BB_STAT2_TMP_INTR	0x10	/* My Penguins are burning. Are you able to smell it? */
+#define BB_STAT2_FAN_INTR	0x20	/* My fan refuses to work */
+#define BB_STAT2_PWR_INTR	0x40	/* On SC2000, one of the two ACs died. Ok, we go on... */
+#define BB_STAT2_MASK		(BB_STAT2_AC_INTR|BB_STAT2_TMP_INTR|BB_STAT2_FAN_INTR|BB_STAT2_PWR_INTR)
+
+/* Cache Controller */
+#define CC_BASE		0x1F00000
+#define CC_DATSTREAM	0x1F00000  /* Data stream register */
+#define CC_DATSIZE	0x1F0003F  /* Size */
+#define CC_SRCSTREAM	0x1F00100  /* Source stream register */
+#define CC_DESSTREAM	0x1F00200  /* Destination stream register */
+#define CC_RMCOUNT	0x1F00300  /* Count of references and misses */
+#define CC_IPEN		0x1F00406  /* Pending Interrupts */
+#define CC_IMSK		0x1F00506  /* Interrupt Mask */
+#define CC_ICLR		0x1F00606  /* Clear pending Interrupts */
+#define CC_IGEN		0x1F00704  /* Generate Interrupt register */
+#define CC_STEST	0x1F00804  /* Internal self-test */
+#define CC_CREG		0x1F00A04  /* Control register */
+#define CC_SREG		0x1F00B00  /* Status register */
+#define CC_RREG		0x1F00C04  /* Reset register */
+#define CC_EREG		0x1F00E00  /* Error code register */
+#define CC_CID		0x1F00F04  /* Component ID */
+
+#ifndef __ASSEMBLY__
+
+static inline int bw_get_intr_mask(int sbus_level)
+{
+	int mask;
+	
+	__asm__ __volatile__ ("lduha [%1] %2, %0" :
+			      "=r" (mask) :
+			      "r" (BW_LOCAL_BASE + BW_INTR_TABLE + (sbus_level << 3)),
+			      "i" (ASI_M_CTL));
+	return mask;
+}
+
+static inline void bw_clear_intr_mask(int sbus_level, int mask)
+{
+	__asm__ __volatile__ ("stha %0, [%1] %2" : :
+			      "r" (mask),
+			      "r" (BW_LOCAL_BASE + BW_INTR_TABLE_CLEAR + (sbus_level << 3)),
+			      "i" (ASI_M_CTL));
+}
+
+static inline unsigned int bw_get_prof_limit(int cpu)
+{
+	unsigned int limit;
+	
+	__asm__ __volatile__ ("lda [%1] %2, %0" :
+			      "=r" (limit) :
+			      "r" (CSR_BASE(cpu) + BW_PTIMER_LIMIT),
+			      "i" (ASI_M_CTL));
+	return limit;
+}
+
+static inline void bw_set_prof_limit(int cpu, unsigned int limit)
+{
+	__asm__ __volatile__ ("sta %0, [%1] %2" : :
+			      "r" (limit),
+			      "r" (CSR_BASE(cpu) + BW_PTIMER_LIMIT),
+			      "i" (ASI_M_CTL));
+}
+
+static inline unsigned int bw_get_ctrl(int cpu)
+{
+	unsigned int ctrl;
+	
+	__asm__ __volatile__ ("lda [%1] %2, %0" :
+			      "=r" (ctrl) :
+			      "r" (CSR_BASE(cpu) + BW_CTRL),
+			      "i" (ASI_M_CTL));
+	return ctrl;
+}
+
+static inline void bw_set_ctrl(int cpu, unsigned int ctrl)
+{
+	__asm__ __volatile__ ("sta %0, [%1] %2" : :
+			      "r" (ctrl),
+			      "r" (CSR_BASE(cpu) + BW_CTRL),
+			      "i" (ASI_M_CTL));
+}
+
+static inline unsigned int cc_get_ipen(void)
+{
+	unsigned int pending;
+	
+	__asm__ __volatile__ ("lduha [%1] %2, %0" :
+			      "=r" (pending) :
+			      "r" (CC_IPEN),
+			      "i" (ASI_M_MXCC));
+	return pending;
+}
+
+static inline void cc_set_iclr(unsigned int clear)
+{
+	__asm__ __volatile__ ("stha %0, [%1] %2" : :
+			      "r" (clear),
+			      "r" (CC_ICLR),
+			      "i" (ASI_M_MXCC));
+}
+
+static inline unsigned int cc_get_imsk(void)
+{
+	unsigned int mask;
+	
+	__asm__ __volatile__ ("lduha [%1] %2, %0" :
+			      "=r" (mask) :
+			      "r" (CC_IMSK),
+			      "i" (ASI_M_MXCC));
+	return mask;
+}
+
+static inline void cc_set_imsk(unsigned int mask)
+{
+	__asm__ __volatile__ ("stha %0, [%1] %2" : :
+			      "r" (mask),
+			      "r" (CC_IMSK),
+			      "i" (ASI_M_MXCC));
+}
+
+static inline unsigned int cc_get_imsk_other(int cpuid)
+{
+	unsigned int mask;
+	
+	__asm__ __volatile__ ("lduha [%1] %2, %0" :
+			      "=r" (mask) :
+			      "r" (ECSR_BASE(cpuid) | CC_IMSK),
+			      "i" (ASI_M_CTL));
+	return mask;
+}
+
+static inline void cc_set_imsk_other(int cpuid, unsigned int mask)
+{
+	__asm__ __volatile__ ("stha %0, [%1] %2" : :
+			      "r" (mask),
+			      "r" (ECSR_BASE(cpuid) | CC_IMSK),
+			      "i" (ASI_M_CTL));
+}
+
+static inline void cc_set_igen(unsigned int gen)
+{
+	__asm__ __volatile__ ("sta %0, [%1] %2" : :
+			      "r" (gen),
+			      "r" (CC_IGEN),
+			      "i" (ASI_M_MXCC));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !(_SPARC_OBIO_H) */
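A minimal sketch of driving the MXCC interrupt-mask accessors above; cc_mask_irqs() is a hypothetical helper, not part of the kernel tree, and it assumes that set bits mask the corresponding sources (as the sun4d users of cc_set_imsk() suggest):

	/* Read-modify-write of the local MXCC interrupt mask.  The read
	 * and write are distinct ASI accesses, so callers must provide
	 * their own exclusion (e.g. run with IRQs disabled).
	 */
	static inline void cc_mask_irqs(unsigned int bits)
	{
		cc_set_imsk(cc_get_imsk() | bits);
	}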
diff --git a/arch/sparc/include/asm/openprom.h b/arch/sparc/include/asm/openprom.h
new file mode 100644
index 0000000..69545b3
--- /dev/null
+++ b/arch/sparc/include/asm/openprom.h
@@ -0,0 +1,280 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_OPENPROM_H
+#define __SPARC_OPENPROM_H
+
+/* openprom.h:  Prom structures and defines for access to the OPENBOOT
+ *              prom routines and data areas.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+/* Empirical constants... */
+#define LINUX_OPPROM_MAGIC      0x10010407
+
+#ifndef __ASSEMBLY__
+#include <linux/of.h>
+
+/* V0 prom device operations. */
+struct linux_dev_v0_funcs {
+	int (*v0_devopen)(char *device_str);
+	int (*v0_devclose)(int dev_desc);
+	int (*v0_rdblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
+	int (*v0_wrblkdev)(int dev_desc, int num_blks, int blk_st, char *buf);
+	int (*v0_wrnetdev)(int dev_desc, int num_bytes, char *buf);
+	int (*v0_rdnetdev)(int dev_desc, int num_bytes, char *buf);
+	int (*v0_rdchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
+	int (*v0_wrchardev)(int dev_desc, int num_bytes, int dummy, char *buf);
+	int (*v0_seekdev)(int dev_desc, long logical_offst, int from);
+};
+
+/* V2 and later prom device operations. */
+struct linux_dev_v2_funcs {
+	phandle (*v2_inst2pkg)(int d);	/* Convert ihandle to phandle */
+	char * (*v2_dumb_mem_alloc)(char *va, unsigned int sz);
+	void (*v2_dumb_mem_free)(char *va, unsigned int sz);
+
+	/* To map devices into virtual I/O space. */
+	char * (*v2_dumb_mmap)(char *virta, int which_io, unsigned int paddr, unsigned int sz);
+	void (*v2_dumb_munmap)(char *virta, unsigned int size);
+
+	int (*v2_dev_open)(char *devpath);
+	void (*v2_dev_close)(int d);
+	int (*v2_dev_read)(int d, char *buf, int nbytes);
+	int (*v2_dev_write)(int d, const char *buf, int nbytes);
+	int (*v2_dev_seek)(int d, int hi, int lo);
+
+	/* Never issued (multistage load support) */
+	void (*v2_wheee2)(void);
+	void (*v2_wheee3)(void);
+};
+
+struct linux_mlist_v0 {
+	struct linux_mlist_v0 *theres_more;
+	unsigned int start_adr;
+	unsigned int num_bytes;
+};
+
+struct linux_mem_v0 {
+	struct linux_mlist_v0 **v0_totphys;
+	struct linux_mlist_v0 **v0_prommap;
+	struct linux_mlist_v0 **v0_available; /* What we can use */
+};
+
+/* Arguments sent to the kernel from the boot prompt. */
+struct linux_arguments_v0 {
+	char *argv[8];
+	char args[100];
+	char boot_dev[2];
+	int boot_dev_ctrl;
+	int boot_dev_unit;
+	int dev_partition;
+	char *kernel_file_name;
+	void *aieee1;           /* XXX */
+};
+
+/* V2 and up boot things. */
+struct linux_bootargs_v2 {
+	char **bootpath;
+	char **bootargs;
+	int *fd_stdin;
+	int *fd_stdout;
+};
+
+/* The top level PROM vector. */
+struct linux_romvec {
+	/* Version numbers. */
+	unsigned int pv_magic_cookie;
+	unsigned int pv_romvers;
+	unsigned int pv_plugin_revision;
+	unsigned int pv_printrev;
+
+	/* Version 0 memory descriptors. */
+	struct linux_mem_v0 pv_v0mem;
+
+	/* Node operations. */
+	struct linux_nodeops *pv_nodeops;
+
+	char **pv_bootstr;
+	struct linux_dev_v0_funcs pv_v0devops;
+
+	char *pv_stdin;
+	char *pv_stdout;
+#define	PROMDEV_KBD	0		/* input from keyboard */
+#define	PROMDEV_SCREEN	0		/* output to screen */
+#define	PROMDEV_TTYA	1		/* in/out to ttya */
+#define	PROMDEV_TTYB	2		/* in/out to ttyb */
+
+	/* Blocking getchar/putchar.  NOT REENTRANT! (grr) */
+	int (*pv_getchar)(void);
+	void (*pv_putchar)(int ch);
+
+	/* Non-blocking variants. */
+	int (*pv_nbgetchar)(void);
+	int (*pv_nbputchar)(int ch);
+
+	void (*pv_putstr)(char *str, int len);
+
+	/* Miscellany. */
+	void (*pv_reboot)(char *bootstr);
+	void (*pv_printf)(__const__ char *fmt, ...);
+	void (*pv_abort)(void);
+	__volatile__ int *pv_ticks;
+	void (*pv_halt)(void);
+	void (**pv_synchook)(void);
+
+	/* Evaluate a Forth string; note the different prototypes for V0 vs. V2 and up. */
+	union {
+		void (*v0_eval)(int len, char *str);
+		void (*v2_eval)(char *str);
+	} pv_fortheval;
+
+	struct linux_arguments_v0 **pv_v0bootargs;
+
+	/* Get ether address. */
+	unsigned int (*pv_enaddr)(int d, char *enaddr);
+
+	struct linux_bootargs_v2 pv_v2bootargs;
+	struct linux_dev_v2_funcs pv_v2devops;
+
+	int filler[15];
+
+	/* This one is sun4c/sun4 only. */
+	void (*pv_setctxt)(int ctxt, char *va, int pmeg);
+
+	/* Prom version 3 Multiprocessor routines. This stuff is crazy.
+	 * No joke. Calling these when there is only one cpu probably
+	 * crashes the machine, have to test this. :-)
+	 */
+
+	/* v3_cpustart() will start the cpu 'whichcpu' in mmu-context
+	 * 'thiscontext' executing at address 'prog_counter'
+	 */
+	int (*v3_cpustart)(unsigned int whichcpu, int ctxtbl_ptr,
+			   int thiscontext, char *prog_counter);
+
+	/* v3_cpustop() will cause cpu 'whichcpu' to stop executing
+	 * until a resume cpu call is made.
+	 */
+	int (*v3_cpustop)(unsigned int whichcpu);
+
+	/* v3_cpuidle() will idle cpu 'whichcpu' until a stop or
+	 * resume cpu call is made.
+	 */
+	int (*v3_cpuidle)(unsigned int whichcpu);
+
+	/* v3_cpuresume() will resume processor 'whichcpu' executing
+	 * starting with whatever 'pc' and 'npc' were left at the
+	 * last 'idle' or 'stop' call.
+	 */
+	int (*v3_cpuresume)(unsigned int whichcpu);
+};
+
+/* Routines for traversing the prom device tree. */
+struct linux_nodeops {
+	phandle (*no_nextnode)(phandle node);
+	phandle (*no_child)(phandle node);
+	int (*no_proplen)(phandle node, const char *name);
+	int (*no_getprop)(phandle node, const char *name, char *val);
+	int (*no_setprop)(phandle node, const char *name, char *val, int len);
+	char * (*no_nextprop)(phandle node, char *name);
+};
+
+/* More fun PROM structures for device probing. */
+#if defined(__sparc__) && defined(__arch64__)
+#define PROMREG_MAX     24
+#define PROMVADDR_MAX   16
+#define PROMINTR_MAX    32
+#else
+#define PROMREG_MAX     16
+#define PROMVADDR_MAX   16
+#define PROMINTR_MAX    15
+#endif
+
+struct linux_prom_registers {
+	unsigned int which_io;	/* hi part of physical address */
+	unsigned int phys_addr;	/* The physical address of this register */
+	unsigned int reg_size;	/* How many bytes does this register take up? */
+};
+
+struct linux_prom64_registers {
+	unsigned long phys_addr;
+	unsigned long reg_size;
+};
+
+struct linux_prom_irqs {
+	int pri;    /* IRQ priority */
+	int vector; /* This is foobar, what does it do? */
+};
+
+/* Element of the "ranges" vector */
+struct linux_prom_ranges {
+	unsigned int ot_child_space;
+	unsigned int ot_child_base;		/* Bus feels this */
+	unsigned int ot_parent_space;
+	unsigned int ot_parent_base;		/* CPU looks from here */
+	unsigned int or_size;
+};
+
+/*
+ * Ranges and reg properties are a bit different for PCI.
+ */
+#if defined(__sparc__) && defined(__arch64__)
+struct linux_prom_pci_registers {
+	unsigned int phys_hi;
+	unsigned int phys_mid;
+	unsigned int phys_lo;
+
+	unsigned int size_hi;
+	unsigned int size_lo;
+};
+#else
+struct linux_prom_pci_registers {
+	/*
+	 * We don't know exactly what information this field contains.
+	 * Our guess is that the PCI device/function number is in
+	 * bits 15:8.  So, ...
+	 */
+	unsigned int which_io;  /* Let it be which_io */
+
+	unsigned int phys_hi;
+	unsigned int phys_lo;
+
+	unsigned int size_hi;
+	unsigned int size_lo;
+};
+
+#endif
+
+struct linux_prom_pci_ranges {
+	unsigned int child_phys_hi;	/* Only certain bits are encoded here. */
+	unsigned int child_phys_mid;
+	unsigned int child_phys_lo;
+
+	unsigned int parent_phys_hi;
+	unsigned int parent_phys_lo;
+
+	unsigned int size_hi;
+	unsigned int size_lo;
+};
+
+struct linux_prom_pci_intmap {
+	unsigned int phys_hi;
+	unsigned int phys_mid;
+	unsigned int phys_lo;
+
+	unsigned int interrupt;
+
+	int          cnode;
+	unsigned int cinterrupt;
+};
+
+struct linux_prom_pci_intmask {
+	unsigned int phys_hi;
+	unsigned int phys_mid;
+	unsigned int phys_lo;
+	unsigned int interrupt;
+};
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_OPENPROM_H) */
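The pv_fortheval union above has version-dependent prototypes, so callers must dispatch on pv_romvers; a hedged sketch (romvec_eval() is a hypothetical helper, not kernel code):

	static void romvec_eval(struct linux_romvec *rv, char *str, int len)
	{
		if (rv->pv_romvers == 0)
			(*rv->pv_fortheval.v0_eval)(len, str);	/* V0: explicit length */
		else
			(*rv->pv_fortheval.v2_eval)(str);	/* V2+: NUL-terminated */
	}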
diff --git a/arch/sparc/include/asm/oplib.h b/arch/sparc/include/asm/oplib.h
new file mode 100644
index 0000000..df49a77
--- /dev/null
+++ b/arch/sparc/include/asm/oplib.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_OPLIB_H
+#define ___ASM_SPARC_OPLIB_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/oplib_64.h>
+#else
+#include <asm/oplib_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/oplib_32.h b/arch/sparc/include/asm/oplib_32.h
new file mode 100644
index 0000000..d1cf3a2
--- /dev/null
+++ b/arch/sparc/include/asm/oplib_32.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * oplib.h:  Describes the interface and available routines in the
+ *           Linux Prom library.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef __SPARC_OPLIB_H
+#define __SPARC_OPLIB_H
+
+#include <asm/openprom.h>
+#include <linux/spinlock.h>
+#include <linux/compiler.h>
+
+/* The master romvec pointer... */
+extern struct linux_romvec *romvec;
+
+/* Enumeration to describe the prom major version we have detected. */
+enum prom_major_version {
+	PROM_V0,      /* Original sun4c V0 prom */
+	PROM_V2,      /* sun4c and early sun4m V2 prom */
+	PROM_V3,      /* sun4m and later, up to sun4d/sun4e machines V3 */
+	PROM_P1275,   /* IEEE compliant ISA based Sun PROM, only sun4u */
+};
+
+extern enum prom_major_version prom_vers;
+/* Revision, and firmware revision. */
+extern unsigned int prom_rev, prom_prev;
+
+/* Root node of the prom device tree, this stays constant after
+ * initialization is complete.
+ */
+extern phandle prom_root_node;
+
+/* Pointer to prom structure containing the device tree traversal
+ * and usage utility functions.  Only the prom library should use
+ * these; all other users must go through the interface the library
+ * defines.
+ */
+extern struct linux_nodeops *prom_nodeops;
+
+/* The functions... */
+
+/* You must call prom_init() before using any of the library services,
+ * preferably as early as possible.  Pass it the romvec pointer.
+ */
+void prom_init(struct linux_romvec *rom_ptr);
+
+/* Boot argument acquisition, returns the boot command line string. */
+char *prom_getbootargs(void);
+
+/* Miscellaneous routines, don't really fit in any category per se. */
+
+/* Reboot the machine with the command line passed. */
+void prom_reboot(char *boot_command);
+
+/* Evaluate the forth string passed. */
+void prom_feval(char *forth_string);
+
+/* Enter the prom, with possibility of continuation with the 'go'
+ * command in newer proms.
+ */
+void prom_cmdline(void);
+
+/* Enter the prom, with no chance of continuation for the stand-alone
+ * which calls this.
+ */
+void __noreturn prom_halt(void);
+
+/* Set the PROM 'sync' callback function to the passed function pointer.
+ * When the user gives the 'sync' command at the prom prompt while the
+ * kernel is still active, the prom will call this routine.
+ *
+ * XXX The arguments are different on V0 vs. V2->higher proms, grrr! XXX
+ */
+typedef void (*sync_func_t)(void);
+void prom_setsync(sync_func_t func_ptr);
+
+/* Acquire the IDPROM of the root node in the prom device tree.  This
+ * gets passed a buffer where you would like it stuffed.  The return value
+ * is the format type of this idprom or 0xff on error.
+ */
+unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size);
+
+/* Get the prom major version. */
+int prom_version(void);
+
+/* Get the prom plugin revision. */
+int prom_getrev(void);
+
+/* Get the prom firmware revision. */
+int prom_getprev(void);
+
+/* Write a buffer of characters to the console. */
+void prom_console_write_buf(const char *buf, int len);
+
+/* Prom's internal routines, don't use in kernel/boot code. */
+__printf(1, 2) void prom_printf(const char *fmt, ...);
+void prom_write(const char *buf, unsigned int len);
+
+/* Multiprocessor operations... */
+
+/* Start the CPU with the given device tree node, context table, and context
+ * at the passed program counter.
+ */
+int prom_startcpu(int cpunode, struct linux_prom_registers *context_table,
+		  int context, char *program_counter);
+
+/* Initialize the memory lists based upon the prom version. */
+void prom_meminit(void);
+
+/* PROM device tree traversal functions... */
+
+/* Get the child node of the given node, or zero if no child exists. */
+phandle prom_getchild(phandle parent_node);
+
+/* Get the next sibling node of the given node, or zero if no further
+ * siblings exist.
+ */
+phandle prom_getsibling(phandle node);
+
+/* Get the length, at the passed node, of the given property type.
+ * Returns -1 on error (ie. no such property at this node).
+ */
+int prom_getproplen(phandle thisnode, const char *property);
+
+/* Fetch the requested property using the given buffer.  Returns
+ * the number of bytes the prom put into your buffer or -1 on error.
+ */
+int __must_check prom_getproperty(phandle thisnode, const char *property,
+				  char *prop_buffer, int propbuf_size);
+
+/* Acquire an integer property. */
+int prom_getint(phandle node, char *property);
+
+/* Acquire an integer property, with a default value. */
+int prom_getintdefault(phandle node, char *property, int defval);
+
+/* Acquire a boolean property, 0=FALSE 1=TRUE. */
+int prom_getbool(phandle node, char *prop);
+
+/* Acquire a string property, null string on error. */
+void prom_getstring(phandle node, char *prop, char *buf, int bufsize);
+
+/* Search all siblings starting at the passed node for "name" matching
+ * the given string.  Returns the node on success, zero on failure.
+ */
+phandle prom_searchsiblings(phandle node_start, char *name);
+
+/* Returns the next property after the passed property for the given
+ * node.  Returns null string on failure.
+ */
+char *prom_nextprop(phandle node, char *prev_property, char *buffer);
+
+/* Returns phandle of the path specified */
+phandle prom_finddevice(char *name);
+
+/* Set the indicated property at the given node with the passed value.
+ * Returns the number of bytes of your value that the prom took.
+ */
+int prom_setprop(phandle node, const char *prop_name, char *prop_value,
+		 int value_size);
+
+phandle prom_inst2pkg(int);
+
+/* Dorking with Bus ranges... */
+
+/* Apply the promlib-probed OBIO ranges to registers. */
+void prom_apply_obio_ranges(struct linux_prom_registers *obioregs, int nregs);
+
+/* Apply ranges of any prom node (and optionally parent node as well) to registers. */
+void prom_apply_generic_ranges(phandle node, phandle parent,
+			       struct linux_prom_registers *sbusregs, int nregs);
+
+void prom_ranges_init(void);
+
+/* CPU probing helpers.  */
+int cpu_find_by_instance(int instance, phandle *prom_node, int *mid);
+int cpu_find_by_mid(int mid, phandle *prom_node);
+int cpu_get_hwmid(phandle prom_node);
+
+extern spinlock_t prom_lock;
+
+#endif /* !(__SPARC_OPLIB_H) */
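The traversal primitives above compose into a straightforward depth-first walk of the PROM tree; a minimal sketch starting from prom_root_node (prom_dump_names() is hypothetical, and real callers would bound the recursion depth):

	static void prom_dump_names(phandle node)
	{
		char name[64];
		int len;

		while (node) {
			len = prom_getproperty(node, "name", name, sizeof(name) - 1);
			if (len > 0) {
				name[len] = '\0';
				prom_printf("%s\n", name);
			}
			prom_dump_names(prom_getchild(node));
			node = prom_getsibling(node);
		}
	}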
diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h
new file mode 100644
index 0000000..a67abeb
--- /dev/null
+++ b/arch/sparc/include/asm/oplib_64.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* oplib.h:  Describes the interface and available routines in the
+ *           Linux Prom library.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#ifndef __SPARC64_OPLIB_H
+#define __SPARC64_OPLIB_H
+
+#include <asm/openprom.h>
+
+/* OBP version string. */
+extern char prom_version[];
+
+/* Root node of the prom device tree, this stays constant after
+ * initialization is complete.
+ */
+extern phandle prom_root_node;
+
+/* PROM stdout */
+extern int prom_stdout;
+
+/* /chosen node of the prom device tree, this stays constant after
+ * initialization is complete.
+ */
+extern phandle prom_chosen_node;
+
+/* Helper values and strings in arch/sparc64/kernel/head.S */
+extern const char prom_peer_name[];
+extern const char prom_compatible_name[];
+extern const char prom_root_compatible[];
+extern const char prom_cpu_compatible[];
+extern const char prom_finddev_name[];
+extern const char prom_chosen_path[];
+extern const char prom_cpu_path[];
+extern const char prom_getprop_name[];
+extern const char prom_mmu_name[];
+extern const char prom_callmethod_name[];
+extern const char prom_translate_name[];
+extern const char prom_map_name[];
+extern const char prom_unmap_name[];
+extern int prom_mmu_ihandle_cache;
+extern unsigned int prom_boot_mapped_pc;
+extern unsigned int prom_boot_mapping_mode;
+extern unsigned long prom_boot_mapping_phys_high, prom_boot_mapping_phys_low;
+
+struct linux_mlist_p1275 {
+	struct linux_mlist_p1275 *theres_more;
+	unsigned long start_adr;
+	unsigned long num_bytes;
+};
+
+struct linux_mem_p1275 {
+	struct linux_mlist_p1275 **p1275_totphys;
+	struct linux_mlist_p1275 **p1275_prommap;
+	struct linux_mlist_p1275 **p1275_available; /* What we can use */
+};
+
+/* The functions... */
+
+/* You must call prom_init() before using any of the library services,
+ * preferably as early as possible.  Pass it the romvec pointer.
+ */
+void prom_init(void *cif_handler);
+void prom_init_report(void);
+
+/* Boot argument acquisition, returns the boot command line string. */
+char *prom_getbootargs(void);
+
+/* Miscellaneous routines, don't really fit in any category per se. */
+
+/* Reboot the machine with the command line passed. */
+void prom_reboot(const char *boot_command);
+
+/* Evaluate the forth string passed. */
+void prom_feval(const char *forth_string);
+
+/* Enter the prom, with possibility of continuation with the 'go'
+ * command in newer proms.
+ */
+void prom_cmdline(void);
+
+/* Enter the prom, with no chance of continuation for the stand-alone
+ * which calls this.
+ */
+void prom_halt(void) __attribute__ ((noreturn));
+
+/* Halt and power-off the machine. */
+void prom_halt_power_off(void) __attribute__ ((noreturn));
+
+/* Acquire the IDPROM of the root node in the prom device tree.  This
+ * gets passed a buffer where you would like it stuffed.  The return value
+ * is the format type of this idprom or 0xff on error.
+ */
+unsigned char prom_get_idprom(char *idp_buffer, int idpbuf_size);
+
+/* Write a buffer of characters to the console. */
+void prom_console_write_buf(const char *buf, int len);
+
+/* Prom's internal routines, don't use in kernel/boot code. */
+__printf(1, 2) void prom_printf(const char *fmt, ...);
+void prom_write(const char *buf, unsigned int len);
+
+/* Multiprocessor operations... */
+#ifdef CONFIG_SMP
+/* Start the CPU with the given device tree node at the passed program
+ * counter with the given arg passed in via register %o0.
+ */
+void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg);
+
+/* Start the CPU with the given cpu ID at the passed program
+ * counter with the given arg passed in via register %o0.
+ */
+void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg);
+
+/* Stop the CPU with the given cpu ID.  */
+void prom_stopcpu_cpuid(int cpuid);
+
+/* Stop the current CPU. */
+void prom_stopself(void);
+
+/* Idle the current CPU. */
+void prom_idleself(void);
+
+/* Resume the CPU with the passed device tree node. */
+void prom_resumecpu(int cpunode);
+#endif
+
+/* Power management interfaces. */
+
+/* Put the current CPU to sleep. */
+void prom_sleepself(void);
+
+/* Put the entire system to sleep. */
+int prom_sleepsystem(void);
+
+/* Initiate a wakeup event. */
+int prom_wakeupsystem(void);
+
+/* MMU and memory related OBP interfaces. */
+
+/* Get unique string identifying SIMM at given physical address. */
+int prom_getunumber(int syndrome_code,
+		    unsigned long phys_addr,
+		    char *buf, int buflen);
+
+/* Retain physical memory to the caller across soft resets. */
+int prom_retain(const char *name, unsigned long size,
+		unsigned long align, unsigned long *paddr);
+
+/* Load explicit I/D TLB entries into the calling processor. */
+long prom_itlb_load(unsigned long index,
+		    unsigned long tte_data,
+		    unsigned long vaddr);
+
+long prom_dtlb_load(unsigned long index,
+		    unsigned long tte_data,
+		    unsigned long vaddr);
+
+/* Map/Unmap client program address ranges.  First the format of
+ * the mapping mode argument.
+ */
+#define PROM_MAP_WRITE	0x0001 /* Writable */
+#define PROM_MAP_READ	0x0002 /* Readable - sw */
+#define PROM_MAP_EXEC	0x0004 /* Executable - sw */
+#define PROM_MAP_LOCKED	0x0010 /* Locked, use i/dtlb load calls for this instead */
+#define PROM_MAP_CACHED	0x0020 /* Cacheable in both L1 and L2 caches */
+#define PROM_MAP_SE	0x0040 /* Side-Effects */
+#define PROM_MAP_GLOB	0x0080 /* Global */
+#define PROM_MAP_IE	0x0100 /* Invert-Endianness */
+#define PROM_MAP_DEFAULT (PROM_MAP_WRITE | PROM_MAP_READ | PROM_MAP_EXEC | PROM_MAP_CACHED)
+
+int prom_map(int mode, unsigned long size,
+	     unsigned long vaddr, unsigned long paddr);
+void prom_unmap(unsigned long size, unsigned long vaddr);
+
+
+/* PROM device tree traversal functions... */
+
+/* Get the child node of the given node, or zero if no child exists. */
+phandle prom_getchild(phandle parent_node);
+
+/* Get the next sibling node of the given node, or zero if no further
+ * siblings exist.
+ */
+phandle prom_getsibling(phandle node);
+
+/* Get the length, at the passed node, of the given property type.
+ * Returns -1 on error (ie. no such property at this node).
+ */
+int prom_getproplen(phandle thisnode, const char *property);
+
+/* Fetch the requested property using the given buffer.  Returns
+ * the number of bytes the prom put into your buffer or -1 on error.
+ */
+int prom_getproperty(phandle thisnode, const char *property,
+		     char *prop_buffer, int propbuf_size);
+
+/* Acquire an integer property. */
+int prom_getint(phandle node, const char *property);
+
+/* Acquire an integer property, with a default value. */
+int prom_getintdefault(phandle node, const char *property, int defval);
+
+/* Acquire a boolean property, 0=FALSE 1=TRUE. */
+int prom_getbool(phandle node, const char *prop);
+
+/* Acquire a string property, null string on error. */
+void prom_getstring(phandle node, const char *prop, char *buf,
+		    int bufsize);
+
+/* Does the passed node have the given "name"? YES=1 NO=0 */
+int prom_nodematch(phandle thisnode, const char *name);
+
+/* Search all siblings starting at the passed node for "name" matching
+ * the given string.  Returns the node on success, zero on failure.
+ */
+phandle prom_searchsiblings(phandle node_start, const char *name);
+
+/* Return the first property type, as a string, for the given node.
+ * Returns a null string on error. Buffer should be at least 32B long.
+ */
+char *prom_firstprop(phandle node, char *buffer);
+
+/* Returns the next property after the passed property for the given
+ * node.  Returns null string on failure. Buffer should be at least 32B long.
+ */
+char *prom_nextprop(phandle node, const char *prev_property, char *buf);
+
+/* Returns 1 if the specified node has given property. */
+int prom_node_has_property(phandle node, const char *property);
+
+/* Returns phandle of the path specified */
+phandle prom_finddevice(const char *name);
+
+/* Set the indicated property at the given node with the passed value.
+ * Returns the number of bytes of your value that the prom took.
+ */
+int prom_setprop(phandle node, const char *prop_name, char *prop_value,
+		 int value_size);
+
+phandle prom_inst2pkg(int);
+void prom_sun4v_guest_soft_state(void);
+
+int prom_ihandle2path(int handle, char *buffer, int bufsize);
+
+/* Client interface level routines. */
+void p1275_cmd_direct(unsigned long *);
+
+#endif /* !(__SPARC64_OPLIB_H) */
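A hedged sketch of the prom_map() mode bits above: map one 8K base page readable, writable, executable, and cacheable (addresses illustrative, map_one_page() hypothetical):

	static int map_one_page(unsigned long vaddr, unsigned long paddr)
	{
		/* PROM_MAP_DEFAULT = WRITE | READ | EXEC | CACHED */
		return prom_map(PROM_MAP_DEFAULT, 0x2000 /* 8K */, vaddr, paddr);
	}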
diff --git a/arch/sparc/include/asm/page.h b/arch/sparc/include/asm/page.h
new file mode 100644
index 0000000..5e44cdf
--- /dev/null
+++ b/arch/sparc/include/asm/page.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_PAGE_H
+#define ___ASM_SPARC_PAGE_H
+
+#define page_to_phys(page)	(page_to_pfn(page) << PAGE_SHIFT)
+
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/page_64.h>
+#else
+#include <asm/page_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/page_32.h b/arch/sparc/include/asm/page_32.h
new file mode 100644
index 0000000..b76d59e
--- /dev/null
+++ b/arch/sparc/include/asm/page_32.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * page.h:  Various defines and such for MMU operations on the Sparc for
+ *          the Linux kernel.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_PAGE_H
+#define _SPARC_PAGE_H
+
+#include <linux/const.h>
+
+#define PAGE_SHIFT   12
+#define PAGE_SIZE    (_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK    (~(PAGE_SIZE-1))
+
+#ifndef __ASSEMBLY__
+
+#define clear_page(page)	 memset((void *)(page), 0, PAGE_SIZE)
+#define copy_page(to,from) 	memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+#define clear_user_page(addr, vaddr, page)	\
+	do { 	clear_page(addr);		\
+		sparc_flush_page_to_ram(page);	\
+	} while (0)
+#define copy_user_page(to, from, vaddr, page)	\
+	do {	copy_page(to, from);		\
+		sparc_flush_page_to_ram(page);	\
+	} while (0)
+
+/* The following structure is used to hold the physical
+ * memory configuration of the machine.  This is filled in
+ * prom_meminit() and is later used by mem_init() to set up
+ * mem_map[].  We statically allocate SPARC_PHYS_BANKS+1 of
+ * these structs, this is arbitrary.  The entry after the
+ * last valid one has num_bytes==0.
+ */
+struct sparc_phys_banks {
+  unsigned long base_addr;
+  unsigned long num_bytes;
+};
+
+#define SPARC_PHYS_BANKS 32
+
+extern struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1];
+
+/* passing structs on the Sparc slow us down tremendously... */
+
+/* #define STRICT_MM_TYPECHECKS */
+
+#ifdef STRICT_MM_TYPECHECKS
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long iopte; } iopte_t;
+typedef struct { unsigned long pmdv[16]; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long ctxd; } ctxd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct { unsigned long iopgprot; } iopgprot_t;
+
+#define pte_val(x)	((x).pte)
+#define iopte_val(x)	((x).iopte)
+#define pmd_val(x)      ((x).pmdv[0])
+#define pgd_val(x)	((x).pgd)
+#define ctxd_val(x)	((x).ctxd)
+#define pgprot_val(x)	((x).pgprot)
+#define iopgprot_val(x)	((x).iopgprot)
+
+#define __pte(x)	((pte_t) { (x) } )
+#define __pmd(x)	((pmd_t) { { (x) }, })
+#define __iopte(x)	((iopte_t) { (x) } )
+#define __pgd(x)	((pgd_t) { (x) } )
+#define __ctxd(x)	((ctxd_t) { (x) } )
+#define __pgprot(x)	((pgprot_t) { (x) } )
+#define __iopgprot(x)	((iopgprot_t) { (x) } )
+
+#else
+/*
+ * .. while these make it easier on the compiler
+ */
+typedef unsigned long pte_t;
+typedef unsigned long iopte_t;
+typedef struct { unsigned long pmdv[16]; } pmd_t;
+typedef unsigned long pgd_t;
+typedef unsigned long ctxd_t;
+typedef unsigned long pgprot_t;
+typedef unsigned long iopgprot_t;
+
+#define pte_val(x)	(x)
+#define iopte_val(x)	(x)
+#define pmd_val(x)      ((x).pmdv[0])
+#define pgd_val(x)	(x)
+#define ctxd_val(x)	(x)
+#define pgprot_val(x)	(x)
+#define iopgprot_val(x)	(x)
+
+#define __pte(x)	(x)
+#define __pmd(x)	((pmd_t) { { (x) }, })
+#define __iopte(x)	(x)
+#define __pgd(x)	(x)
+#define __ctxd(x)	(x)
+#define __pgprot(x)	(x)
+#define __iopgprot(x)	(x)
+
+#endif
+
+typedef struct page *pgtable_t;
+
+#define TASK_UNMAPPED_BASE	0x50000000
+
+#else /* !(__ASSEMBLY__) */
+
+#define __pgprot(x)	(x)
+
+#endif /* !(__ASSEMBLY__) */
+
+#define PAGE_OFFSET	0xf0000000
+#ifndef __ASSEMBLY__
+extern unsigned long phys_base;
+extern unsigned long pfn_base;
+#endif
+#define __pa(x)			((unsigned long)(x) - PAGE_OFFSET + phys_base)
+#define __va(x)			((void *)((unsigned long) (x) - phys_base + PAGE_OFFSET))
+
+#define virt_to_phys		__pa
+#define phys_to_virt		__va
+
+#define ARCH_PFN_OFFSET		(pfn_base)
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+
+#define pfn_valid(pfn)		(((pfn) >= (pfn_base)) && (((pfn)-(pfn_base)) < max_mapnr))
+#define virt_addr_valid(kaddr)	((((unsigned long)(kaddr)-PAGE_OFFSET)>>PAGE_SHIFT) < max_mapnr)
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* _SPARC_PAGE_H */
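A worked example of the linear __pa()/__va() translation above, assuming phys_base == 0 purely for illustration:

	/*   __pa(0xf0100000) == 0xf0100000 - 0xf0000000 + phys_base == 0x00100000
	 *   __va(0x00100000) == (void *)(0x00100000 - phys_base + 0xf0000000)
	 *                    == (void *)0xf0100000
	 */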
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
new file mode 100644
index 0000000..e80f2d5
--- /dev/null
+++ b/arch/sparc/include/asm/page_64.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_PAGE_H
+#define _SPARC64_PAGE_H
+
+#include <linux/const.h>
+
+#define PAGE_SHIFT   13
+
+#define PAGE_SIZE    (_AC(1,UL) << PAGE_SHIFT)
+#define PAGE_MASK    (~(PAGE_SIZE-1))
+
+/* Flushing for D-cache alias handling is only needed if
+ * the page size is smaller than 16K.
+ */
+#if PAGE_SHIFT < 14
+#define DCACHE_ALIASING_POSSIBLE
+#endif
+
+#define HPAGE_SHIFT		23
+#define REAL_HPAGE_SHIFT	22
+#define HPAGE_16GB_SHIFT	34
+#define HPAGE_2GB_SHIFT		31
+#define HPAGE_256MB_SHIFT	28
+#define HPAGE_64K_SHIFT		16
+#define REAL_HPAGE_SIZE		(_AC(1,UL) << REAL_HPAGE_SHIFT)
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
+#define HPAGE_MASK		(~(HPAGE_SIZE - 1UL))
+#define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
+#define HUGE_MAX_HSTATE		5
+#endif
+
+#ifndef __ASSEMBLY__
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+struct pt_regs;
+void hugetlb_setup(struct pt_regs *regs);
+#endif
+
+#define WANT_PAGE_VIRTUAL
+
+void _clear_page(void *page);
+#define clear_page(X)	_clear_page((void *)(X))
+struct page;
+void clear_user_page(void *addr, unsigned long vaddr, struct page *page);
+#define copy_page(X,Y)	memcpy((void *)(X), (void *)(Y), PAGE_SIZE)
+void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topage);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+struct vm_area_struct;
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_HIGHPAGE
+void copy_highpage(struct page *to, struct page *from);
+
+/* Unlike sparc32, sparc64's parameter passing API is more
+ * sane in that structures which are small enough are passed
+ * in registers instead of on the stack.  Thus, setting
+ * STRICT_MM_TYPECHECKS does not generate worse code so
+ * let's enable it to get the type checking.
+ */
+
+#define STRICT_MM_TYPECHECKS
+
+#ifdef STRICT_MM_TYPECHECKS
+/* These are used to make use of C type-checking.. */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long iopte; } iopte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x)	((x).pte)
+#define iopte_val(x)	((x).iopte)
+#define pmd_val(x)      ((x).pmd)
+#define pud_val(x)      ((x).pud)
+#define pgd_val(x)	((x).pgd)
+#define pgprot_val(x)	((x).pgprot)
+
+#define __pte(x)	((pte_t) { (x) } )
+#define __iopte(x)	((iopte_t) { (x) } )
+#define __pmd(x)        ((pmd_t) { (x) } )
+#define __pud(x)        ((pud_t) { (x) } )
+#define __pgd(x)	((pgd_t) { (x) } )
+#define __pgprot(x)	((pgprot_t) { (x) } )
+
+#else
+/* .. while these make it easier on the compiler */
+typedef unsigned long pte_t;
+typedef unsigned long iopte_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pgd_t;
+typedef unsigned long pgprot_t;
+
+#define pte_val(x)	(x)
+#define iopte_val(x)	(x)
+#define pmd_val(x)      (x)
+#define pud_val(x)      (x)
+#define pgd_val(x)	(x)
+#define pgprot_val(x)	(x)
+
+#define __pte(x)	(x)
+#define __iopte(x)	(x)
+#define __pmd(x)        (x)
+#define __pud(x)        (x)
+#define __pgd(x)	(x)
+#define __pgprot(x)	(x)
+
+#endif /* (STRICT_MM_TYPECHECKS) */
+
+typedef pte_t *pgtable_t;
+
+extern unsigned long sparc64_va_hole_top;
+extern unsigned long sparc64_va_hole_bottom;
+
+/* The next two defines specify the actual exclusion region we
+ * enforce, wherein we use a 4GB red zone on each side of the VA hole.
+ */
+#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL))
+#define VA_EXCLUDE_END   (sparc64_va_hole_top + (1UL << 32UL))
+
+#define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_32BIT) ? \
+				 _AC(0x0000000070000000,UL) : \
+				 VA_EXCLUDE_END)
+
+#include <asm-generic/memory_model.h>
+
+extern unsigned long PAGE_OFFSET;
+
+#endif /* !(__ASSEMBLY__) */
+
+/* The maximum number of physical memory address bits we support.  The
+ * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS"
+ * evaluates to.
+ */
+#define MAX_PHYS_ADDRESS_BITS	53
+
+#define ILOG2_4MB		22
+#define ILOG2_256MB		28
+
+#ifndef __ASSEMBLY__
+
+#define __pa(x)			((unsigned long)(x) - PAGE_OFFSET)
+#define __va(x)			((void *)((unsigned long) (x) + PAGE_OFFSET))
+
+#define pfn_to_kaddr(pfn)	__va((pfn) << PAGE_SHIFT)
+
+#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr)>>PAGE_SHIFT)
+
+#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+
+#define virt_to_phys __pa
+#define phys_to_virt __va
+
+#endif /* !(__ASSEMBLY__) */
+
+#define VM_DATA_DEFAULT_FLAGS	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/getorder.h>
+
+#endif /* _SPARC64_PAGE_H */
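With STRICT_MM_TYPECHECKS enabled above, page-table values are distinct struct types, so mixing one up with a raw integer fails at compile time; a hedged illustration (pte_type_demo() is hypothetical):

	static void pte_type_demo(void)
	{
		pte_t pte = __pte(0x80000000UL);	/* ok: explicit constructor */
		unsigned long v = pte_val(pte);		/* ok: explicit accessor */

		(void)v;
		/* unsigned long bad = pte; */		/* rejected by the compiler */
	}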
diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h
new file mode 100644
index 0000000..05df5f0
--- /dev/null
+++ b/arch/sparc/include/asm/parport.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* parport.h: sparc64 specific parport initialization and dma.
+ *
+ * Copyright (C) 1999  Eddie C. Dost  (ecd@skynet.be)
+ */
+
+#ifndef _ASM_SPARC64_PARPORT_H
+#define _ASM_SPARC64_PARPORT_H 1
+
+#include <linux/of_device.h>
+
+#include <asm/ebus_dma.h>
+#include <asm/ns87303.h>
+#include <asm/prom.h>
+
+#define PARPORT_PC_MAX_PORTS	PARPORT_MAX
+
+/*
+ * While sparc64 doesn't have an ISA DMA API, we provide something that looks
+ * close enough to make parport_pc happy
+ */
+#define HAS_DMA
+
+static DEFINE_SPINLOCK(dma_spin_lock);
+
+#define claim_dma_lock() \
+({	unsigned long flags; \
+	spin_lock_irqsave(&dma_spin_lock, flags); \
+	flags; \
+})
+
+#define release_dma_lock(__flags) \
+	spin_unlock_irqrestore(&dma_spin_lock, __flags);
+
+static struct sparc_ebus_info {
+	struct ebus_dma_info info;
+	unsigned int addr;
+	unsigned int count;
+	int lock;
+
+	struct parport *port;
+} sparc_ebus_dmas[PARPORT_PC_MAX_PORTS];
+
+static DECLARE_BITMAP(dma_slot_map, PARPORT_PC_MAX_PORTS);
+
+static inline int request_dma(unsigned int dmanr, const char *device_id)
+{
+	if (dmanr >= PARPORT_PC_MAX_PORTS)
+		return -EINVAL;
+	if (xchg(&sparc_ebus_dmas[dmanr].lock, 1) != 0)
+		return -EBUSY;
+	return 0;
+}
+
+static inline void free_dma(unsigned int dmanr)
+{
+	if (dmanr >= PARPORT_PC_MAX_PORTS) {
+		printk(KERN_WARNING "Trying to free DMA%d\n", dmanr);
+		return;
+	}
+	if (xchg(&sparc_ebus_dmas[dmanr].lock, 0) == 0) {
+		printk(KERN_WARNING "Trying to free free DMA%d\n", dmanr);
+		return;
+	}
+}
+
+static inline void enable_dma(unsigned int dmanr)
+{
+	ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 1);
+
+	if (ebus_dma_request(&sparc_ebus_dmas[dmanr].info,
+			     sparc_ebus_dmas[dmanr].addr,
+			     sparc_ebus_dmas[dmanr].count))
+		BUG();
+}
+
+static inline void disable_dma(unsigned int dmanr)
+{
+	ebus_dma_enable(&sparc_ebus_dmas[dmanr].info, 0);
+}
+
+static inline void clear_dma_ff(unsigned int dmanr)
+{
+	/* nothing */
+}
+
+static inline void set_dma_mode(unsigned int dmanr, char mode)
+{
+	ebus_dma_prepare(&sparc_ebus_dmas[dmanr].info, (mode != DMA_MODE_WRITE));
+}
+
+static inline void set_dma_addr(unsigned int dmanr, unsigned int addr)
+{
+	sparc_ebus_dmas[dmanr].addr = addr;
+}
+
+static inline void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+	sparc_ebus_dmas[dmanr].count = count;
+}
+
+static inline unsigned int get_dma_residue(unsigned int dmanr)
+{
+	return ebus_dma_residue(&sparc_ebus_dmas[dmanr].info);
+}
+
+static int ecpp_probe(struct platform_device *op)
+{
+	unsigned long base = op->resource[0].start;
+	unsigned long config = op->resource[1].start;
+	unsigned long d_base = op->resource[2].start;
+	unsigned long d_len;
+	struct device_node *parent;
+	struct parport *p;
+	int slot, err;
+
+	parent = op->dev.of_node->parent;
+	if (!strcmp(parent->name, "dma")) {
+		p = parport_pc_probe_port(base, base + 0x400,
+					  op->archdata.irqs[0], PARPORT_DMA_NOFIFO,
+					  op->dev.parent->parent, 0);
+		if (!p)
+			return -ENOMEM;
+		dev_set_drvdata(&op->dev, p);
+		return 0;
+	}
+
+	for (slot = 0; slot < PARPORT_PC_MAX_PORTS; slot++) {
+		if (!test_and_set_bit(slot, dma_slot_map))
+			break;
+	}
+	err = -ENODEV;
+	if (slot >= PARPORT_PC_MAX_PORTS)
+		goto out_err;
+
+	spin_lock_init(&sparc_ebus_dmas[slot].info.lock);
+
+	d_len = (op->resource[2].end - d_base) + 1UL;
+	sparc_ebus_dmas[slot].info.regs =
+		of_ioremap(&op->resource[2], 0, d_len, "ECPP DMA");
+
+	if (!sparc_ebus_dmas[slot].info.regs)
+		goto out_clear_map;
+
+	sparc_ebus_dmas[slot].info.flags = 0;
+	sparc_ebus_dmas[slot].info.callback = NULL;
+	sparc_ebus_dmas[slot].info.client_cookie = NULL;
+	sparc_ebus_dmas[slot].info.irq = 0xdeadbeef;
+	strcpy(sparc_ebus_dmas[slot].info.name, "parport");
+	if (ebus_dma_register(&sparc_ebus_dmas[slot].info))
+		goto out_unmap_regs;
+
+	ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 1);
+
+	/* Configure IRQ to Push Pull, Level Low */
+	/* Enable ECP, set bit 2 of the CTR first */
+	outb(0x04, base + 0x02);
+	ns87303_modify(config, PCR,
+		       PCR_EPP_ENABLE |
+		       PCR_IRQ_ODRAIN,
+		       PCR_ECP_ENABLE |
+		       PCR_ECP_CLK_ENA |
+		       PCR_IRQ_POLAR);
+
+	/* CTR bit 5 controls direction of port */
+	ns87303_modify(config, PTR,
+		       0, PTR_LPT_REG_DIR);
+
+	p = parport_pc_probe_port(base, base + 0x400,
+				  op->archdata.irqs[0],
+				  slot,
+				  op->dev.parent,
+				  0);
+	err = -ENOMEM;
+	if (!p)
+		goto out_disable_irq;
+
+	dev_set_drvdata(&op->dev, p);
+
+	return 0;
+
+out_disable_irq:
+	ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0);
+	ebus_dma_unregister(&sparc_ebus_dmas[slot].info);
+
+out_unmap_regs:
+	of_iounmap(&op->resource[2], sparc_ebus_dmas[slot].info.regs, d_len);
+
+out_clear_map:
+	clear_bit(slot, dma_slot_map);
+
+out_err:
+	return err;
+}
+
+static int ecpp_remove(struct platform_device *op)
+{
+	struct parport *p = dev_get_drvdata(&op->dev);
+	int slot = p->dma;
+
+	parport_pc_unregister_port(p);
+
+	if (slot != PARPORT_DMA_NOFIFO) {
+		unsigned long d_base = op->resource[2].start;
+		unsigned long d_len;
+
+		d_len = (op->resource[2].end - d_base) + 1UL;
+
+		ebus_dma_irq_enable(&sparc_ebus_dmas[slot].info, 0);
+		ebus_dma_unregister(&sparc_ebus_dmas[slot].info);
+		of_iounmap(&op->resource[2],
+			   sparc_ebus_dmas[slot].info.regs,
+			   d_len);
+		clear_bit(slot, dma_slot_map);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id ecpp_match[] = {
+	{
+		.name = "ecpp",
+	},
+	{
+		.name = "parallel",
+		.compatible = "ecpp",
+	},
+	{
+		.name = "parallel",
+		.compatible = "ns87317-ecpp",
+	},
+	{
+		.name = "parallel",
+		.compatible = "pnpALI,1533,3",
+	},
+	{},
+};
+
+static struct platform_driver ecpp_driver = {
+	.driver = {
+		.name = "ecpp",
+		.of_match_table = ecpp_match,
+	},
+	.probe			= ecpp_probe,
+	.remove			= ecpp_remove,
+};
+
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
+{
+	return platform_driver_register(&ecpp_driver);
+}
+
+#endif /* !(_ASM_SPARC64_PARPORT_H) */
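A minimal sketch of the claim/release pattern the DMA macros above expect, in the ISA style that parport_pc is written against (example_dma_start() is hypothetical):

	static void example_dma_start(unsigned int dmanr,
				      unsigned int addr, unsigned int count)
	{
		unsigned long flags = claim_dma_lock();

		set_dma_addr(dmanr, addr);
		set_dma_count(dmanr, count);
		enable_dma(dmanr);
		release_dma_lock(flags);
	}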
diff --git a/arch/sparc/include/asm/pbm.h b/arch/sparc/include/asm/pbm.h
new file mode 100644
index 0000000..0c86261
--- /dev/null
+++ b/arch/sparc/include/asm/pbm.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *
+ * pbm.h: PCI bus module pseudo driver software state
+ *        Adopted from sparc64 by V. Roganov and G. Raiko
+ *
+ * Original header:
+ * pbm.h: U2P PCI bus module pseudo driver software state.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * To put things into perspective, consider sparc64 with a few PCI controllers.
+ * Each type would have an own structure, with instances related one to one.
+ * We have only pcic on sparc, but we want to be compatible with sparc64 pbm.h.
+ * All three represent different abstractions.
+ *   pci_bus  - Linux PCI subsystem view of a PCI bus (including bridged buses)
+ *   pbm      - Arch-specific view of a PCI bus (sparc or sparc64)
+ *   pcic     - Chip-specific information for PCIC.
+ */
+
+#ifndef __SPARC_PBM_H
+#define __SPARC_PBM_H
+
+#include <linux/pci.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+
+struct linux_pbm_info {
+	int		prom_node;
+	char		prom_name[64];
+	/* struct linux_prom_pci_ranges	pbm_ranges[PROMREG_MAX]; */
+	/* int		num_pbm_ranges; */
+
+	/* Now things for the actual PCI bus probes. */
+	unsigned int	pci_first_busno;	/* Can it be nonzero? */
+	struct pci_bus	*pci_bus;		/* Was inline, MJ allocs now */
+};
+
+/* PCI devices which are not bridges have this placed in their pci_dev
+ * sysdata member.  This makes OBP aware PCI device drivers easier to
+ * code.
+ */
+struct pcidev_cookie {
+	struct linux_pbm_info		*pbm;
+	struct device_node		*prom_node;
+};
+
+#endif /* !(__SPARC_PBM_H) */
diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h
new file mode 100644
index 0000000..cad79a6
--- /dev/null
+++ b/arch/sparc/include/asm/pci.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_PCI_H
+#define ___ASM_SPARC_PCI_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/pci_64.h>
+#else
+#include <asm/pci_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h
new file mode 100644
index 0000000..cfc0ee9
--- /dev/null
+++ b/arch/sparc/include/asm/pci_32.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_PCI_H
+#define __SPARC_PCI_H
+
+#ifdef __KERNEL__
+
+#include <linux/dma-mapping.h>
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ * already-configured bus numbers - to be used for buggy BIOSes
+ * or architectures with incomplete PCI setup by the loader.
+ */
+#define pcibios_assign_all_busses()	0
+
+#define PCIBIOS_MIN_IO		0UL
+#define PCIBIOS_MIN_MEM		0UL
+
+#define PCI_IRQ_NONE		0xffffffff
+
+#endif /* __KERNEL__ */
+
+#ifndef CONFIG_LEON_PCI
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+#else
+/*
+ * On LEON, PCI memory space is mapped 1:1 with the physical address space.
+ *
+ * I/O space occupies the low 64 Kbytes of PCI I/O space.  I/O addresses
+ * are converted into virtual CPU addresses that the MMU maps onto the
+ * host controller's PCI I/O window; the controller translates accesses
+ * through that window down to the low 64 Kbytes.
+ */
+
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return PCI_IRQ_NONE;
+}
+#endif
+
+#endif /* __SPARC_PCI_H */
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
new file mode 100644
index 0000000..fac7781
--- /dev/null
+++ b/arch/sparc/include/asm/pci_64.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC64_PCI_H
+#define __SPARC64_PCI_H
+
+#ifdef __KERNEL__
+
+#include <linux/dma-mapping.h>
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+ * already-configured bus numbers - to be used for buggy BIOSes
+ * or architectures with incomplete PCI setup by the loader.
+ */
+#define pcibios_assign_all_busses()	0
+
+#define PCIBIOS_MIN_IO		0UL
+#define PCIBIOS_MIN_MEM		0UL
+
+#define PCI_IRQ_NONE		0xffffffff
+
+/* PCI IOMMU mapping bypass support. */
+
+/* PCI 64-bit addressing works for all slots on all controller
+ * types on sparc64.  However, it requires that the device
+ * can drive enough of the 64 bits.
+ */
+#define PCI64_REQUIRED_MASK	(~(u64)0)
+#define PCI64_ADDR_BASE		0xfffc000000000000UL
+
+/* Return the index of the PCI controller for device PDEV. */
+
+int pci_domain_nr(struct pci_bus *bus);
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return 1;
+}
+
+/* Platform support for /proc/bus/pci/X/Y mmap()s. */
+
+#define HAVE_PCI_MMAP
+#define arch_can_pci_mmap_io()	1
+#define HAVE_ARCH_PCI_GET_UNMAPPED_AREA
+#define get_pci_unmapped_area get_fb_unmapped_area
+
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+	return PCI_IRQ_NONE;
+}
+
+#define HAVE_ARCH_PCI_RESOURCE_TO_USER
+#endif /* __KERNEL__ */
+
+#endif /* __SPARC64_PCI_H */
diff --git a/arch/sparc/include/asm/pcic.h b/arch/sparc/include/asm/pcic.h
new file mode 100644
index 0000000..238376b
--- /dev/null
+++ b/arch/sparc/include/asm/pcic.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * pcic.h: JavaEngine 1 specific PCI definitions.
+ *
+ * Copyright (C) 1998 V. Roganov and G. Raiko
+ */
+
+#ifndef __SPARC_PCIC_H
+#define __SPARC_PCIC_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/smp.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <asm/pbm.h>
+
+struct linux_pcic {
+        void __iomem            *pcic_regs;
+        unsigned long           pcic_io;
+        void __iomem            *pcic_config_space_addr;
+        void __iomem            *pcic_config_space_data;
+	struct resource		pcic_res_regs;
+	struct resource		pcic_res_io;
+	struct resource		pcic_res_cfg_addr;
+	struct resource		pcic_res_cfg_data;
+        struct linux_pbm_info   pbm;
+	struct pcic_ca2irq	*pcic_imap;
+	int			pcic_imdim;
+};
+
+#ifdef CONFIG_PCIC_PCI
+int pcic_present(void);
+int pcic_probe(void);
+void pci_time_init(void);
+void sun4m_pci_init_IRQ(void);
+#else
+static inline int pcic_present(void) { return 0; }
+static inline int pcic_probe(void) { return 0; }
+static inline void pci_time_init(void) {}
+static inline void sun4m_pci_init_IRQ(void) {}
+#endif
+#endif
+
+/* Size of PCI I/O space which we relocate. */
+#define PCI_SPACE_SIZE                  0x1000000       /* 16 MB */
+
+/* PCIC Register Set. */
+#define PCI_DIAGNOSTIC_0                0x40    /* 32 bits */
+#define PCI_SIZE_0                      0x44    /* 32 bits */
+#define PCI_SIZE_1                      0x48    /* 32 bits */
+#define PCI_SIZE_2                      0x4c    /* 32 bits */
+#define PCI_SIZE_3                      0x50    /* 32 bits */
+#define PCI_SIZE_4                      0x54    /* 32 bits */
+#define PCI_SIZE_5                      0x58    /* 32 bits */
+#define PCI_PIO_CONTROL                 0x60    /* 8  bits */
+#define PCI_DVMA_CONTROL                0x62    /* 8  bits */
+#define  PCI_DVMA_CONTROL_INACTIVITY_REQ        (1<<0)
+#define  PCI_DVMA_CONTROL_IOTLB_ENABLE          (1<<0)
+#define  PCI_DVMA_CONTROL_IOTLB_DISABLE         0
+#define  PCI_DVMA_CONTROL_INACTIVITY_ACK        (1<<4)
+#define PCI_INTERRUPT_CONTROL           0x63    /* 8  bits */
+#define PCI_CPU_INTERRUPT_PENDING       0x64    /* 32 bits */
+#define PCI_DIAGNOSTIC_1                0x68    /* 16 bits */
+#define PCI_SOFTWARE_INT_CLEAR          0x6a    /* 16 bits */
+#define PCI_SOFTWARE_INT_SET            0x6e    /* 16 bits */
+#define PCI_SYS_INT_PENDING             0x70    /* 32 bits */
+#define  PCI_SYS_INT_PENDING_PIO		0x40000000
+#define  PCI_SYS_INT_PENDING_DMA		0x20000000
+#define  PCI_SYS_INT_PENDING_PCI		0x10000000
+#define  PCI_SYS_INT_PENDING_APSR		0x08000000
+#define PCI_SYS_INT_TARGET_MASK         0x74    /* 32 bits */
+#define PCI_SYS_INT_TARGET_MASK_CLEAR   0x78    /* 32 bits */
+#define PCI_SYS_INT_TARGET_MASK_SET     0x7c    /* 32 bits */
+#define PCI_SYS_INT_PENDING_CLEAR       0x83    /* 8  bits */
+#define  PCI_SYS_INT_PENDING_CLEAR_ALL		0x80
+#define  PCI_SYS_INT_PENDING_CLEAR_PIO		0x40
+#define  PCI_SYS_INT_PENDING_CLEAR_DMA		0x20
+#define  PCI_SYS_INT_PENDING_CLEAR_PCI		0x10
+#define PCI_IOTLB_CONTROL               0x84    /* 8  bits */
+#define PCI_INT_SELECT_LO               0x88    /* 16 bits */
+#define PCI_ARBITRATION_SELECT          0x8a    /* 16 bits */
+#define PCI_INT_SELECT_HI               0x8c    /* 16 bits */
+#define PCI_HW_INT_OUTPUT               0x8e    /* 16 bits */
+#define PCI_IOTLB_RAM_INPUT             0x90    /* 32 bits */
+#define PCI_IOTLB_CAM_INPUT             0x94    /* 32 bits */
+#define PCI_IOTLB_RAM_OUTPUT            0x98    /* 32 bits */
+#define PCI_IOTLB_CAM_OUTPUT            0x9c    /* 32 bits */
+#define PCI_SMBAR0                      0xa0    /* 8  bits */
+#define PCI_MSIZE0                      0xa1    /* 8  bits */
+#define PCI_PMBAR0                      0xa2    /* 8  bits */
+#define PCI_SMBAR1                      0xa4    /* 8  bits */
+#define PCI_MSIZE1                      0xa5    /* 8  bits */
+#define PCI_PMBAR1                      0xa6    /* 8  bits */
+#define PCI_SIBAR                       0xa8    /* 8  bits */
+#define   PCI_SIBAR_ADDRESS_MASK        0xf
+#define PCI_ISIZE                       0xa9    /* 8  bits */
+#define   PCI_ISIZE_16M                 0xf
+#define   PCI_ISIZE_32M                 0xe
+#define   PCI_ISIZE_64M                 0xc
+#define   PCI_ISIZE_128M                0x8
+#define   PCI_ISIZE_256M                0x0
+#define PCI_PIBAR                       0xaa    /* 8  bits */
+#define PCI_CPU_COUNTER_LIMIT_HI        0xac    /* 32 bits */
+#define PCI_CPU_COUNTER_LIMIT_LO        0xb0    /* 32 bits */
+#define PCI_CPU_COUNTER_LIMIT           0xb4    /* 32 bits */
+#define PCI_SYS_LIMIT                   0xb8    /* 32 bits */
+#define PCI_SYS_COUNTER                 0xbc    /* 32 bits */
+#define   PCI_SYS_COUNTER_OVERFLOW      (1<<31) /* Limit reached */
+#define PCI_SYS_LIMIT_PSEUDO            0xc0    /* 32 bits */
+#define PCI_USER_TIMER_CONTROL          0xc4    /* 8  bits */
+#define PCI_USER_TIMER_CONFIG           0xc5    /* 8  bits */
+#define PCI_COUNTER_IRQ                 0xc6    /* 8  bits */
+#define  PCI_COUNTER_IRQ_SET(sys_irq, cpu_irq)  ((((sys_irq) & 0xf) << 4) | \
+                                                  ((cpu_irq) & 0xf))
+#define  PCI_COUNTER_IRQ_SYS(v)                 (((v) >> 4) & 0xf)
+#define  PCI_COUNTER_IRQ_CPU(v)                 ((v) & 0xf)
+#define PCI_PIO_ERROR_COMMAND           0xc7    /* 8  bits */
+#define PCI_PIO_ERROR_ADDRESS           0xc8    /* 32 bits */
+#define PCI_IOTLB_ERROR_ADDRESS         0xcc    /* 32 bits */
+#define PCI_SYS_STATUS                  0xd0    /* 8  bits */
+#define   PCI_SYS_STATUS_RESET_ENABLE           (1<<0)
+#define   PCI_SYS_STATUS_RESET                  (1<<1)
+#define   PCI_SYS_STATUS_WATCHDOG_RESET         (1<<4)
+#define   PCI_SYS_STATUS_PCI_RESET              (1<<5)
+#define   PCI_SYS_STATUS_PCI_RESET_ENABLE       (1<<6)
+#define   PCI_SYS_STATUS_PCI_SATTELITE_MODE     (1<<7)
+
+#endif /* !(__SPARC_PCIC_H) */
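A worked example of the PCI_COUNTER_IRQ packing macros above:

	/*   PCI_COUNTER_IRQ_SET(10, 14) == ((10 & 0xf) << 4) | (14 & 0xf) == 0xae
	 *   PCI_COUNTER_IRQ_SYS(0xae)  == 10
	 *   PCI_COUNTER_IRQ_CPU(0xae)  == 14
	 */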
diff --git a/arch/sparc/include/asm/pcr.h b/arch/sparc/include/asm/pcr.h
new file mode 100644
index 0000000..da834ff
--- /dev/null
+++ b/arch/sparc/include/asm/pcr.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PCR_H
+#define __PCR_H
+
+struct pcr_ops {
+	u64 (*read_pcr)(unsigned long);
+	void (*write_pcr)(unsigned long, u64);
+	u64 (*read_pic)(unsigned long);
+	void (*write_pic)(unsigned long, u64);
+	u64 (*nmi_picl_value)(unsigned int nmi_hz);
+	u64 pcr_nmi_enable;
+	u64 pcr_nmi_disable;
+};
+extern const struct pcr_ops *pcr_ops;
+
+void deferred_pcr_work_irq(int irq, struct pt_regs *regs);
+void schedule_deferred_pcr_work(void);
+
+#define PCR_PIC_PRIV		0x00000001 /* PIC access is privileged */
+#define PCR_STRACE		0x00000002 /* Trace supervisor events  */
+#define PCR_UTRACE		0x00000004 /* Trace user events        */
+#define PCR_N2_HTRACE		0x00000008 /* Trace hypervisor events  */
+#define PCR_N2_TOE_OV0		0x00000010 /* Trap if PIC 0 overflows  */
+#define PCR_N2_TOE_OV1		0x00000020 /* Trap if PIC 1 overflows  */
+#define PCR_N2_MASK0		0x00003fc0
+#define PCR_N2_MASK0_SHIFT	6
+#define PCR_N2_SL0		0x0003c000
+#define PCR_N2_SL0_SHIFT	14
+#define PCR_N2_OV0		0x00040000
+#define PCR_N2_MASK1		0x07f80000
+#define PCR_N2_MASK1_SHIFT	19
+#define PCR_N2_SL1		0x78000000
+#define PCR_N2_SL1_SHIFT	27
+#define PCR_N2_OV1		0x80000000
+
+#define PCR_N4_OV		0x00000001 /* PIC overflow             */
+#define PCR_N4_TOE		0x00000002 /* Trap On Event            */
+#define PCR_N4_UTRACE		0x00000004 /* Trace user events        */
+#define PCR_N4_STRACE		0x00000008 /* Trace supervisor events  */
+#define PCR_N4_HTRACE		0x00000010 /* Trace hypervisor events  */
+#define PCR_N4_MASK		0x000007e0 /* Event mask               */
+#define PCR_N4_MASK_SHIFT	5
+#define PCR_N4_SL		0x0000f800 /* Event Select             */
+#define PCR_N4_SL_SHIFT		11
+#define PCR_N4_PICNPT		0x00010000 /* PIC non-privileged trap  */
+#define PCR_N4_PICNHT		0x00020000 /* PIC non-hypervisor trap  */
+#define PCR_N4_NTC		0x00040000 /* Next-To-Commit wrap      */
+
+int pcr_arch_init(void);
+
+#endif /* __PCR_H */
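A hedged sketch of composing a SPARC-T4 style PCR value from the PCR_N4_* fields above, counting user and supervisor events (n4_pcr_value() is hypothetical; event select codes and masks are chip-specific):

	static u64 n4_pcr_value(u64 sl, u64 mask)
	{
		return PCR_N4_UTRACE | PCR_N4_STRACE |
		       ((sl << PCR_N4_SL_SHIFT) & PCR_N4_SL) |
		       ((mask << PCR_N4_MASK_SHIFT) & PCR_N4_MASK);
	}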
diff --git a/arch/sparc/include/asm/percpu.h b/arch/sparc/include/asm/percpu.h
new file mode 100644
index 0000000..0cd704e
--- /dev/null
+++ b/arch/sparc/include/asm/percpu.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_PERCPU_H
+#define ___ASM_SPARC_PERCPU_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/percpu_64.h>
+#else
+#include <asm/percpu_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/percpu_32.h b/arch/sparc/include/asm/percpu_32.h
new file mode 100644
index 0000000..ee6c7c1
--- /dev/null
+++ b/arch/sparc/include/asm/percpu_32.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_SPARC_PERCPU__
+#define __ARCH_SPARC_PERCPU__
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ARCH_SPARC_PERCPU__ */
diff --git a/arch/sparc/include/asm/percpu_64.h b/arch/sparc/include/asm/percpu_64.h
new file mode 100644
index 0000000..32ef6f0
--- /dev/null
+++ b/arch/sparc/include/asm/percpu_64.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_SPARC64_PERCPU__
+#define __ARCH_SPARC64_PERCPU__
+
+#include <linux/compiler.h>
+
+register unsigned long __local_per_cpu_offset asm("g5");
+
+#ifdef CONFIG_SMP
+
+#include <asm/trap_block.h>
+
+#define __per_cpu_offset(__cpu) \
+	(trap_block[(__cpu)].__per_cpu_base)
+#define per_cpu_offset(x) (__per_cpu_offset(x))
+
+#define __my_cpu_offset __local_per_cpu_offset
+
+#else /* ! SMP */
+
+#endif	/* SMP */
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ARCH_SPARC64_PERCPU__ */
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
new file mode 100644
index 0000000..c2aec0c
--- /dev/null
+++ b/arch/sparc/include/asm/perf_event.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPARC_PERF_EVENT_H
+#define __ASM_SPARC_PERF_EVENT_H
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
+#define perf_arch_fetch_caller_regs(regs, ip)		\
+do {							\
+	unsigned long _pstate, _asi, _pil, _i7, _fp;	\
+	__asm__ __volatile__("rdpr %%pstate, %0\n\t"	\
+			     "rd %%asi, %1\n\t"		\
+			     "rdpr %%pil, %2\n\t"	\
+			     "mov %%i7, %3\n\t"		\
+			     "mov %%i6, %4\n\t"		\
+			     : "=r" (_pstate),		\
+			       "=r" (_asi),		\
+			       "=r" (_pil),		\
+			       "=r" (_i7),		\
+			       "=r" (_fp));		\
+	(regs)->tstate = (_pstate << 8) |		\
+		(_asi << 24) | (_pil << 20);		\
+	(regs)->tpc = (ip);				\
+	(regs)->tnpc = (regs)->tpc + 4;			\
+	(regs)->u_regs[UREG_I6] = _fp;			\
+	(regs)->u_regs[UREG_I7] = _i7;			\
+} while (0)
+#endif
+
+#endif
diff --git a/arch/sparc/include/asm/pgalloc.h b/arch/sparc/include/asm/pgalloc.h
new file mode 100644
index 0000000..9ea0b37
--- /dev/null
+++ b/arch/sparc/include/asm/pgalloc.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_PGALLOC_H
+#define ___ASM_SPARC_PGALLOC_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/pgalloc_64.h>
+#else
+#include <asm/pgalloc_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
new file mode 100644
index 0000000..9045948
--- /dev/null
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_PGALLOC_H
+#define _SPARC_PGALLOC_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/pgtsrmmu.h>
+#include <asm/pgtable.h>
+#include <asm/vaddrs.h>
+#include <asm/page.h>
+
+struct page;
+
+void *srmmu_get_nocache(int size, int align);
+void srmmu_free_nocache(void *addr, int size);
+
+extern struct resource sparc_iomap;
+
+#define check_pgt_cache()	do { } while (0)
+
+pgd_t *get_pgd_fast(void);
+static inline void free_pgd_fast(pgd_t *pgd)
+{
+	srmmu_free_nocache(pgd, SRMMU_PGD_TABLE_SIZE);
+}
+
+#define pgd_free(mm, pgd)	free_pgd_fast(pgd)
+#define pgd_alloc(mm)	get_pgd_fast()
+
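+/* An SRMMU page table pointer holds the next-level table's physical
+ * address shifted right by 4, with SRMMU_ET_PTD in the low two bits --
+ * hence the "pa >> 4" below.
+ */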
+static inline void pgd_set(pgd_t * pgdp, pmd_t * pmdp)
+{
+	unsigned long pa = __nocache_pa(pmdp);
+
+	set_pte((pte_t *)pgdp, __pte((SRMMU_ET_PTD | (pa >> 4))));
+}
+
+#define pgd_populate(MM, PGD, PMD)      pgd_set(PGD, PMD)
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm,
+				   unsigned long address)
+{
+	return srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
+				 SRMMU_PMD_TABLE_SIZE);
+}
+
+static inline void free_pmd_fast(pmd_t * pmd)
+{
+	srmmu_free_nocache(pmd, SRMMU_PMD_TABLE_SIZE);
+}
+
+#define pmd_free(mm, pmd)		free_pmd_fast(pmd)
+#define __pmd_free_tlb(tlb, pmd, addr)	pmd_free((tlb)->mm, pmd)
+
+void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep);
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+void pmd_set(pmd_t *pmdp, pte_t *ptep);
+#define pmd_populate_kernel(MM, PMD, PTE) pmd_set(PMD, PTE)
+
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+					  unsigned long address)
+{
+	return srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+}
+
+static inline void free_pte_fast(pte_t *pte)
+{
+	srmmu_free_nocache(pte, PTE_SIZE);
+}
+
+#define pte_free_kernel(mm, pte)	free_pte_fast(pte)
+
+void pte_free(struct mm_struct * mm, pgtable_t pte);
+#define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
+
+#endif /* _SPARC_PGALLOC_H */
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
new file mode 100644
index 0000000..874632f
--- /dev/null
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_PGALLOC_H
+#define _SPARC64_PGALLOC_H
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/spitfire.h>
+#include <asm/cpudata.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* Page table allocation/freeing. */
+
+extern struct kmem_cache *pgtable_cache;
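+
+/* pgd, pud and pmd tables each hold 1UL << (PAGE_SHIFT - 3) eight-byte
+ * entries, i.e. exactly one 8K page, so a single kmem cache can back all
+ * three levels.
+ */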
+
+static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
+{
+	pgd_set(pgd, pud);
+}
+
+#define pgd_populate(MM, PGD, PUD)	__pgd_populate(PGD, PUD)
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	kmem_cache_free(pgtable_cache, pgd);
+}
+
+static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, pmd);
+}
+
+#define pud_populate(MM, PUD, PMD)	__pud_populate(PUD, PMD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(pgtable_cache, pud);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+	kmem_cache_free(pgtable_cache, pmd);
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+			    unsigned long address);
+pgtable_t pte_alloc_one(struct mm_struct *mm,
+			unsigned long address);
+void pte_free_kernel(struct mm_struct *mm, pte_t *pte);
+void pte_free(struct mm_struct *mm, pgtable_t ptepage);
+
+#define pmd_populate_kernel(MM, PMD, PTE)	pmd_set(MM, PMD, PTE)
+#define pmd_populate(MM, PMD, PTE)		pmd_set(MM, PMD, PTE)
+#define pmd_pgtable(PMD)			((pte_t *)__pmd_page(PMD))
+
+#define check_pgt_cache()	do { } while (0)
+
+void pgtable_free(void *table, bool is_page);
+
+#ifdef CONFIG_SMP
+
+struct mmu_gather;
+void tlb_remove_table(struct mmu_gather *, void *);
+
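+/* pte pages and kernel-table objects are freed differently, and that
+ * distinction must survive the trip through the generic RCU table-freeing
+ * machinery; the low bit of the (at least 8-byte aligned) table pointer
+ * smuggles the is_page flag across tlb_remove_table().
+ */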
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is_page)
+{
+	unsigned long pgf = (unsigned long)table;
+	if (is_page)
+		pgf |= 0x1UL;
+	tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+	void *table = (void *)((unsigned long)_table & ~0x1UL);
+	bool is_page = false;
+
+	if ((unsigned long)_table & 0x1UL)
+		is_page = true;
+	pgtable_free(table, is_page);
+}
+#else /* CONFIG_SMP */
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, bool is_page)
+{
+	pgtable_free(table, is_page);
+}
+#endif /* !CONFIG_SMP */
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
+				  unsigned long address)
+{
+	pgtable_free_tlb(tlb, pte, true);
+}
+
+#define __pmd_free_tlb(tlb, pmd, addr)		      \
+	pgtable_free_tlb(tlb, pmd, false)
+
+#define __pud_free_tlb(tlb, pud, addr)		      \
+	pgtable_free_tlb(tlb, pud, false)
+
+#endif /* _SPARC64_PGALLOC_H */
diff --git a/arch/sparc/include/asm/pgtable.h b/arch/sparc/include/asm/pgtable.h
new file mode 100644
index 0000000..e488911
--- /dev/null
+++ b/arch/sparc/include/asm/pgtable.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_PGTABLE_H
+#define ___ASM_SPARC_PGTABLE_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/pgtable_64.h>
+#else
+#include <asm/pgtable_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
new file mode 100644
index 0000000..4eebed6
--- /dev/null
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_PGTABLE_H
+#define _SPARC_PGTABLE_H
+
+/*  asm/pgtable.h:  Defines and functions used to work
+ *                        with Sparc page tables.
+ *
+ *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/const.h>
+
+#ifndef __ASSEMBLY__
+#include <asm-generic/4level-fixup.h>
+
+#include <linux/spinlock.h>
+#include <linux/mm_types.h>
+#include <asm/types.h>
+#include <asm/pgtsrmmu.h>
+#include <asm/vaddrs.h>
+#include <asm/oplib.h>
+#include <asm/cpu_type.h>
+
+
+struct vm_area_struct;
+struct page;
+
+void load_mmu(void);
+unsigned long calc_highpages(void);
+unsigned long __init bootmem_init(unsigned long *pages_avail);
+
+#define pte_ERROR(e)   __builtin_trap()
+#define pmd_ERROR(e)   __builtin_trap()
+#define pgd_ERROR(e)   __builtin_trap()
+
+#define PMD_SHIFT		22
+#define PMD_SIZE        	(1UL << PMD_SHIFT)
+#define PMD_MASK        	(~(PMD_SIZE-1))
+#define PMD_ALIGN(__addr) 	(((__addr) + ~PMD_MASK) & PMD_MASK)
+#define PGDIR_SHIFT     	SRMMU_PGDIR_SHIFT
+#define PGDIR_SIZE      	SRMMU_PGDIR_SIZE
+#define PGDIR_MASK      	SRMMU_PGDIR_MASK
+#define PTRS_PER_PTE    	1024
+#define PTRS_PER_PMD    	SRMMU_PTRS_PER_PMD
+#define PTRS_PER_PGD    	SRMMU_PTRS_PER_PGD
+#define USER_PTRS_PER_PGD	(PAGE_OFFSET / SRMMU_PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	0UL
+#define PTE_SIZE		(PTRS_PER_PTE*4)
+
+#define PAGE_NONE	SRMMU_PAGE_NONE
+#define PAGE_SHARED	SRMMU_PAGE_SHARED
+#define PAGE_COPY	SRMMU_PAGE_COPY
+#define PAGE_READONLY	SRMMU_PAGE_RDONLY
+#define PAGE_KERNEL	SRMMU_PAGE_KERNEL
+
+/* Top-level page directory - a dummy used by init_mm.
+ * srmmu.c will assign the real one (which is dynamically sized). */
+#define swapper_pg_dir NULL
+
+void paging_init(void);
+
+extern unsigned long ptr_in_current_pgd;
+
+/*         xwr */
+#define __P000  PAGE_NONE
+#define __P001  PAGE_READONLY
+#define __P010  PAGE_COPY
+#define __P011  PAGE_COPY
+#define __P100  PAGE_READONLY
+#define __P101  PAGE_READONLY
+#define __P110  PAGE_COPY
+#define __P111  PAGE_COPY
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY
+#define __S101	PAGE_READONLY
+#define __S110	PAGE_SHARED
+#define __S111	PAGE_SHARED
+
+/* The first physical page can be anywhere; the following is needed so that
+ * va-->pa and vice-versa conversions work properly, without a performance
+ * hit, for all __pa()/__va() operations.
+ */
+extern unsigned long phys_base;
+extern unsigned long pfn_base;
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas, etc.
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+/*
+ * In general all page table modifications should use the V8 atomic
+ * swap instruction.  This ensures the mmu and the cpu are in sync
+ * with respect to ref/mod bits in the page tables.
+ */
+static inline unsigned long srmmu_swap(unsigned long *addr, unsigned long value)
+{
+	__asm__ __volatile__("swap [%2], %0" :
+			"=&r" (value) : "0" (value), "r" (addr) : "memory");
+	return value;
+}
+
+/* Certain architectures need to do special things when pte's
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+
+static inline void set_pte(pte_t *ptep, pte_t pteval)
+{
+	srmmu_swap((unsigned long *)ptep, pte_val(pteval));
+}
+
+#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+static inline int srmmu_device_memory(unsigned long x)
+{
+	return ((x & 0xF0000000) != 0);
+}
+
+static inline struct page *pmd_page(pmd_t pmd)
+{
+	if (srmmu_device_memory(pmd_val(pmd)))
+		BUG();
+	return pfn_to_page((pmd_val(pmd) & SRMMU_PTD_PMASK) >> (PAGE_SHIFT-4));
+}
+
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+	if (srmmu_device_memory(pgd_val(pgd))) {
+		return ~0;
+	} else {
+		unsigned long v = pgd_val(pgd) & SRMMU_PTD_PMASK;
+		return (unsigned long)__nocache_va(v << 4);
+	}
+}
+
+static inline int pte_present(pte_t pte)
+{
+	return ((pte_val(pte) & SRMMU_ET_MASK) == SRMMU_ET_PTE);
+}
+
+static inline int pte_none(pte_t pte)
+{
+	return !pte_val(pte);
+}
+
+static inline void __pte_clear(pte_t *ptep)
+{
+	set_pte(ptep, __pte(0));
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+	__pte_clear(ptep);
+}
+
+static inline int pmd_bad(pmd_t pmd)
+{
+	return (pmd_val(pmd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return ((pmd_val(pmd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+}
+
+static inline int pmd_none(pmd_t pmd)
+{
+	return !pmd_val(pmd);
+}
+
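+/* A software pmd lumps together PTRS_PER_PTE / SRMMU_REAL_PTRS_PER_PTE
+ * (i.e. 16) hardware pte tables, so clearing it means clearing each
+ * hardware PTD in the group.
+ */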
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	int i;
+	for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++)
+		set_pte((pte_t *)&pmdp->pmdv[i], __pte(0));
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+	return !(pgd_val(pgd) & 0xFFFFFFF);
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	return (pgd_val(pgd) & SRMMU_ET_MASK) != SRMMU_ET_PTD;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+	return ((pgd_val(pgd) & SRMMU_ET_MASK) == SRMMU_ET_PTD);
+}
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	set_pte((pte_t *)pgdp, __pte(0));
+}
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_write(pte_t pte)
+{
+	return pte_val(pte) & SRMMU_WRITE;
+}
+
+static inline int pte_dirty(pte_t pte)
+{
+	return pte_val(pte) & SRMMU_DIRTY;
+}
+
+static inline int pte_young(pte_t pte)
+{
+	return pte_val(pte) & SRMMU_REF;
+}
+
+static inline int pte_special(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~SRMMU_WRITE);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~SRMMU_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~SRMMU_REF);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return __pte(pte_val(pte) | SRMMU_WRITE);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | SRMMU_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | SRMMU_REF);
+}
+
+#define pte_mkspecial(pte)    (pte)
+
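+/* SRMMU ptes store the physical address shifted right by 4, with the low
+ * byte left for access and type bits, so pfn<->pte conversions shift by
+ * PAGE_SHIFT-4 rather than PAGE_SHIFT.
+ */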
+#define pfn_pte(pfn, prot)		mk_pte(pfn_to_page(pfn), prot)
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	if (srmmu_device_memory(pte_val(pte))) {
+		/* Just return something that will cause
+		 * pfn_valid() to return false.  This makes
+		 * copy_one_pte() just copy the pte over
+		 * directly.
+		 */
+		return ~0UL;
+	}
+	return (pte_val(pte) & SRMMU_PTE_PMASK) >> (PAGE_SHIFT-4);
+}
+
+#define pte_page(pte)	pfn_to_page(pte_pfn(pte))
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+	return __pte((page_to_pfn(page) << (PAGE_SHIFT-4)) | pgprot_val(pgprot));
+}
+
+static inline pte_t mk_pte_phys(unsigned long page, pgprot_t pgprot)
+{
+	return __pte(((page) >> 4) | pgprot_val(pgprot));
+}
+
+static inline pte_t mk_pte_io(unsigned long page, pgprot_t pgprot, int space)
+{
+	return __pte(((page) >> 4) | (space << 28) | pgprot_val(pgprot));
+}
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	pgprot_val(prot) &= ~pgprot_val(__pgprot(SRMMU_CACHE));
+	return prot;
+}
+
+static pte_t pte_modify(pte_t pte, pgprot_t newprot) __attribute_const__;
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & SRMMU_CHG_MASK) |
+		pgprot_val(newprot));
+}
+
+#define pgd_index(address) ((address) >> PGDIR_SHIFT)
+
+/* to find an entry in a page-table-directory */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the second-level page table.. */
+static inline pmd_t *pmd_offset(pgd_t * dir, unsigned long address)
+{
+	return (pmd_t *) pgd_page_vaddr(*dir) +
+		((address >> PMD_SHIFT) & (PTRS_PER_PMD - 1));
+}
+
+/* Find an entry in the third-level page table.. */
+pte_t *pte_offset_kernel(pmd_t * dir, unsigned long address);
+
+/*
+ * This shortcut works on sun4m (and sun4d) because the nocache area is static.
+ */
+#define pte_offset_map(d, a)		pte_offset_kernel(d,a)
+#define pte_unmap(pte)		do { } while (0)
+
+struct seq_file;
+void mmu_info(struct seq_file *m);
+
+/* Fault handler stuff... */
+#define FAULT_CODE_PROT     0x1
+#define FAULT_CODE_WRITE    0x2
+#define FAULT_CODE_USER     0x4
+
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
+
+void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
+                      unsigned long xva, unsigned int len);
+void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len);
+
+/* Encode and de-code a swap entry */
+static inline unsigned long __swp_type(swp_entry_t entry)
+{
+	return (entry.val >> SRMMU_SWP_TYPE_SHIFT) & SRMMU_SWP_TYPE_MASK;
+}
+
+static inline unsigned long __swp_offset(swp_entry_t entry)
+{
+	return (entry.val >> SRMMU_SWP_OFF_SHIFT) & SRMMU_SWP_OFF_MASK;
+}
+
+static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+{
+	return (swp_entry_t) {
+		(type & SRMMU_SWP_TYPE_MASK) << SRMMU_SWP_TYPE_SHIFT
+		| (offset & SRMMU_SWP_OFF_MASK) << SRMMU_SWP_OFF_SHIFT };
+}
+
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+static inline unsigned long
+__get_phys (unsigned long addr)
+{
+	switch (sparc_cpu_model){
+	case sun4m:
+	case sun4d:
+		return ((srmmu_get_pte (addr) & 0xffffff00) << 4);
+	default:
+		return 0;
+	}
+}
+
+static inline int
+__get_iospace (unsigned long addr)
+{
+	switch (sparc_cpu_model){
+	case sun4m:
+	case sun4d:
+		return (srmmu_get_pte (addr) >> 28);
+	default:
+		return -1;
+	}
+}
+
+extern unsigned long *sparc_valid_addr_bitmap;
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define kern_addr_valid(addr) \
+	(test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap))
+
+/*
+ * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
+ * its high 4 bits.  These macros/functions put it there or get it from there.
+ */
+#define MK_IOSPACE_PFN(space, pfn)	(pfn | (space << (BITS_PER_LONG - 4)))
+#define GET_IOSPACE(pfn)		(pfn >> (BITS_PER_LONG - 4))
+#define GET_PFN(pfn)			(pfn & 0x0fffffffUL)
+
+int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long,
+		    unsigned long, pgprot_t);
+
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long from, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	unsigned long long offset, space, phys_base;
+
+	offset = ((unsigned long long) GET_PFN(pfn)) << PAGE_SHIFT;
+	space = GET_IOSPACE(pfn);
+	phys_base = offset | (space << 32ULL);
+
+	return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+}
+#define io_remap_pfn_range io_remap_pfn_range
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+({									  \
+	int __changed = !pte_same(*(__ptep), __entry);			  \
+	if (__changed) {						  \
+		set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
+		flush_tlb_page(__vma, __address);			  \
+	}								  \
+	__changed;							  \
+})
+
+#include <asm-generic/pgtable.h>
+
+#endif /* !(__ASSEMBLY__) */
+
+#define VMALLOC_START           _AC(0xfe600000,UL)
+#define VMALLOC_END             _AC(0xffc00000,UL)
+
+/* We provide our own get_unmapped_area to cope with VA holes for userland */
+#define HAVE_ARCH_UNMAPPED_AREA
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+
+#endif /* !(_SPARC_PGTABLE_H) */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
new file mode 100644
index 0000000..1393a8a
--- /dev/null
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -0,0 +1,1137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * pgtable.h: SpitFire page table operations.
+ *
+ * Copyright 1996,1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#ifndef _SPARC64_PGTABLE_H
+#define _SPARC64_PGTABLE_H
+
+/* This file contains the functions and defines necessary to modify and use
+ * the SpitFire page tables.
+ */
+
+#include <asm-generic/5level-fixup.h>
+#include <linux/compiler.h>
+#include <linux/const.h>
+#include <asm/types.h>
+#include <asm/spitfire.h>
+#include <asm/asi.h>
+#include <asm/adi.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+/* The kernel image occupies 0x400000 to 0x6000000 (4MB --> 96MB).
+ * The page copy blockops can use 0x6000000 to 0x8000000.
+ * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range.
+ * The 4M TSB is mapped in the 0x8400000 to 0x8800000 range.
+ * The PROM resides in an area spanning 0xf0000000 to 0x100000000.
+ * The vmalloc area spans 0x100000000 to 0x200000000.
+ * Since modules need to be in the lowest 32-bits of the address space,
+ * we place them right before the OBP area from 0x10000000 to 0xf0000000.
+ * There is a single static kernel PMD which maps from 0x0 to address
+ * 0x400000000.
+ */
+#define	TLBTEMP_BASE		_AC(0x0000000006000000,UL)
+#define	TSBMAP_8K_BASE		_AC(0x0000000008000000,UL)
+#define	TSBMAP_4M_BASE		_AC(0x0000000008400000,UL)
+#define MODULES_VADDR		_AC(0x0000000010000000,UL)
+#define MODULES_LEN		_AC(0x00000000e0000000,UL)
+#define MODULES_END		_AC(0x00000000f0000000,UL)
+#define LOW_OBP_ADDRESS		_AC(0x00000000f0000000,UL)
+#define HI_OBP_ADDRESS		_AC(0x0000000100000000,UL)
+#define VMALLOC_START		_AC(0x0000000100000000,UL)
+#define VMEMMAP_BASE		VMALLOC_END
+
+/* PMD_SHIFT determines the size of the area a second-level page
+ * table can map
+ */
+#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3))
+#define PMD_SIZE	(_AC(1,UL) << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+#define PMD_BITS	(PAGE_SHIFT - 3)
+
+/* PUD_SHIFT determines the size of the area a third-level page
+ * table can map
+ */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_BITS)
+#define PUD_SIZE	(_AC(1,UL) << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PUD_BITS	(PAGE_SHIFT - 3)
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_BITS)
+#define PGDIR_SIZE	(_AC(1,UL) << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+#define PGDIR_BITS	(PAGE_SHIFT - 3)
+
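+/* With 8K pages (PAGE_SHIFT == 13) each level indexes with 10 bits:
+ * PMD_SHIFT = 23, PUD_SHIFT = 33, PGDIR_SHIFT = 43, covering 43 + 10 = 53
+ * bits of virtual address -- exactly what the checks below demand.
+ */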
+#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS)
+#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support
+#endif
+
+#if (PGDIR_SHIFT + PGDIR_BITS) != 53
+#error Page table parameters do not cover virtual address space properly.
+#endif
+
+#if (PMD_SHIFT != HPAGE_SHIFT)
+#error PMD_SHIFT must equal HPAGE_SHIFT for transparent huge pages.
+#endif
+
+#ifndef __ASSEMBLY__
+
+extern unsigned long VMALLOC_END;
+
+#define vmemmap			((struct page *)VMEMMAP_BASE)
+
+#include <linux/sched.h>
+
+bool kern_addr_valid(unsigned long addr);
+
+/* Entries per page directory level. */
+#define PTRS_PER_PTE	(1UL << (PAGE_SHIFT-3))
+#define PTRS_PER_PMD	(1UL << PMD_BITS)
+#define PTRS_PER_PUD	(1UL << PUD_BITS)
+#define PTRS_PER_PGD	(1UL << PGDIR_BITS)
+
+/* Kernel has a separate 44-bit address space. */
+#define FIRST_USER_ADDRESS	0UL
+
+#define pmd_ERROR(e)							\
+	pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n",		\
+	       __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0))
+#define pud_ERROR(e)							\
+	pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n",		\
+	       __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0))
+#define pgd_ERROR(e)							\
+	pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n",		\
+	       __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0))
+
+#endif /* !(__ASSEMBLY__) */
+
+/* PTE bits which are the same in SUN4U and SUN4V format.  */
+#define _PAGE_VALID	  _AC(0x8000000000000000,UL) /* Valid TTE            */
+#define _PAGE_R	  	  _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
+#define _PAGE_SPECIAL     _AC(0x0200000000000000,UL) /* Special page         */
+#define _PAGE_PMD_HUGE    _AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_PUD_HUGE    _PAGE_PMD_HUGE
+
+/* SUN4U pte bits... */
+#define _PAGE_SZ4MB_4U	  _AC(0x6000000000000000,UL) /* 4MB Page             */
+#define _PAGE_SZ512K_4U	  _AC(0x4000000000000000,UL) /* 512K Page            */
+#define _PAGE_SZ64K_4U	  _AC(0x2000000000000000,UL) /* 64K Page             */
+#define _PAGE_SZ8K_4U	  _AC(0x0000000000000000,UL) /* 8K Page              */
+#define _PAGE_NFO_4U	  _AC(0x1000000000000000,UL) /* No Fault Only        */
+#define _PAGE_IE_4U	  _AC(0x0800000000000000,UL) /* Invert Endianness    */
+#define _PAGE_SOFT2_4U	  _AC(0x07FC000000000000,UL) /* Software bits, set 2 */
+#define _PAGE_SPECIAL_4U  _AC(0x0200000000000000,UL) /* Special page         */
+#define _PAGE_PMD_HUGE_4U _AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_RES1_4U	  _AC(0x0002000000000000,UL) /* Reserved             */
+#define _PAGE_SZ32MB_4U	  _AC(0x0001000000000000,UL) /* (Panther) 32MB page  */
+#define _PAGE_SZ256MB_4U  _AC(0x2001000000000000,UL) /* (Panther) 256MB page */
+#define _PAGE_SZALL_4U	  _AC(0x6001000000000000,UL) /* All pgsz bits        */
+#define _PAGE_SN_4U	  _AC(0x0000800000000000,UL) /* (Cheetah) Snoop      */
+#define _PAGE_RES2_4U	  _AC(0x0000780000000000,UL) /* Reserved             */
+#define _PAGE_PADDR_4U	  _AC(0x000007FFFFFFE000,UL) /* (Cheetah) pa[42:13]  */
+#define _PAGE_SOFT_4U	  _AC(0x0000000000001F80,UL) /* Software bits:       */
+#define _PAGE_EXEC_4U	  _AC(0x0000000000001000,UL) /* Executable SW bit    */
+#define _PAGE_MODIFIED_4U _AC(0x0000000000000800,UL) /* Modified (dirty)     */
+#define _PAGE_ACCESSED_4U _AC(0x0000000000000400,UL) /* Accessed (ref'd)     */
+#define _PAGE_READ_4U	  _AC(0x0000000000000200,UL) /* Readable SW Bit      */
+#define _PAGE_WRITE_4U	  _AC(0x0000000000000100,UL) /* Writable SW Bit      */
+#define _PAGE_PRESENT_4U  _AC(0x0000000000000080,UL) /* Present              */
+#define _PAGE_L_4U	  _AC(0x0000000000000040,UL) /* Locked TTE           */
+#define _PAGE_CP_4U	  _AC(0x0000000000000020,UL) /* Cacheable in P-Cache */
+#define _PAGE_CV_4U	  _AC(0x0000000000000010,UL) /* Cacheable in V-Cache */
+#define _PAGE_E_4U	  _AC(0x0000000000000008,UL) /* side-Effect          */
+#define _PAGE_P_4U	  _AC(0x0000000000000004,UL) /* Privileged Page      */
+#define _PAGE_W_4U	  _AC(0x0000000000000002,UL) /* Writable             */
+
+/* SUN4V pte bits... */
+#define _PAGE_NFO_4V	  _AC(0x4000000000000000,UL) /* No Fault Only        */
+#define _PAGE_SOFT2_4V	  _AC(0x3F00000000000000,UL) /* Software bits, set 2 */
+#define _PAGE_MODIFIED_4V _AC(0x2000000000000000,UL) /* Modified (dirty)     */
+#define _PAGE_ACCESSED_4V _AC(0x1000000000000000,UL) /* Accessed (ref'd)     */
+#define _PAGE_READ_4V	  _AC(0x0800000000000000,UL) /* Readable SW Bit      */
+#define _PAGE_WRITE_4V	  _AC(0x0400000000000000,UL) /* Writable SW Bit      */
+#define _PAGE_SPECIAL_4V  _AC(0x0200000000000000,UL) /* Special page         */
+#define _PAGE_PMD_HUGE_4V _AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_PADDR_4V	  _AC(0x00FFFFFFFFFFE000,UL) /* paddr[55:13]         */
+#define _PAGE_IE_4V	  _AC(0x0000000000001000,UL) /* Invert Endianness    */
+#define _PAGE_E_4V	  _AC(0x0000000000000800,UL) /* side-Effect          */
+#define _PAGE_CP_4V	  _AC(0x0000000000000400,UL) /* Cacheable in P-Cache */
+#define _PAGE_CV_4V	  _AC(0x0000000000000200,UL) /* Cacheable in V-Cache */
+/* On M7, bit 9 is instead used to enable MCD memory corruption detection */
+#define _PAGE_MCD_4V      _AC(0x0000000000000200,UL) /* Memory Corruption    */
+#define _PAGE_P_4V	  _AC(0x0000000000000100,UL) /* Privileged Page      */
+#define _PAGE_EXEC_4V	  _AC(0x0000000000000080,UL) /* Executable Page      */
+#define _PAGE_W_4V	  _AC(0x0000000000000040,UL) /* Writable             */
+#define _PAGE_SOFT_4V	  _AC(0x0000000000000030,UL) /* Software bits        */
+#define _PAGE_PRESENT_4V  _AC(0x0000000000000010,UL) /* Present              */
+#define _PAGE_RESV_4V	  _AC(0x0000000000000008,UL) /* Reserved             */
+#define _PAGE_SZ16GB_4V	  _AC(0x0000000000000007,UL) /* 16GB Page            */
+#define _PAGE_SZ2GB_4V	  _AC(0x0000000000000006,UL) /* 2GB Page             */
+#define _PAGE_SZ256MB_4V  _AC(0x0000000000000005,UL) /* 256MB Page           */
+#define _PAGE_SZ32MB_4V	  _AC(0x0000000000000004,UL) /* 32MB Page            */
+#define _PAGE_SZ4MB_4V	  _AC(0x0000000000000003,UL) /* 4MB Page             */
+#define _PAGE_SZ512K_4V	  _AC(0x0000000000000002,UL) /* 512K Page            */
+#define _PAGE_SZ64K_4V	  _AC(0x0000000000000001,UL) /* 64K Page             */
+#define _PAGE_SZ8K_4V	  _AC(0x0000000000000000,UL) /* 8K Page              */
+#define _PAGE_SZALL_4V	  _AC(0x0000000000000007,UL) /* All pgsz bits        */
+
+#define _PAGE_SZBITS_4U	_PAGE_SZ8K_4U
+#define _PAGE_SZBITS_4V	_PAGE_SZ8K_4V
+
+#if REAL_HPAGE_SHIFT != 22
+#error REAL_HPAGE_SHIFT and _PAGE_SZHUGE_foo must match up
+#endif
+
+#define _PAGE_SZHUGE_4U	_PAGE_SZ4MB_4U
+#define _PAGE_SZHUGE_4V	_PAGE_SZ4MB_4V
+
+/* These are actually filled in at boot time by sun4{u,v}_pgprot_init() */
+#define __P000	__pgprot(0)
+#define __P001	__pgprot(0)
+#define __P010	__pgprot(0)
+#define __P011	__pgprot(0)
+#define __P100	__pgprot(0)
+#define __P101	__pgprot(0)
+#define __P110	__pgprot(0)
+#define __P111	__pgprot(0)
+
+#define __S000	__pgprot(0)
+#define __S001	__pgprot(0)
+#define __S010	__pgprot(0)
+#define __S011	__pgprot(0)
+#define __S100	__pgprot(0)
+#define __S101	__pgprot(0)
+#define __S110	__pgprot(0)
+#define __S111	__pgprot(0)
+
+#ifndef __ASSEMBLY__
+
+pte_t mk_pte_io(unsigned long, pgprot_t, int, unsigned long);
+
+unsigned long pte_sz_bits(unsigned long size);
+
+extern pgprot_t PAGE_KERNEL;
+extern pgprot_t PAGE_KERNEL_LOCKED;
+extern pgprot_t PAGE_COPY;
+extern pgprot_t PAGE_SHARED;
+
+/* XXX This ugliness is for the atyfb driver's sparc mmap() support. XXX */
+extern unsigned long _PAGE_IE;
+extern unsigned long _PAGE_E;
+extern unsigned long _PAGE_CACHE;
+
+extern unsigned long pg_iobits;
+extern unsigned long _PAGE_ALL_SZ_BITS;
+
+extern struct page *mem_map_zero;
+#define ZERO_PAGE(vaddr)	(mem_map_zero)
+
+/* This macro must be updated when the size of struct page grows above 80
+ * or shrinks below 64.
+ * The idea is that the compiler optimizes the switch() statement away,
+ * leaving only clrx instructions.
+ */
+#define	mm_zero_struct_page(pp) do {					\
+	unsigned long *_pp = (void *)(pp);				\
+									\
+	 /* Check that struct page is either 64, 72, or 80 bytes */	\
+	BUILD_BUG_ON(sizeof(struct page) & 7);				\
+	BUILD_BUG_ON(sizeof(struct page) < 64);				\
+	BUILD_BUG_ON(sizeof(struct page) > 80);				\
+									\
+	switch (sizeof(struct page)) {					\
+	case 80:							\
+		_pp[9] = 0;	/* fallthrough */			\
+	case 72:							\
+		_pp[8] = 0;	/* fallthrough */			\
+	default:							\
+		_pp[7] = 0;						\
+		_pp[6] = 0;						\
+		_pp[5] = 0;						\
+		_pp[4] = 0;						\
+		_pp[3] = 0;						\
+		_pp[2] = 0;						\
+		_pp[1] = 0;						\
+		_pp[0] = 0;						\
+	}								\
+} while (0)
+
+/* PFNs are real physical page numbers.  However, mem_map only begins to record
+ * per-page information starting at pfn_base.  This is to handle systems where
+ * the first physical page in the machine is at some huge physical address,
+ * such as 4GB.   This is common on a partitioned E10000, for example.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
+{
+	unsigned long paddr = pfn << PAGE_SHIFT;
+
+	BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
+	return __pte(paddr | pgprot_val(prot));
+}
+#define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+	pte_t pte = pfn_pte(page_nr, pgprot);
+
+	return __pmd(pte_val(pte));
+}
+#define mk_pmd(page, pgprot)	pfn_pmd(page_to_pfn(page), (pgprot))
+#endif
+
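+/* Many helpers below emit a sun4u instruction sequence at a local label
+ * (661:, 662:, ...) and record that address in a .sun4v_2insn_patch (or
+ * .sun4v_1insn_patch) section alongside replacement instructions; early
+ * boot code patches the sun4u instructions in place when running on a
+ * sun4v hypervisor, so one kernel image handles both pte formats.
+ */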
+/* This one can be done with two shifts.  */
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__(
+	"\n661:	sllx		%1, %2, %0\n"
+	"	srlx		%0, %3, %0\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sllx		%1, %4, %0\n"
+	"	srlx		%0, %5, %0\n"
+	"	.previous\n"
+	: "=r" (ret)
+	: "r" (pte_val(pte)),
+	  "i" (21), "i" (21 + PAGE_SHIFT),
+	  "i" (8), "i" (8 + PAGE_SHIFT));
+
+	return ret;
+}
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t prot)
+{
+	unsigned long mask, tmp;
+
+	/* SUN4U: 0x630107ffffffec38 (negated == 0x9cfef800000013c7)
+	 * SUN4V: 0x33ffffffffffee07 (negated == 0xcc000000000011f8)
+	 *
+	 * Even if we use negation tricks the result is still a 6
+	 * instruction sequence, so don't try to play fancy and just
+	 * do the most straightforward implementation.
+	 *
+	 * Note: We encode this into 3 sun4v 2-insn patch sequences.
+	 */
+
+	BUILD_BUG_ON(_PAGE_SZBITS_4U != 0UL || _PAGE_SZBITS_4V != 0UL);
+	__asm__ __volatile__(
+	"\n661:	sethi		%%uhi(%2), %1\n"
+	"	sethi		%%hi(%2), %0\n"
+	"\n662:	or		%1, %%ulo(%2), %1\n"
+	"	or		%0, %%lo(%2), %0\n"
+	"\n663:	sllx		%1, 32, %1\n"
+	"	or		%0, %1, %0\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%3), %1\n"
+	"	sethi		%%hi(%3), %0\n"
+	"	.word		662b\n"
+	"	or		%1, %%ulo(%3), %1\n"
+	"	or		%0, %%lo(%3), %0\n"
+	"	.word		663b\n"
+	"	sllx		%1, 32, %1\n"
+	"	or		%0, %1, %0\n"
+	"	.previous\n"
+	"	.section	.sun_m7_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%4), %1\n"
+	"	sethi		%%hi(%4), %0\n"
+	"	.word		662b\n"
+	"	or		%1, %%ulo(%4), %1\n"
+	"	or		%0, %%lo(%4), %0\n"
+	"	.word		663b\n"
+	"	sllx		%1, 32, %1\n"
+	"	or		%0, %1, %0\n"
+	"	.previous\n"
+	: "=r" (mask), "=r" (tmp)
+	: "i" (_PAGE_PADDR_4U | _PAGE_MODIFIED_4U | _PAGE_ACCESSED_4U |
+	       _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_E_4U |
+	       _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4U),
+	  "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+	       _PAGE_CP_4V | _PAGE_CV_4V | _PAGE_E_4V |
+	       _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V),
+	  "i" (_PAGE_PADDR_4V | _PAGE_MODIFIED_4V | _PAGE_ACCESSED_4V |
+	       _PAGE_CP_4V | _PAGE_E_4V |
+	       _PAGE_SPECIAL | _PAGE_PMD_HUGE | _PAGE_SZALL_4V));
+
+	return __pte((pte_val(pte) & mask) | (pgprot_val(prot) & ~mask));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_modify(pte, newprot);
+
+	return __pmd(pte_val(pte));
+}
+#endif
+
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	unsigned long val = pgprot_val(prot);
+
+	__asm__ __volatile__(
+	"\n661:	andn		%0, %2, %0\n"
+	"	or		%0, %3, %0\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	andn		%0, %4, %0\n"
+	"	or		%0, %5, %0\n"
+	"	.previous\n"
+	"	.section	.sun_m7_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	andn		%0, %6, %0\n"
+	"	or		%0, %5, %0\n"
+	"	.previous\n"
+	: "=r" (val)
+	: "0" (val), "i" (_PAGE_CP_4U | _PAGE_CV_4U), "i" (_PAGE_E_4U),
+	             "i" (_PAGE_CP_4V | _PAGE_CV_4V), "i" (_PAGE_E_4V),
+	             "i" (_PAGE_CP_4V));
+
+	return __pgprot(val);
+}
+/* Various pieces of code check for platform support by ifdef testing
+ * on "pgprot_noncached".  That's broken and should be fixed, but for
+ * now...
+ */
+#define pgprot_noncached pgprot_noncached
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				struct page *page, int writable);
+#define arch_make_huge_pte arch_make_huge_pte
+static inline unsigned long __pte_default_huge_mask(void)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	sethi		%%uhi(%1), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	mov		%2, %0\n"
+	"	nop\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_SZHUGE_4U), "i" (_PAGE_SZHUGE_4V));
+
+	return mask;
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return __pte(pte_val(pte) | __pte_default_huge_mask());
+}
+
+static inline bool is_default_hugetlb_pte(pte_t pte)
+{
+	unsigned long mask = __pte_default_huge_mask();
+
+	return (pte_val(pte) & mask) == mask;
+}
+
+static inline bool is_hugetlb_pmd(pmd_t pmd)
+{
+	return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
+}
+
+static inline bool is_hugetlb_pud(pud_t pud)
+{
+	return !!(pud_val(pud) & _PAGE_PUD_HUGE);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_mkhuge(pte);
+	pte_val(pte) |= _PAGE_PMD_HUGE;
+
+	return __pmd(pte_val(pte));
+}
+#endif
+#else
+static inline bool is_hugetlb_pte(pte_t pte)
+{
+	return false;
+}
+#endif
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	unsigned long val = pte_val(pte), tmp;
+
+	__asm__ __volatile__(
+	"\n661:	or		%0, %3, %0\n"
+	"	nop\n"
+	"\n662:	nop\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%4), %1\n"
+	"	sllx		%1, 32, %1\n"
+	"	.word		662b\n"
+	"	or		%1, %%lo(%4), %1\n"
+	"	or		%0, %1, %0\n"
+	"	.previous\n"
+	: "=r" (val), "=r" (tmp)
+	: "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U),
+	  "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V));
+
+	return __pte(val);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	unsigned long val = pte_val(pte), tmp;
+
+	__asm__ __volatile__(
+	"\n661:	andn		%0, %3, %0\n"
+	"	nop\n"
+	"\n662:	nop\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%4), %1\n"
+	"	sllx		%1, 32, %1\n"
+	"	.word		662b\n"
+	"	or		%1, %%lo(%4), %1\n"
+	"	andn		%0, %1, %0\n"
+	"	.previous\n"
+	: "=r" (val), "=r" (tmp)
+	: "0" (val), "i" (_PAGE_MODIFIED_4U | _PAGE_W_4U),
+	  "i" (_PAGE_MODIFIED_4V | _PAGE_W_4V));
+
+	return __pte(val);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	unsigned long val = pte_val(pte), mask;
+
+	__asm__ __volatile__(
+	"\n661:	mov		%1, %0\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%2), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V));
+
+	return __pte(val | mask);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	unsigned long val = pte_val(pte), tmp;
+
+	__asm__ __volatile__(
+	"\n661:	andn		%0, %3, %0\n"
+	"	nop\n"
+	"\n662:	nop\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%4), %1\n"
+	"	sllx		%1, 32, %1\n"
+	"	.word		662b\n"
+	"	or		%1, %%lo(%4), %1\n"
+	"	andn		%0, %1, %0\n"
+	"	.previous\n"
+	: "=r" (val), "=r" (tmp)
+	: "0" (val), "i" (_PAGE_WRITE_4U | _PAGE_W_4U),
+	  "i" (_PAGE_WRITE_4V | _PAGE_W_4V));
+
+	return __pte(val);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	mov		%1, %0\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%2), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+	mask |= _PAGE_R;
+
+	return __pte(pte_val(pte) & ~mask);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	mov		%1, %0\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%2), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+	mask |= _PAGE_R;
+
+	return __pte(pte_val(pte) | mask);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_SPECIAL;
+	return pte;
+}
+
+static inline pte_t pte_mkmcd(pte_t pte)
+{
+	pte_val(pte) |= _PAGE_MCD_4V;
+	return pte;
+}
+
+static inline pte_t pte_mknotmcd(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_MCD_4V;
+	return pte;
+}
+
+static inline unsigned long pte_young(pte_t pte)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	mov		%1, %0\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%2), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_ACCESSED_4U), "i" (_PAGE_ACCESSED_4V));
+
+	return (pte_val(pte) & mask);
+}
+
+static inline unsigned long pte_dirty(pte_t pte)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	mov		%1, %0\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%2), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_MODIFIED_4U), "i" (_PAGE_MODIFIED_4V));
+
+	return (pte_val(pte) & mask);
+}
+
+static inline unsigned long pte_write(pte_t pte)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	mov		%1, %0\n"
+	"	nop\n"
+	"	.section	.sun4v_2insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	sethi		%%uhi(%2), %0\n"
+	"	sllx		%0, 32, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_WRITE_4U), "i" (_PAGE_WRITE_4V));
+
+	return (pte_val(pte) & mask);
+}
+
+static inline unsigned long pte_exec(pte_t pte)
+{
+	unsigned long mask;
+
+	__asm__ __volatile__(
+	"\n661:	sethi		%%hi(%1), %0\n"
+	"	.section	.sun4v_1insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	mov		%2, %0\n"
+	"	.previous\n"
+	: "=r" (mask)
+	: "i" (_PAGE_EXEC_4U), "i" (_PAGE_EXEC_4V));
+
+	return (pte_val(pte) & mask);
+}
+
+static inline unsigned long pte_present(pte_t pte)
+{
+	unsigned long val = pte_val(pte);
+
+	__asm__ __volatile__(
+	"\n661:	and		%0, %2, %0\n"
+	"	.section	.sun4v_1insn_patch, \"ax\"\n"
+	"	.word		661b\n"
+	"	and		%0, %3, %0\n"
+	"	.previous\n"
+	: "=r" (val)
+	: "0" (val), "i" (_PAGE_PRESENT_4U), "i" (_PAGE_PRESENT_4V));
+
+	return val;
+}
+
+#define pte_accessible pte_accessible
+static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a)
+{
+	return pte_val(a) & _PAGE_VALID;
+}
+
+static inline unsigned long pte_special(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_SPECIAL;
+}
+
+static inline unsigned long pmd_large(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_pfn(pte);
+}
+
+#define pmd_write pmd_write
+static inline unsigned long pmd_write(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_write(pte);
+}
+
+#define pud_write(pud)	pte_write(__pte(pud_val(pud)))
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_dirty(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_dirty(pte);
+}
+
+static inline unsigned long pmd_young(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_young(pte);
+}
+
+static inline unsigned long pmd_trans_huge(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline pmd_t pmd_mkold(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_mkold(pte);
+
+	return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_wrprotect(pte);
+
+	return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_mkdirty(pte);
+
+	return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_mkclean(pte);
+
+	return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_mkyoung(pte);
+
+	return __pmd(pte_val(pte));
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	pte = pte_mkwrite(pte);
+
+	return __pmd(pte_val(pte));
+}
+
+static inline pgprot_t pmd_pgprot(pmd_t entry)
+{
+	unsigned long val = pmd_val(entry);
+
+	return __pgprot(val);
+}
+#endif
+
+static inline int pmd_present(pmd_t pmd)
+{
+	return pmd_val(pmd) != 0UL;
+}
+
+#define pmd_none(pmd)			(!pmd_val(pmd))
+
+/* pmd_bad() is only called on non-trans-huge PMDs.  Our encoding is
+ * very simple, it's just the physical address.  PTE tables are of
+ * size PAGE_SIZE so make sure the sub-PAGE_SIZE bits are clear and
+ * the top bits outside of the range of any physical address size we
+ * support are clear as well.  We also validate the physical address itself.
+ */
+#define pmd_bad(pmd)			(pmd_val(pmd) & ~PAGE_MASK)
+
+#define pud_none(pud)			(!pud_val(pud))
+
+#define pud_bad(pud)			(pud_val(pud) & ~PAGE_MASK)
+
+#define pgd_none(pgd)			(!pgd_val(pgd))
+
+#define pgd_bad(pgd)			(pgd_val(pgd) & ~PAGE_MASK)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd);
+#else
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+			      pmd_t *pmdp, pmd_t pmd)
+{
+	*pmdp = pmd;
+}
+#endif
+
+static inline void pmd_set(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+{
+	unsigned long val = __pa((unsigned long) (ptep));
+
+	pmd_val(*pmdp) = val;
+}
+
+#define pud_set(pudp, pmdp)	\
+	(pud_val(*(pudp)) = (__pa((unsigned long) (pmdp))))
+static inline unsigned long __pmd_page(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+	unsigned long pfn;
+
+	pfn = pte_pfn(pte);
+
+	return ((unsigned long) __va(pfn << PAGE_SHIFT));
+}
+
+static inline unsigned long pud_page_vaddr(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+	unsigned long pfn;
+
+	pfn = pte_pfn(pte);
+
+	return ((unsigned long) __va(pfn << PAGE_SHIFT));
+}
+
+#define pmd_page(pmd) 			virt_to_page((void *)__pmd_page(pmd))
+#define pud_page(pud) 			virt_to_page((void *)pud_page_vaddr(pud))
+#define pmd_clear(pmdp)			(pmd_val(*(pmdp)) = 0UL)
+#define pud_present(pud)		(pud_val(pud) != 0U)
+#define pud_clear(pudp)			(pud_val(*(pudp)) = 0UL)
+#define pgd_page_vaddr(pgd)		\
+	((unsigned long) __va(pgd_val(pgd)))
+#define pgd_present(pgd)		(pgd_val(pgd) != 0U)
+#define pgd_clear(pgdp)			(pgd_val(*(pgdp)) = 0UL)
+
+static inline unsigned long pud_large(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+
+	return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+
+	return pte_pfn(pte);
+}
+
+/* Same in both SUN4V and SUN4U.  */
+#define pte_none(pte) 			(!pte_val(pte))
+
+#define pgd_set(pgdp, pudp)	\
+	(pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+#define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* Find an entry in the third-level page table.. */
+#define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pud_offset(pgdp, address)	\
+	((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(pudp, address)	\
+	((pmd_t *) pud_page_vaddr(*(pudp)) + \
+	 (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)))
+
+/* Find an entry in the first-level (pte) page table.. */
+#define pte_index(dir, address)	\
+	((pte_t *) __pmd_page(*(dir)) + \
+	 ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+#define pte_offset_kernel		pte_index
+#define pte_offset_map			pte_index
+#define pte_unmap(pte)			do { } while (0)
+
+/* We cannot include <linux/mm_types.h> at this point yet: */
+extern struct mm_struct init_mm;
+
+/* Actual page table PTE updates.  */
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+		   pte_t *ptep, pte_t orig, int fullmm,
+		   unsigned int hugepage_shift);
+
+static inline void maybe_tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+				pte_t *ptep, pte_t orig, int fullmm,
+				unsigned int hugepage_shift)
+{
+	/* It is more efficient to let flush_tlb_kernel_range()
+	 * handle init_mm tlb flushes.
+	 *
+	 * SUN4V NOTE: _PAGE_VALID is the same value in both the SUN4U
+	 *             and SUN4V pte layout, so this inline test is fine.
+	 */
+	if (likely(mm != &init_mm) && pte_accessible(mm, orig))
+		tlb_batch_add(mm, vaddr, ptep, orig, fullmm, hugepage_shift);
+}
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+					    unsigned long addr,
+					    pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+	set_pmd_at(mm, addr, pmdp, __pmd(0UL));
+	return pmd;
+}
+
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+			     pte_t *ptep, pte_t pte, int fullmm)
+{
+	pte_t orig = *ptep;
+
+	*ptep = pte;
+	maybe_tlb_batch_add(mm, addr, ptep, orig, fullmm, PAGE_SHIFT);
+}
+
+#define set_pte_at(mm,addr,ptep,pte)	\
+	__set_pte_at((mm), (addr), (ptep), (pte), 0)
+
+#define pte_clear(mm,addr,ptep)		\
+	set_pte_at((mm), (addr), (ptep), __pte(0UL))
+
+#define __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
+#define pte_clear_not_present_full(mm,addr,ptep,fullmm)	\
+	__set_pte_at((mm), (addr), (ptep), __pte(0UL), (fullmm))
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+#define __HAVE_ARCH_MOVE_PTE
+#define move_pte(pte, prot, old_addr, new_addr)				\
+({									\
+	pte_t newpte = (pte);						\
+	if (tlb_type != hypervisor && pte_present(pte)) {		\
+		unsigned long this_pfn = pte_pfn(pte);			\
+									\
+		if (pfn_valid(this_pfn) &&				\
+		    (((old_addr) ^ (new_addr)) & (1 << 13)))		\
+			flush_dcache_page_all(current->mm,		\
+					      pfn_to_page(this_pfn));	\
+	}								\
+	newpte;								\
+})
+#endif
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+
+void paging_init(void);
+unsigned long find_ecache_flush_span(unsigned long size);
+
+struct seq_file;
+void mmu_info(struct seq_file *);
+
+struct vm_area_struct;
+void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+#endif
+
+/* Encode and de-code a swap entry */
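+/* The 8-bit type sits in bits [PAGE_SHIFT+7:PAGE_SHIFT] and the offset in
+ * the bits above it, leaving the low PAGE_SHIFT bits clear so a swap entry
+ * can never have the _PAGE_PRESENT_4{U,V} bits set.
+ */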
+#define __swp_type(entry)	(((entry).val >> PAGE_SHIFT) & 0xffUL)
+#define __swp_offset(entry)	((entry).val >> (PAGE_SHIFT + 8UL))
+#define __swp_entry(type, offset)	\
+	( (swp_entry_t) \
+	  { \
+		(((long)(type) << PAGE_SHIFT) | \
+                 ((long)(offset) << (PAGE_SHIFT + 8UL))) \
+	  } )
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+
+int page_in_phys_avail(unsigned long paddr);
+
+/*
+ * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
+ * its high 4 bits.  These macros/functions put it there or get it from there.
+ */
+#define MK_IOSPACE_PFN(space, pfn)	(pfn | (space << (BITS_PER_LONG - 4)))
+#define GET_IOSPACE(pfn)		(pfn >> (BITS_PER_LONG - 4))
+#define GET_PFN(pfn)			(pfn & 0x0fffffffffffffffUL)
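+
+/* For example, with BITS_PER_LONG == 64, MK_IOSPACE_PFN(2UL, 0x1000UL)
+ * yields (2UL << 60) | 0x1000; GET_IOSPACE() then recovers 2 and GET_PFN()
+ * recovers 0x1000.
+ */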
+
+int remap_pfn_range(struct vm_area_struct *, unsigned long, unsigned long,
+		    unsigned long, pgprot_t);
+
+void adi_restore_tags(struct mm_struct *mm, struct vm_area_struct *vma,
+		      unsigned long addr, pte_t pte);
+
+int adi_save_tags(struct mm_struct *mm, struct vm_area_struct *vma,
+		  unsigned long addr, pte_t oldpte);
+
+#define __HAVE_ARCH_DO_SWAP_PAGE
+static inline void arch_do_swap_page(struct mm_struct *mm,
+				     struct vm_area_struct *vma,
+				     unsigned long addr,
+				     pte_t pte, pte_t oldpte)
+{
+	/* If this is a new page being mapped in, there can be no
+	 * ADI tags stored away for this page. Skip looking for
+	 * stored tags
+	 */
+	if (pte_none(oldpte))
+		return;
+
+	if (adi_state.enabled && (pte_val(pte) & _PAGE_MCD_4V))
+		adi_restore_tags(mm, vma, addr, pte);
+}
+
+#define __HAVE_ARCH_UNMAP_ONE
+static inline int arch_unmap_one(struct mm_struct *mm,
+				 struct vm_area_struct *vma,
+				 unsigned long addr, pte_t oldpte)
+{
+	if (adi_state.enabled && (pte_val(oldpte) & _PAGE_MCD_4V))
+		return adi_save_tags(mm, vma, addr, oldpte);
+	return 0;
+}
+
+static inline int io_remap_pfn_range(struct vm_area_struct *vma,
+				     unsigned long from, unsigned long pfn,
+				     unsigned long size, pgprot_t prot)
+{
+	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
+	int space = GET_IOSPACE(pfn);
+	unsigned long phys_base;
+
+	phys_base = offset | (((unsigned long) space) << 32UL);
+
+	return remap_pfn_range(vma, from, phys_base >> PAGE_SHIFT, size, prot);
+}
+#define io_remap_pfn_range io_remap_pfn_range
+
+#include <asm/tlbflush.h>
+#include <asm-generic/pgtable.h>
+
+/* We provide our own get_unmapped_area to cope with VA holes and
+ * SHM area cache aliasing for userland.
+ */
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+/* We provide a special get_unmapped_area for framebuffer mmaps to try to use
+ * the largest alignment possible so that larger PTEs can be used.
+ */
+unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
+				   unsigned long, unsigned long,
+				   unsigned long);
+#define HAVE_ARCH_FB_UNMAPPED_AREA
+
+void pgtable_cache_init(void);
+void sun4v_register_fault_status(void);
+void sun4v_ktsb_register(void);
+void __init cheetah_ecache_flush_init(void);
+void sun4v_patch_tlb_handlers(void);
+
+extern unsigned long cmdline_memory_size;
+
+asmlinkage void do_sparc64_fault(struct pt_regs *regs);
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(_SPARC64_PGTABLE_H) */
diff --git a/arch/sparc/include/asm/pgtsrmmu.h b/arch/sparc/include/asm/pgtsrmmu.h
new file mode 100644
index 0000000..32a5088
--- /dev/null
+++ b/arch/sparc/include/asm/pgtsrmmu.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * pgtsrmmu.h:  SRMMU page table defines and code.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_PGTSRMMU_H
+#define _SPARC_PGTSRMMU_H
+
+#include <asm/page.h>
+
+#ifdef __ASSEMBLY__
+#include <asm/thread_info.h>	/* TI_UWINMASK for WINDOW_FLUSH */
+#endif
+
+/* Number of contexts is implementation-dependent; 64k is the most we support */
+#define SRMMU_MAX_CONTEXTS	65536
+
+/* PMD_SHIFT determines the size of the area a second-level page table entry can map */
+#define SRMMU_REAL_PMD_SHIFT		18
+#define SRMMU_REAL_PMD_SIZE		(1UL << SRMMU_REAL_PMD_SHIFT)
+#define SRMMU_REAL_PMD_MASK		(~(SRMMU_REAL_PMD_SIZE-1))
+#define SRMMU_REAL_PMD_ALIGN(__addr)	(((__addr)+SRMMU_REAL_PMD_SIZE-1)&SRMMU_REAL_PMD_MASK)
+
+/* PGDIR_SHIFT determines what a third-level page table entry can map */
+#define SRMMU_PGDIR_SHIFT       24
+#define SRMMU_PGDIR_SIZE        (1UL << SRMMU_PGDIR_SHIFT)
+#define SRMMU_PGDIR_MASK        (~(SRMMU_PGDIR_SIZE-1))
+#define SRMMU_PGDIR_ALIGN(addr) (((addr)+SRMMU_PGDIR_SIZE-1)&SRMMU_PGDIR_MASK)
+
+#define SRMMU_REAL_PTRS_PER_PTE	64
+#define SRMMU_REAL_PTRS_PER_PMD	64
+#define SRMMU_PTRS_PER_PGD	256
+
+#define SRMMU_REAL_PTE_TABLE_SIZE	(SRMMU_REAL_PTRS_PER_PTE*4)
+#define SRMMU_PMD_TABLE_SIZE		(SRMMU_REAL_PTRS_PER_PMD*4)
+#define SRMMU_PGD_TABLE_SIZE		(SRMMU_PTRS_PER_PGD*4)
+
+/*
+ * To support pagetables in highmem, Linux introduces APIs which
+ * return struct page* and generally manipulate page tables when
+ * they are not mapped into kernel space. Our hardware page tables
+ * are smaller than pages. We lump hardware tables into big, page-sized
+ * software tables.
+ *
+ * PMD_SHIFT determines the size of the area a second-level page table entry
+ * can map, and our pmd_t is 16 times larger than normal.  The values which
+ * were once defined here are now generic for 4c and srmmu, so they're
+ * found in pgtable.h.
+ */
+#define SRMMU_PTRS_PER_PMD	4
+
+/* Definition of the values in the ET field of PTD's and PTE's */
+#define SRMMU_ET_MASK         0x3
+#define SRMMU_ET_INVALID      0x0
+#define SRMMU_ET_PTD          0x1
+#define SRMMU_ET_PTE          0x2
+#define SRMMU_ET_REPTE        0x3 /* AIEEE, SuperSparc II reverse endian page! */
+
+/* Physical page extraction from PTP's and PTE's. */
+#define SRMMU_CTX_PMASK    0xfffffff0
+#define SRMMU_PTD_PMASK    0xfffffff0
+#define SRMMU_PTE_PMASK    0xffffff00
+
+/* The pte non-page bits.  Some notes:
+ * 1) cache, dirty, valid, and ref are frobbable
+ *    for both supervisor and user pages.
+ * 2) exec and write will only give the desired effect
+ *    on user pages
+ * 3) use priv and priv_readonly for changing the
+ *    characteristics of supervisor ptes
+ */
+#define SRMMU_CACHE        0x80
+#define SRMMU_DIRTY        0x40
+#define SRMMU_REF          0x20
+#define SRMMU_NOREAD       0x10
+#define SRMMU_EXEC         0x08
+#define SRMMU_WRITE        0x04
+#define SRMMU_VALID        0x02 /* SRMMU_ET_PTE */
+#define SRMMU_PRIV         0x1c
+#define SRMMU_PRIV_RDONLY  0x18
+
+#define SRMMU_CHG_MASK    (0xffffff00 | SRMMU_REF | SRMMU_DIRTY)
+
+/* SRMMU swap entry encoding
+ *
+ * We use 5 bits for the type and 20 for the offset.  This gives us
+ * 32 swapfiles of 4GB each.  Encoding looks like:
+ *
+ * ooooooooooooooooooootttttRRRRRRR
+ * fedcba9876543210fedcba9876543210
+ *
+ * The bottom 7 bits are reserved for protection and status bits, especially
+ * PRESENT.
+ */
+#define SRMMU_SWP_TYPE_MASK	0x1f
+#define SRMMU_SWP_TYPE_SHIFT	7
+#define SRMMU_SWP_OFF_MASK	0xfffff
+#define SRMMU_SWP_OFF_SHIFT	(SRMMU_SWP_TYPE_SHIFT + 5)
+
+/* Some day I will implement true fine grained access bits for
+ * user pages because the SRMMU gives us the capabilities to
+ * enforce all the protection levels that vma's can have.
+ * XXX But for now...
+ */
+#define SRMMU_PAGE_NONE    __pgprot(SRMMU_CACHE | \
+				    SRMMU_PRIV | SRMMU_REF)
+#define SRMMU_PAGE_SHARED  __pgprot(SRMMU_VALID | SRMMU_CACHE | \
+				    SRMMU_EXEC | SRMMU_WRITE | SRMMU_REF)
+#define SRMMU_PAGE_COPY    __pgprot(SRMMU_VALID | SRMMU_CACHE | \
+				    SRMMU_EXEC | SRMMU_REF)
+#define SRMMU_PAGE_RDONLY  __pgprot(SRMMU_VALID | SRMMU_CACHE | \
+				    SRMMU_EXEC | SRMMU_REF)
+#define SRMMU_PAGE_KERNEL  __pgprot(SRMMU_VALID | SRMMU_CACHE | SRMMU_PRIV | \
+				    SRMMU_DIRTY | SRMMU_REF)
+
+/* SRMMU Register addresses in ASI 0x4.  These are valid for all
+ * current SRMMU implementations that exist.
+ */
+#define SRMMU_CTRL_REG           0x00000000
+#define SRMMU_CTXTBL_PTR         0x00000100
+#define SRMMU_CTX_REG            0x00000200
+#define SRMMU_FAULT_STATUS       0x00000300
+#define SRMMU_FAULT_ADDR         0x00000400
+
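+/* Flush all live register windows to the stack: keep executing 'save'
+ * (forcing window overflow traps to spill the older windows) until
+ * TI_UWINMASK reads zero, counting the saves in tmp1, then unwind with an
+ * equal number of 'restore's.
+ */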
+#define WINDOW_FLUSH(tmp1, tmp2)					\
+	mov	0, tmp1;						\
+98:	ld	[%g6 + TI_UWINMASK], tmp2;				\
+	orcc	%g0, tmp2, %g0;						\
+	add	tmp1, 1, tmp1;						\
+	bne	98b;							\
+	 save	%sp, -64, %sp;						\
+99:	subcc	tmp1, 1, tmp1;						\
+	bne	99b;							\
+	 restore %g0, %g0, %g0;
+
+#ifndef __ASSEMBLY__
+extern unsigned long last_valid_pfn;
+
+/* This makes sense. Honest it does - Anton */
+/* XXX Yes but it's ugly as sin.  FIXME. -KMW */
+extern void *srmmu_nocache_pool;
+#define __nocache_pa(VADDR) (((unsigned long)VADDR) - SRMMU_NOCACHE_VADDR + __pa((unsigned long)srmmu_nocache_pool))
+#define __nocache_va(PADDR) (__va((unsigned long)PADDR) - (unsigned long)srmmu_nocache_pool + SRMMU_NOCACHE_VADDR)
+#define __nocache_fix(VADDR) __va(__nocache_pa(VADDR))
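+
+/* Editorial note, not part of the original source: the two macros above
+ * are exact inverses over the nocache pool; a hypothetical sanity check:
+ */
+#define __nocache_roundtrip_ok(VADDR) \
+	((unsigned long)__nocache_va(__nocache_pa(VADDR)) == (unsigned long)(VADDR))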
+
+/* Accessing the MMU control register. */
+unsigned int srmmu_get_mmureg(void);
+void srmmu_set_mmureg(unsigned long regval);
+void srmmu_set_ctable_ptr(unsigned long paddr);
+void srmmu_set_context(int context);
+int srmmu_get_context(void);
+unsigned int srmmu_get_fstatus(void);
+unsigned int srmmu_get_faddr(void);
+
+/* This is guaranteed on all SRMMU's. */
+static inline void srmmu_flush_whole_tlb(void)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t": :
+			     "r" (0x400),        /* Flush entire TLB!! */
+			     "i" (ASI_M_FLUSH_PROBE) : "memory");
+}
+
+static inline int srmmu_get_pte(unsigned long addr)
+{
+	register unsigned long entry;
+
+	__asm__ __volatile__("\n\tlda [%1] %2,%0\n\t" :
+				"=r" (entry):
+				"r" ((addr & 0xfffff000) | 0x400), "i" (ASI_M_FLUSH_PROBE));
+	return entry;
+}
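+
+/* Editorial illustration, not part of the original source: a probe
+ * result can be classified with the ET field defines above, e.g. to
+ * check that a virtual address is backed by a real PTE.
+ */
+static inline int srmmu_probe_is_pte(unsigned long addr)
+{
+	return (srmmu_get_pte(addr) & SRMMU_ET_MASK) == SRMMU_ET_PTE;
+}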
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(_SPARC_PGTSRMMU_H) */
diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h
new file mode 100644
index 0000000..4003c35
--- /dev/null
+++ b/arch/sparc/include/asm/pil.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_PIL_H
+#define _SPARC64_PIL_H
+
+/* To avoid some locking problems, we hard-allocate certain PILs
+ * for SMP cross call messages that must do an etrap/rtrap.
+ *
+ * A local_irq_disable() does not block the cross call delivery, so
+ * when SMP locking is an issue we reschedule the event into a PIL
+ * interrupt which is blocked by local_irq_disable().
+ *
+ * In fact any XCALL which has to etrap/rtrap has a problem because
+ * it is difficult to prevent rtrap from running BH's, and that would
+ * need to be done if the XCALL arrived while %pil==PIL_NORMAL_MAX.
+ *
+ * Finally, in order to handle profiling events even when a
+ * local_irq_disable() is in progress, we only disable up to level 14
+ * interrupts.  Profile counter overflow interrupts arrive at level
+ * 15.
+ */
+#define PIL_SMP_CALL_FUNC	1
+#define PIL_SMP_RECEIVE_SIGNAL	2
+#define PIL_SMP_CAPTURE		3
+#define PIL_DEVICE_IRQ		5
+#define PIL_SMP_CALL_FUNC_SNGL	6
+#define PIL_DEFERRED_PCR_WORK	7
+#define PIL_KGDB_CAPTURE	8
+#define PIL_NORMAL_MAX		14
+#define PIL_NMI			15
+
+#endif /* !(_SPARC64_PIL_H) */
diff --git a/arch/sparc/include/asm/processor.h b/arch/sparc/include/asm/processor.h
new file mode 100644
index 0000000..18295ea
--- /dev/null
+++ b/arch/sparc/include/asm/processor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_PROCESSOR_H
+#define ___ASM_SPARC_PROCESSOR_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/processor_64.h>
+#else
+#include <asm/processor_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
new file mode 100644
index 0000000..192493c
--- /dev/null
+++ b/arch/sparc/include/asm/processor_32.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* include/asm/processor.h
+ *
+ * Copyright (C) 1994 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef __ASM_SPARC_PROCESSOR_H
+#define __ASM_SPARC_PROCESSOR_H
+
+/*
+ * Sparc32 implementation of the macro that returns the current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("sethi %%hi(1f), %0; or %0, %%lo(1f), %0;\n1:" : "=r" (pc)); pc; })
+
+#include <asm/psr.h>
+#include <asm/ptrace.h>
+#include <asm/head.h>
+#include <asm/signal.h>
+#include <asm/page.h>
+
+/* Whee, this is STACK_TOP + PAGE_SIZE and the lowest kernel address too...
+ * That one page is used to protect the kernel from intruders, so that
+ * we can make our access_ok test faster.
+ */
+#define TASK_SIZE	PAGE_OFFSET
+#ifdef __KERNEL__
+#define STACK_TOP	(PAGE_OFFSET - PAGE_SIZE)
+#define STACK_TOP_MAX	STACK_TOP
+#endif /* __KERNEL__ */
+
+struct task_struct;
+
+#ifdef __KERNEL__
+struct fpq {
+	unsigned long *insn_addr;
+	unsigned long insn;
+};
+#endif
+
+typedef struct {
+	int seg;
+} mm_segment_t;
+
+/* The Sparc processor specific thread struct. */
+struct thread_struct {
+	struct pt_regs *kregs;
+	unsigned int _pad1;
+
+	/* Special child fork kpsr/kwim values. */
+	unsigned long fork_kpsr __attribute__ ((aligned (8)));
+	unsigned long fork_kwim;
+
+	/* Floating point regs */
+	unsigned long   float_regs[32] __attribute__ ((aligned (8)));
+	unsigned long   fsr;
+	unsigned long   fpqdepth;
+	struct fpq	fpqueue[16];
+	unsigned long flags;
+	mm_segment_t current_ds;
+};
+
+#define SPARC_FLAG_KTHREAD      0x1    /* task is a kernel thread */
+#define SPARC_FLAG_UNALIGNED    0x2    /* is allowed to do unaligned accesses */
+
+#define INIT_THREAD  { \
+	.flags = SPARC_FLAG_KTHREAD, \
+	.current_ds = KERNEL_DS, \
+}
+
+/* Do necessary setup to start up a newly executed thread. */
+static inline void start_thread(struct pt_regs * regs, unsigned long pc,
+				    unsigned long sp)
+{
+	register unsigned long zero asm("g1");
+
+	regs->psr = (regs->psr & (PSR_CWP)) | PSR_S;
+	regs->pc = ((pc & (~3)) - 4);
+	regs->npc = regs->pc + 4;
+	regs->y = 0;
+	zero = 0;
+	__asm__ __volatile__("std\t%%g0, [%0 + %3 + 0x00]\n\t"
+			     "std\t%%g0, [%0 + %3 + 0x08]\n\t"
+			     "std\t%%g0, [%0 + %3 + 0x10]\n\t"
+			     "std\t%%g0, [%0 + %3 + 0x18]\n\t"
+			     "std\t%%g0, [%0 + %3 + 0x20]\n\t"
+			     "std\t%%g0, [%0 + %3 + 0x28]\n\t"
+			     "std\t%%g0, [%0 + %3 + 0x30]\n\t"
+			     "st\t%1, [%0 + %3 + 0x38]\n\t"
+			     "st\t%%g0, [%0 + %3 + 0x3c]"
+			     : /* no outputs */
+			     : "r" (regs),
+			       "r" (sp - sizeof(struct reg_window32)),
+			       "r" (zero),
+			       "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))
+			     : "memory");
+}
+
+/* Free all resources held by a thread. */
+#define release_thread(tsk)		do { } while(0)
+
+unsigned long get_wchan(struct task_struct *);
+
+#define task_pt_regs(tsk) ((tsk)->thread.kregs)
+#define KSTK_EIP(tsk)  ((tsk)->thread.kregs->pc)
+#define KSTK_ESP(tsk)  ((tsk)->thread.kregs->u_regs[UREG_FP])
+
+#ifdef __KERNEL__
+
+extern struct task_struct *last_task_used_math;
+int do_mathemu(struct pt_regs *regs, struct task_struct *fpt);
+
+#define cpu_relax()	barrier()
+
+extern void (*sparc_idle)(void);
+
+#endif
+
+#endif /* __ASM_SPARC_PROCESSOR_H */
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
new file mode 100644
index 0000000..aac23d4
--- /dev/null
+++ b/arch/sparc/include/asm/processor_64.h
@@ -0,0 +1,263 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * include/asm/processor.h
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef __ASM_SPARC64_PROCESSOR_H
+#define __ASM_SPARC64_PROCESSOR_H
+
+/*
+ * Sparc64 implementation of the macro that returns the current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ void *pc; __asm__("rd %%pc, %0" : "=r" (pc)); pc; })
+
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+
+/*
+ * The user lives in its very own context, and cannot reference us.  Note
+ * that TASK_SIZE is a misnomer: it really gives the maximum user virtual
+ * address that the kernel will allocate out.
+ *
+ * XXX No longer using virtual page tables, kill this upper limit...
+ */
+#define VA_BITS		44
+#ifndef __ASSEMBLY__
+#define VPTE_SIZE	(1UL << (VA_BITS - PAGE_SHIFT + 3))
+#else
+#define VPTE_SIZE	(1 << (VA_BITS - PAGE_SHIFT + 3))
+#endif
+
+#define TASK_SIZE_OF(tsk) \
+	(test_tsk_thread_flag(tsk,TIF_32BIT) ? \
+	 (1UL << 32UL) : ((unsigned long)-VPTE_SIZE))
+#define TASK_SIZE \
+	(test_thread_flag(TIF_32BIT) ? \
+	 (1UL << 32UL) : ((unsigned long)-VPTE_SIZE))
+#ifdef __KERNEL__
+
+#define STACK_TOP32	((1UL << 32UL) - PAGE_SIZE)
+#define STACK_TOP64	(0x0000080000000000UL - (1UL << 32UL))
+
+#define STACK_TOP	(test_thread_flag(TIF_32BIT) ? \
+			 STACK_TOP32 : STACK_TOP64)
+
+#define STACK_TOP_MAX	STACK_TOP64
+
+#endif
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+	unsigned char seg;
+} mm_segment_t;
+
+/* The Sparc processor specific thread struct. */
+/* XXX This should die, everything can go into thread_info now. */
+struct thread_struct {
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* How many spinlocks are held by this thread.
+	 * Used with spinlock debugging to catch tasks
+	 * sleeping illegally with locks held.
+	 */
+	int smp_lock_count;
+	unsigned int smp_lock_pc;
+#else
+	int dummy; /* placate gcc, which dislikes empty structs... */
+#endif
+};
+
+#endif /* !(__ASSEMBLY__) */
+
+#ifndef CONFIG_DEBUG_SPINLOCK
+#define INIT_THREAD  {			\
+	0,				\
+}
+#else /* CONFIG_DEBUG_SPINLOCK */
+#define INIT_THREAD  {					\
+/* smp_lock_count, smp_lock_pc, */			\
+   0,		   0,					\
+}
+#endif /* !(CONFIG_DEBUG_SPINLOCK) */
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/fpumacro.h>
+
+struct task_struct;
+
+/* On uniprocessor systems, processes see TSO semantics even in RMO mode. */
+#ifdef CONFIG_SMP
+#define TSTATE_INITIAL_MM	TSTATE_TSO
+#else
+#define TSTATE_INITIAL_MM	TSTATE_RMO
+#endif
+
+/* Do necessary setup to start up a newly executed thread. */
+#define start_thread(regs, pc, sp) \
+do { \
+	unsigned long __asi = ASI_PNF; \
+	regs->tstate = (regs->tstate & (TSTATE_CWP)) | (TSTATE_INITIAL_MM|TSTATE_IE) | (__asi << 24UL); \
+	regs->tpc = ((pc & (~3)) - 4); \
+	regs->tnpc = regs->tpc + 4; \
+	regs->y = 0; \
+	set_thread_wstate(1 << 3); \
+	if (current_thread_info()->utraps) { \
+		if (*(current_thread_info()->utraps) < 2) \
+			kfree(current_thread_info()->utraps); \
+		else \
+			(*(current_thread_info()->utraps))--; \
+		current_thread_info()->utraps = NULL; \
+	} \
+	__asm__ __volatile__( \
+	"stx		%%g0, [%0 + %2 + 0x00]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x08]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x10]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x18]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x20]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x28]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x30]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x38]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x40]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x48]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x50]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x58]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x60]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x68]\n\t" \
+	"stx		%1,   [%0 + %2 + 0x70]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x78]\n\t" \
+	"wrpr		%%g0, (1 << 3), %%wstate\n\t" \
+	: \
+	: "r" (regs), "r" (sp - sizeof(struct reg_window) - STACK_BIAS), \
+	  "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))); \
+	fprs_write(0);	\
+	current_thread_info()->xfsr[0] = 0;	\
+	current_thread_info()->fpsaved[0] = 0;	\
+	regs->tstate &= ~TSTATE_PEF;	\
+} while (0)
+
+#define start_thread32(regs, pc, sp) \
+do { \
+	unsigned long __asi = ASI_PNF; \
+	pc &= 0x00000000ffffffffUL; \
+	sp &= 0x00000000ffffffffUL; \
+	regs->tstate = (regs->tstate & (TSTATE_CWP))|(TSTATE_INITIAL_MM|TSTATE_IE|TSTATE_AM) | (__asi << 24UL); \
+	regs->tpc = ((pc & (~3)) - 4); \
+	regs->tnpc = regs->tpc + 4; \
+	regs->y = 0; \
+	set_thread_wstate(2 << 3); \
+	if (current_thread_info()->utraps) { \
+		if (*(current_thread_info()->utraps) < 2) \
+			kfree(current_thread_info()->utraps); \
+		else \
+			(*(current_thread_info()->utraps))--; \
+		current_thread_info()->utraps = NULL; \
+	} \
+	__asm__ __volatile__( \
+	"stx		%%g0, [%0 + %2 + 0x00]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x08]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x10]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x18]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x20]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x28]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x30]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x38]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x40]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x48]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x50]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x58]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x60]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x68]\n\t" \
+	"stx		%1,   [%0 + %2 + 0x70]\n\t" \
+	"stx		%%g0, [%0 + %2 + 0x78]\n\t" \
+	"wrpr		%%g0, (2 << 3), %%wstate\n\t" \
+	: \
+	: "r" (regs), "r" (sp - sizeof(struct reg_window32)), \
+	  "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))); \
+	fprs_write(0);	\
+	current_thread_info()->xfsr[0] = 0;	\
+	current_thread_info()->fpsaved[0] = 0;	\
+	regs->tstate &= ~TSTATE_PEF;	\
+} while (0)
+
+/* Free all resources held by a thread. */
+#define release_thread(tsk)		do { } while (0)
+
+unsigned long get_wchan(struct task_struct *task);
+
+#define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
+#define KSTK_EIP(tsk)  (task_pt_regs(tsk)->tpc)
+#define KSTK_ESP(tsk)  (task_pt_regs(tsk)->u_regs[UREG_FP])
+
+/* Please see the commentary in asm/backoff.h for a description of
+ * what these instructions are doing and how they have been chosen.
+ * To make a long story short, we are trying to yield the current cpu
+ * strand during busy loops.
+ */
+#ifdef	BUILD_VDSO
+#define	cpu_relax()	asm volatile("\n99:\n\t"			\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     ::: "memory")
+#else /* ! BUILD_VDSO */
+#define cpu_relax()	asm volatile("\n99:\n\t"			\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     "rd	%%ccr, %%g0\n\t"	\
+				     ".section	.pause_3insn_patch,\"ax\"\n\t"\
+				     ".word	99b\n\t"		\
+				     "wr	%%g0, 128, %%asr27\n\t"	\
+				     "nop\n\t"				\
+				     "nop\n\t"				\
+				     ".previous"			\
+				     ::: "memory")
+#endif
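+
+/* Editorial sketch, not part of the original source: cpu_relax() is
+ * meant for polling loops like this hypothetical one, so the pause
+ * patching above can yield the strand while we spin.
+ */
+static inline void cpu_relax_poll_sketch(volatile unsigned long *flag)
+{
+	while (!*flag)
+		cpu_relax();
+}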
+
+/* Prefetch support.  This is tuned for UltraSPARC-III and later.
+ * UltraSPARC-I will treat these as nops, and UltraSPARC-II has
+ * a shallower prefetch queue than later chips.
+ */
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+static inline void prefetch(const void *x)
+{
+	/* We do not use the read prefetch mnemonic because that
+	 * prefetches into the prefetch-cache which is only accessible
+	 * by floating point operations in UltraSPARC-III and later.
+	 * By contrast, "#one_write" prefetches into the L2 cache
+	 * in shared state.
+	 */
+	__asm__ __volatile__("prefetch [%0], #one_write"
+			     : /* no outputs */
+			     : "r" (x));
+}
+
+static inline void prefetchw(const void *x)
+{
+	/* The best prefetch to use for writes is
+	 * "#n_writes".  This brings the cacheline into the
+	 * L2 cache in "owned" state.
+	 */
+	__asm__ __volatile__("prefetch [%0], #n_writes"
+			     : /* no outputs */
+			     : "r" (x));
+}
+
+#define spin_lock_prefetch(x)	prefetchw(x)
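+
+/* Editorial sketch, not part of the original source: a hypothetical
+ * list walk showing the intended prefetch pattern - start the fetch
+ * of the next node while still working on the current one.
+ */
+struct prefetch_demo_node {
+	struct prefetch_demo_node *next;
+	int payload;
+};
+
+static inline int prefetch_demo_sum(struct prefetch_demo_node *n)
+{
+	int sum = 0;
+
+	while (n) {
+		prefetch(n->next);	/* overlap the fetch with work */
+		sum += n->payload;
+		n = n->next;
+	}
+	return sum;
+}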
+
+#define HAVE_ARCH_PICK_MMAP_LAYOUT
+
+int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap);
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__ASM_SPARC64_PROCESSOR_H) */
diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
new file mode 100644
index 0000000..d955c8d
--- /dev/null
+++ b/arch/sparc/include/asm/prom.h
@@ -0,0 +1,63 @@
+#include <linux/of.h>	/* linux/of.h gets to determine #include ordering */
+#ifndef _SPARC_PROM_H
+#define _SPARC_PROM_H
+#ifdef __KERNEL__
+
+/*
+ * Definitions for talking to the Open Firmware PROM (originally
+ * written for Power Macintosh, later updated for SPARC).
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp.
+ * Updates for SPARC by David S. Miller
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/of_pdt.h>
+#include <linux/proc_fs.h>
+#include <linux/mutex.h>
+#include <linux/atomic.h>
+#include <linux/irqdomain.h>
+
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT	2
+#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT	1
+
+#define of_compat_cmp(s1, s2, l)	strncmp((s1), (s2), (l))
+#define of_prop_cmp(s1, s2)		strcasecmp((s1), (s2))
+#define of_node_cmp(s1, s2)		strcmp((s1), (s2))
+
+struct of_irq_controller {
+	unsigned int	(*irq_build)(struct device_node *, unsigned int, void *);
+	void		*data;
+};
+
+struct device_node *of_find_node_by_cpuid(int cpuid);
+int of_set_property(struct device_node *node, const char *name, void *val, int len);
+extern struct mutex of_set_property_mutex;
+int of_getintprop_default(struct device_node *np,
+			  const char *name, int def);
+int of_find_in_proplist(const char *list, const char *match, int len);
+
+void prom_build_devicetree(void);
+void of_populate_present_mask(void);
+void of_fill_in_cpu_data(void);
+
+struct resource;
+void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name);
+void of_iounmap(struct resource *res, void __iomem *base, unsigned long size);
+
+extern struct device_node *of_console_device;
+extern char *of_console_path;
+extern char *of_console_options;
+
+void irq_trans_init(struct device_node *dp);
+char *build_path_component(struct device_node *dp);
+
+#endif /* __KERNEL__ */
+#endif /* _SPARC_PROM_H */
diff --git a/arch/sparc/include/asm/psr.h b/arch/sparc/include/asm/psr.h
new file mode 100644
index 0000000..65127ce
--- /dev/null
+++ b/arch/sparc/include/asm/psr.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * psr.h: This file holds the macros for masking off various parts of
+ *        the processor status register on the Sparc. This is valid
+ *        for Version 8. On the V9 this is renamed to the PSTATE
+ *        register and its members are accessed as fields like
+ *        PSTATE.PRIV for the current CPU privilege level.
+ *
+ * Copyright (C) 1994 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef __LINUX_SPARC_PSR_H
+#define __LINUX_SPARC_PSR_H
+
+#include <uapi/asm/psr.h>
+
+
+#ifndef __ASSEMBLY__
+/* Get the %psr register. */
+static inline unsigned int get_psr(void)
+{
+	unsigned int psr;
+	__asm__ __volatile__(
+		"rd	%%psr, %0\n\t"
+		"nop\n\t"
+		"nop\n\t"
+		"nop\n\t"
+	: "=r" (psr)
+	: /* no inputs */
+	: "memory");
+
+	return psr;
+}
+
+static inline void put_psr(unsigned int new_psr)
+{
+	__asm__ __volatile__(
+		"wr	%0, 0x0, %%psr\n\t"
+		"nop\n\t"
+		"nop\n\t"
+		"nop\n\t"
+	: /* no outputs */
+	: "r" (new_psr)
+	: "memory", "cc");
+}
+
+/* Get the %fsr register.  Be careful, make sure the floating point
+ * enable bit is set in the %psr when you execute this or you will
+ * incur a trap.
+ */
+
+extern unsigned int fsr_storage;
+
+static inline unsigned int get_fsr(void)
+{
+	unsigned int fsr = 0;
+
+	__asm__ __volatile__(
+		"st	%%fsr, %1\n\t"
+		"ld	%1, %0\n\t"
+	: "=r" (fsr)
+	: "m" (fsr_storage));
+
+	return fsr;
+}
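+
+/* Editorial sketch, not part of the original source: honouring the
+ * warning above - a hypothetical wrapper that sets PSR_EF before
+ * touching %fsr.
+ */
+static inline unsigned int get_fsr_checked(void)
+{
+	unsigned int psr = get_psr();
+
+	if (!(psr & PSR_EF))
+		put_psr(psr | PSR_EF);	/* enable FPU access first */
+	return get_fsr();
+}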
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__LINUX_SPARC_PSR_H) */
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
new file mode 100644
index 0000000..71dd82b
--- /dev/null
+++ b/arch/sparc/include/asm/ptrace.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_PTRACE_H
+#define __SPARC_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+
+#if defined(__sparc__) && defined(__arch64__)
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+#include <linux/threads.h>
+#include <asm/switch_to.h>
+
+static inline int pt_regs_trap_type(struct pt_regs *regs)
+{
+	return regs->magic & 0x1ff;
+}
+
+static inline bool pt_regs_is_syscall(struct pt_regs *regs)
+{
+	return (regs->tstate & TSTATE_SYSCALL);
+}
+
+static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
+{
+	return (regs->tstate &= ~TSTATE_SYSCALL);
+}
+
+#define arch_ptrace_stop_needed(exit_code, info) \
+({	flush_user_windows(); \
+	get_thread_wsaved() != 0; \
+})
+
+#define arch_ptrace_stop(exit_code, info) \
+	synchronize_user_stack()
+
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
+struct global_reg_snapshot {
+	unsigned long		tstate;
+	unsigned long		tpc;
+	unsigned long		tnpc;
+	unsigned long		o7;
+	unsigned long		i7;
+	unsigned long		rpc;
+	struct thread_info	*thread;
+	unsigned long		pad1;
+};
+
+struct global_pmu_snapshot {
+	unsigned long		pcr[4];
+	unsigned long		pic[4];
+};
+
+union global_cpu_snapshot {
+	struct global_reg_snapshot	reg;
+	struct global_pmu_snapshot	pmu;
+};
+
+extern union global_cpu_snapshot global_cpu_snapshot[NR_CPUS];
+
+#define force_successful_syscall_return() set_thread_noerror(1)
+#define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
+#define instruction_pointer(regs) ((regs)->tpc)
+#define instruction_pointer_set(regs, val) do { \
+		(regs)->tpc = (val); \
+		(regs)->tnpc = (val)+4; \
+	} while (0)
+#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	return !(regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY));
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	return regs->u_regs[UREG_I0];
+}
+#ifdef CONFIG_SMP
+unsigned long profile_pc(struct pt_regs *);
+#else
+#define profile_pc(regs) instruction_pointer(regs)
+#endif
+
+#define MAX_REG_OFFSET (offsetof(struct pt_regs, magic))
+
+int regs_query_register_offset(const char *name);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:	pt_regs from which the register value is taken
+ * @offset:	offset number of the register.
+ *
+ * regs_get_register returns the value of the register at the given
+ * @offset from @regs.  The @offset is the offset of the register in
+ * struct pt_regs.  If @offset is bigger than MAX_REG_OFFSET, this
+ * returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+					     unsigned long offset)
+{
+	if (unlikely(offset >= MAX_REG_OFFSET))
+		return 0;
+	if (offset == PT_V9_Y)
+		return *(unsigned int *)((unsigned long)regs + offset);
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
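+
+/* Editorial sketch, not part of the original source: typical use pairs
+ * regs_query_register_offset() with regs_get_register(); the helper
+ * name below is hypothetical.
+ */
+static inline unsigned long regs_get_named_register(struct pt_regs *regs,
+						    const char *name)
+{
+	int off = regs_query_register_offset(name);
+
+	return (off < 0) ? 0 : regs_get_register(regs, (unsigned long)off);
+}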
+
+/* Valid only for Kernel mode traps. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->u_regs[UREG_I6];
+}
+#else /* __ASSEMBLY__ */
+#endif /* __ASSEMBLY__ */
+#else /* (defined(__sparc__) && defined(__arch64__)) */
+#ifndef __ASSEMBLY__
+#include <asm/switch_to.h>
+
+static inline bool pt_regs_is_syscall(struct pt_regs *regs)
+{
+	return (regs->psr & PSR_SYSCALL);
+}
+
+static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
+{
+	return (regs->psr &= ~PSR_SYSCALL);
+}
+
+#define arch_ptrace_stop_needed(exit_code, info) \
+({	flush_user_windows(); \
+	current_thread_info()->w_saved != 0;	\
+})
+
+#define arch_ptrace_stop(exit_code, info) \
+	synchronize_user_stack()
+
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
+#define user_mode(regs) (!((regs)->psr & PSR_PS))
+#define instruction_pointer(regs) ((regs)->pc)
+#define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
+unsigned long profile_pc(struct pt_regs *);
+#else /* (!__ASSEMBLY__) */
+#endif /* (!__ASSEMBLY__) */
+#endif /* (defined(__sparc__) && defined(__arch64__)) */
+#define STACK_BIAS		2047
+
+/* global_reg_snapshot offsets */
+#define GR_SNAP_TSTATE	0x00
+#define GR_SNAP_TPC	0x08
+#define GR_SNAP_TNPC	0x10
+#define GR_SNAP_O7	0x18
+#define GR_SNAP_I7	0x20
+#define GR_SNAP_RPC	0x28
+#define GR_SNAP_THREAD	0x30
+#define GR_SNAP_PAD1	0x38
+
+#endif /* !(__SPARC_PTRACE_H) */
diff --git a/arch/sparc/include/asm/qrwlock.h b/arch/sparc/include/asm/qrwlock.h
new file mode 100644
index 0000000..c277729
--- /dev/null
+++ b/arch/sparc/include/asm/qrwlock.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_QRWLOCK_H
+#define _ASM_SPARC_QRWLOCK_H
+
+#include <asm-generic/qrwlock_types.h>
+#include <asm-generic/qrwlock.h>
+
+#endif /* _ASM_SPARC_QRWLOCK_H */
diff --git a/arch/sparc/include/asm/qspinlock.h b/arch/sparc/include/asm/qspinlock.h
new file mode 100644
index 0000000..48808f3
--- /dev/null
+++ b/arch/sparc/include/asm/qspinlock.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_QSPINLOCK_H
+#define _ASM_SPARC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_SPARC_QSPINLOCK_H */
diff --git a/arch/sparc/include/asm/ross.h b/arch/sparc/include/asm/ross.h
new file mode 100644
index 0000000..79a54d6
--- /dev/null
+++ b/arch/sparc/include/asm/ross.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ross.h: Ross module specific definitions and defines.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_ROSS_H
+#define _SPARC_ROSS_H
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+/* HyperSparcs made by Ross have a %psr 'impl' field of '0001'.  The 'vers'
+ * field has '1111'.
+ */
+
+/* The MMU control register fields on the HyperSparc.
+ *
+ * -----------------------------------------------------------------
+ * |implvers| RSV |CWR|SE|WBE| MID |BM| C|CS|MR|CM|RSV|CE|RSV|NF|ME|
+ * -----------------------------------------------------------------
+ *  31    24 23-22 21  20  19 18-15 14 13 12 11 10  9   8 7-2  1  0
+ *
+ * Phew, lots of fields there ;-)
+ *
+ * CWR: Cache Wrapping Enabled -- if set to one, cache wrapping is on.
+ * SE: Snoop Enable -- if set to one, bus snooping for cache activity is on.
+ * WBE: Write Buffer Enable -- if set to one, the write buffer is on.
+ * MID: The ModuleID of the chip for MBus transactions.
+ * BM: Boot-Mode.  One indicates the MMU is in boot mode.
+ * C: Indicates whether accesses are cacheable while the MMU is
+ *    disabled.
+ * CS: Cache Size -- 0 = 128k, 1 = 256k
+ * MR: Memory Reflection, one indicates that the memory bus connected
+ *     to the MBus supports memory reflection.
+ * CM: Cache Mode -- 0 = write-through, 1 = copy-back
+ * CE: Cache Enable -- 0 = no caching, 1 = cache is on
+ * NF: No Fault -- 0 = faults trap the CPU from supervisor mode
+ *                 1 = faults from supervisor mode do not generate traps
+ * ME: MMU Enable -- 0 = MMU is off, 1 = MMU is on
+ */
+
+#define HYPERSPARC_CWENABLE   0x00200000
+#define HYPERSPARC_SBENABLE   0x00100000
+#define HYPERSPARC_WBENABLE   0x00080000
+#define HYPERSPARC_MIDMASK    0x00078000
+#define HYPERSPARC_BMODE      0x00004000
+#define HYPERSPARC_ACENABLE   0x00002000
+#define HYPERSPARC_CSIZE      0x00001000
+#define HYPERSPARC_MRFLCT     0x00000800
+#define HYPERSPARC_CMODE      0x00000400
+#define HYPERSPARC_CENABLE    0x00000100
+#define HYPERSPARC_NFAULT     0x00000002
+#define HYPERSPARC_MENABLE    0x00000001
+
+
+/* The ICCR instruction cache register on the HyperSparc.
+ *
+ * -----------------------------------------------
+ * |                                 | FTD | ICE |
+ * -----------------------------------------------
+ *  31                                  1     0
+ *
+ * This register is accessed using the V8 'wrasr' and 'rdasr'
+ * opcodes.  Since not all assemblers understand them, and those
+ * that do use different semantics, I will just hard-code the
+ * instructions with '.word' statements.
+ *
+ * FTD:  If set to one, a flush instruction that hits in the
+ *       instruction cache invalidates the corresponding cache
+ *       line.  If FTD is zero, an unimplemented 'flush' trap
+ *       will occur when any flush is executed by the processor.
+ *
+ * ICE:  If set to one, the instruction cache is enabled.  If
+ *       zero, the cache will not be used for instruction fetches.
+ *
+ * All other bits are read as zeros, and writes to them have no
+ * effect.
+ *
+ * Wheee, not many assemblers understand the %iccr register nor
+ * the generic asr r/w instructions.
+ *
+ *  1000 0011 0100 0111 1100 0000 0000 0000   ! rd %iccr, %g1
+ *
+ * 0x  8    3    4    7    c    0    0    0   ! 0x8347c000
+ *
+ *  1011 1111 1000 0000 0110 0000 0000 0000   ! wr %g1, 0x0, %iccr
+ *
+ * 0x  b    f    8    0    6    0    0    0   ! 0xbf806000
+ *
+ */
+
+#define HYPERSPARC_ICCR_FTD     0x00000002
+#define HYPERSPARC_ICCR_ICE     0x00000001
+
+#ifndef __ASSEMBLY__
+
+static inline unsigned int get_ross_icr(void)
+{
+	unsigned int icreg;
+
+	__asm__ __volatile__(".word 0x8347c000\n\t" /* rd %iccr, %g1 */
+			     "mov %%g1, %0\n\t"
+			     : "=r" (icreg)
+			     : /* no inputs */
+			     : "g1", "memory");
+
+	return icreg;
+}
+
+static inline void put_ross_icr(unsigned int icreg)
+{
+	__asm__ __volatile__("or %%g0, %0, %%g1\n\t"
+			     ".word 0xbf806000\n\t" /* wr %g1, 0x0, %iccr */
+			     "nop\n\t"
+			     "nop\n\t"
+			     "nop\n\t"
+			     : /* no outputs */
+			     : "r" (icreg)
+			     : "g1", "memory");
+}
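+
+/* Editorial illustration, not part of the original source: turning the
+ * instruction cache on via the accessors above and the ICCR bit defines.
+ */
+static inline void hyper_icache_enable_sketch(void)
+{
+	put_ross_icr(get_ross_icr() | HYPERSPARC_ICCR_ICE);
+}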
+
+/* HyperSparc specific cache flushing. */
+
+/* This is for the on-chip instruction cache. */
+static inline void hyper_flush_whole_icache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t"
+			     : /* no outputs */
+			     : "i" (ASI_M_FLUSH_IWHOLE)
+			     : "memory");
+}
+
+extern int vac_cache_size;
+extern int vac_line_size;
+
+static inline void hyper_clear_all_tags(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < vac_cache_size; addr += vac_line_size)
+		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+				     : /* no outputs */
+				     : "r" (addr), "i" (ASI_M_DATAC_TAG)
+				     : "memory");
+}
+
+static inline void hyper_flush_unconditional_combined(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < vac_cache_size; addr += vac_line_size)
+		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+				     : /* no outputs */
+				     : "r" (addr), "i" (ASI_M_FLUSH_CTX)
+				     : "memory");
+}
+
+static inline void hyper_flush_cache_user(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < vac_cache_size; addr += vac_line_size)
+		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+				     : /* no outputs */
+				     : "r" (addr), "i" (ASI_M_FLUSH_USER)
+				     : "memory");
+}
+
+static inline void hyper_flush_cache_page(unsigned long page)
+{
+	unsigned long end;
+
+	page &= PAGE_MASK;
+	end = page + PAGE_SIZE;
+	while (page < end) {
+		__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+				     : /* no outputs */
+				     : "r" (page), "i" (ASI_M_FLUSH_PAGE)
+				     : "memory");
+		page += vac_line_size;
+	}
+}
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(_SPARC_ROSS_H) */
diff --git a/arch/sparc/include/asm/sbi.h b/arch/sparc/include/asm/sbi.h
new file mode 100644
index 0000000..4d6026c
--- /dev/null
+++ b/arch/sparc/include/asm/sbi.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sbi.h:  SBI (Sbus Interface on sun4d) definitions
+ *
+ * Copyright (C) 1997 Jakub Jelinek <jj@sunsite.mff.cuni.cz>
+ */
+
+#ifndef _SPARC_SBI_H
+#define _SPARC_SBI_H
+
+#include <asm/obio.h>
+
+/* SBI */
+struct sbi_regs {
+/* 0x0000 */	u32		cid;		/* Component ID */
+/* 0x0004 */	u32		ctl;		/* Control */
+/* 0x0008 */	u32		status;		/* Status */
+		u32		_unused1;
+
+/* 0x0010 */	u32		cfg0;		/* Slot0 config reg */
+/* 0x0014 */	u32		cfg1;		/* Slot1 config reg */
+/* 0x0018 */	u32		cfg2;		/* Slot2 config reg */
+/* 0x001c */	u32		cfg3;		/* Slot3 config reg */
+
+/* 0x0020 */	u32		stb0;		/* Streaming buf control for slot 0 */
+/* 0x0024 */	u32		stb1;		/* Streaming buf control for slot 1 */
+/* 0x0028 */	u32		stb2;		/* Streaming buf control for slot 2 */
+/* 0x002c */	u32		stb3;		/* Streaming buf control for slot 3 */
+
+/* 0x0030 */	u32		intr_state;	/* Interrupt state */
+/* 0x0034 */	u32		intr_tid;	/* Interrupt target ID */
+/* 0x0038 */	u32		intr_diag;	/* Interrupt diagnostics */
+};
+
+#define SBI_CID			0x02800000
+#define SBI_CTL			0x02800004
+#define SBI_STATUS		0x02800008
+#define SBI_CFG0		0x02800010
+#define SBI_CFG1		0x02800014
+#define SBI_CFG2		0x02800018
+#define SBI_CFG3		0x0280001c
+#define SBI_STB0		0x02800020
+#define SBI_STB1		0x02800024
+#define SBI_STB2		0x02800028
+#define SBI_STB3		0x0280002c
+#define SBI_INTR_STATE		0x02800030
+#define SBI_INTR_TID		0x02800034
+#define SBI_INTR_DIAG		0x02800038
+
+/* Burst bits for 8, 16, 32, 64 are in cfgX registers at bits 2, 3, 4, 5 respectively */
+#define SBI_CFG_BURST_MASK	0x0000001e
+
+/* How to make a devid from an SBI number */
+#define SBI2DEVID(sbino) ((sbino<<4)|2)
+
+/* intr_state has 4 bits for slots 0 .. 3 and these bits are repeated for each sbus irq level
+ *
+ *		   +-------+-------+-------+-------+-------+-------+-------+-------+
+ *  SBUS IRQ LEVEL |   7   |   6   |   5   |   4   |   3   |   2   |   1   |       |
+ *		   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Reser |
+ *  SLOT #         |3|2|1|0|3|2|1|0|3|2|1|0|3|2|1|0|3|2|1|0|3|2|1|0|3|2|1|0|  ved  |
+ *                 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-------+
+ *  Bits           31      27      23      19      15      11      7       3      0
+ */
+
+
+#ifndef __ASSEMBLY__
+
+static inline int acquire_sbi(int devid, int mask)
+{
+	__asm__ __volatile__ ("swapa [%2] %3, %0" :
+			      "=r" (mask) :
+			      "0" (mask),
+			      "r" (ECSR_DEV_BASE(devid) | SBI_INTR_STATE),
+			      "i" (ASI_M_CTL));
+	return mask;
+}
+
+static inline void release_sbi(int devid, int mask)
+{
+	__asm__ __volatile__ ("sta %0, [%1] %2" : :
+			      "r" (mask),
+			      "r" (ECSR_DEV_BASE(devid) | SBI_INTR_STATE),
+			      "i" (ASI_M_CTL));
+}
+
+static inline void set_sbi_tid(int devid, int targetid)
+{
+	__asm__ __volatile__ ("sta %0, [%1] %2" : :
+			      "r" (targetid),
+			      "r" (ECSR_DEV_BASE(devid) | SBI_INTR_TID),
+			      "i" (ASI_M_CTL));
+}
+
+static inline int get_sbi_ctl(int devid, int cfgno)
+{
+	int cfg;
+
+	__asm__ __volatile__ ("lda [%1] %2, %0" :
+			      "=r" (cfg) :
+			      "r" ((ECSR_DEV_BASE(devid) | SBI_CFG0) + (cfgno<<2)),
+			      "i" (ASI_M_CTL));
+	return cfg;
+}
+
+static inline void set_sbi_ctl(int devid, int cfgno, int cfg)
+{
+	__asm__ __volatile__ ("sta %0, [%1] %2" : :
+			      "r" (cfg),
+			      "r" ((ECSR_DEV_BASE(devid) | SBI_CFG0) + (cfgno<<2)),
+			      "i" (ASI_M_CTL));
+}
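+
+/* Editorial sketch, not part of the original source: the acknowledge
+ * pattern the accessors above are built for - grab the pending slot
+ * bits for a device, service them, then release.  The helper name is
+ * hypothetical; devid comes from SBI2DEVID().
+ */
+static inline void sbi_ack_slots_sketch(int sbino, int slot_mask)
+{
+	int devid = SBI2DEVID(sbino);
+	int pending = acquire_sbi(devid, slot_mask);
+
+	/* ... service the slots flagged in 'pending' here ... */
+	release_sbi(devid, pending);
+}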
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !(_SPARC_SBI_H) */
diff --git a/arch/sparc/include/asm/scratchpad.h b/arch/sparc/include/asm/scratchpad.h
new file mode 100644
index 0000000..9583511
--- /dev/null
+++ b/arch/sparc/include/asm/scratchpad.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_SCRATCHPAD_H
+#define _SPARC64_SCRATCHPAD_H
+
+/* Sun4v scratchpad registers, accessed via ASI_SCRATCHPAD.  */
+
+#define SCRATCHPAD_MMU_MISS	0x00 /* Shared with OBP - set by OBP	    */
+#define SCRATCHPAD_CPUID	0x08 /* Shared with OBP - set by hypervisor */
+#define SCRATCHPAD_UTSBREG1	0x10
+#define SCRATCHPAD_UTSBREG2	0x18
+	/* 0x20 and 0x28, hypervisor only... */
+#define SCRATCHPAD_UNUSED1	0x30
+#define SCRATCHPAD_UNUSED2	0x38 /* Reserved for OBP		    */
+
+#endif /* !(_SPARC64_SCRATCHPAD_H) */
diff --git a/arch/sparc/include/asm/seccomp.h b/arch/sparc/include/asm/seccomp.h
new file mode 100644
index 0000000..62d4579
--- /dev/null
+++ b/arch/sparc/include/asm/seccomp.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/sparc/include/asm/sections.h b/arch/sparc/include/asm/sections.h
new file mode 100644
index 0000000..08f8334
--- /dev/null
+++ b/arch/sparc/include/asm/sections.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_SECTIONS_H
+#define __SPARC_SECTIONS_H
+
+/* nothing to see, move along */
+#include <asm-generic/sections.h>
+
+/* sparc entry point */
+extern char _start[];
+
+extern char __leon_1insn_patch[];
+extern char __leon_1insn_patch_end[];
+
+#endif
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
new file mode 100644
index 0000000..7220568
--- /dev/null
+++ b/arch/sparc/include/asm/setup.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *	Just a placeholder.
+ */
+#ifndef _SPARC_SETUP_H
+#define _SPARC_SETUP_H
+
+#include <linux/interrupt.h>
+
+#include <uapi/asm/setup.h>
+
+extern char reboot_command[];
+
+#ifdef CONFIG_SPARC32
+/* The CPU that was used for booting.
+ * Only sun4d + leon may have boot_cpu_id != 0.
+ */
+extern unsigned char boot_cpu_id;
+
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+
+extern int serial_console;
+static inline int con_is_present(void)
+{
+	return serial_console ? 0 : 1;
+}
+
+/* from irq_32.c */
+extern volatile unsigned char *fdc_status;
+extern char *pdma_vaddr;
+extern unsigned long pdma_size;
+extern volatile int doing_pdma;
+
+/* This is software state */
+extern char *pdma_base;
+extern unsigned long pdma_areasize;
+
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler);
+
+/* setup_32.c */
+extern unsigned long cmdline_memory_size;
+
+/* devices.c */
+void __init device_scan(void);
+
+/* unaligned_32.c */
+unsigned long safe_compute_effective_address(struct pt_regs *, unsigned int);
+
+#endif
+
+#ifdef CONFIG_SPARC64
+void __init start_early_boot(void);
+
+/* unaligned_64.c */
+int handle_ldf_stq(u32 insn, struct pt_regs *regs);
+void handle_ld_nf(u32 insn, struct pt_regs *regs);
+
+/* init_64.c */
+extern atomic_t dcpage_flushes;
+extern atomic_t dcpage_flushes_xcall;
+
+extern int sysctl_tsb_ratio;
+
+#ifdef CONFIG_SERIAL_SUNHV
+void sunhv_migrate_hvcons_irq(int cpu);
+#endif
+#endif
+void sun_do_break(void);
+extern int stop_a_enabled;
+extern int scons_pwroff;
+
+#endif /* _SPARC_SETUP_H */
diff --git a/arch/sparc/include/asm/sfafsr.h b/arch/sparc/include/asm/sfafsr.h
new file mode 100644
index 0000000..9c98f4f
--- /dev/null
+++ b/arch/sparc/include/asm/sfafsr.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_SFAFSR_H
+#define _SPARC64_SFAFSR_H
+
+#include <linux/const.h>
+
+/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */
+
+#define SFAFSR_ME		(_AC(1,UL) << SFAFSR_ME_SHIFT)
+#define SFAFSR_ME_SHIFT		32
+#define SFAFSR_PRIV		(_AC(1,UL) << SFAFSR_PRIV_SHIFT)
+#define SFAFSR_PRIV_SHIFT	31
+#define SFAFSR_ISAP		(_AC(1,UL) << SFAFSR_ISAP_SHIFT)
+#define SFAFSR_ISAP_SHIFT	30
+#define SFAFSR_ETP		(_AC(1,UL) << SFAFSR_ETP_SHIFT)
+#define SFAFSR_ETP_SHIFT	29
+#define SFAFSR_IVUE		(_AC(1,UL) << SFAFSR_IVUE_SHIFT)
+#define SFAFSR_IVUE_SHIFT	28
+#define SFAFSR_TO		(_AC(1,UL) << SFAFSR_TO_SHIFT)
+#define SFAFSR_TO_SHIFT		27
+#define SFAFSR_BERR		(_AC(1,UL) << SFAFSR_BERR_SHIFT)
+#define SFAFSR_BERR_SHIFT	26
+#define SFAFSR_LDP		(_AC(1,UL) << SFAFSR_LDP_SHIFT)
+#define SFAFSR_LDP_SHIFT	25
+#define SFAFSR_CP		(_AC(1,UL) << SFAFSR_CP_SHIFT)
+#define SFAFSR_CP_SHIFT		24
+#define SFAFSR_WP		(_AC(1,UL) << SFAFSR_WP_SHIFT)
+#define SFAFSR_WP_SHIFT		23
+#define SFAFSR_EDP		(_AC(1,UL) << SFAFSR_EDP_SHIFT)
+#define SFAFSR_EDP_SHIFT	22
+#define SFAFSR_UE		(_AC(1,UL) << SFAFSR_UE_SHIFT)
+#define SFAFSR_UE_SHIFT		21
+#define SFAFSR_CE		(_AC(1,UL) << SFAFSR_CE_SHIFT)
+#define SFAFSR_CE_SHIFT		20
+#define SFAFSR_ETS		(_AC(0xf,UL) << SFAFSR_ETS_SHIFT)
+#define SFAFSR_ETS_SHIFT	16
+#define SFAFSR_PSYND		(_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT)
+#define SFAFSR_PSYND_SHIFT	0
+
+/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read
+ *                     ASI=0x77 VA<63:0>=0x0(High),0x18(Low) for write
+ */
+
+#define UDBE_UE			(_AC(1,UL) << 9)
+#define UDBE_CE			(_AC(1,UL) << 8)
+#define UDBE_E_SYNDR		(_AC(0xff,UL) << 0)
+
+/* The trap handlers for asynchronous errors encode the AFSR and
+ * other pieces of information into a 64-bit argument for C code
+ * encoded as follows:
+ *
+ * -----------------------------------------------
+ * |  UDB_H  |  UDB_L  | TL>1  |  TT  |   AFSR   |
+ * -----------------------------------------------
+ *  63     54 53     44    42   41  33 32       0
+ *
+ * The AFAR is passed in unchanged.
+ */
+#define SFSTAT_UDBH_MASK	(_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT)
+#define SFSTAT_UDBH_SHIFT	54
+#define SFSTAT_UDBL_MASK	(_AC(0x3ff,UL) << SFSTAT_UDBL_SHIFT)
+#define SFSTAT_UDBL_SHIFT	44
+#define SFSTAT_TL_GT_ONE	(_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT)
+#define SFSTAT_TL_GT_ONE_SHIFT	42
+#define SFSTAT_TRAP_TYPE	(_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT)
+#define SFSTAT_TRAP_TYPE_SHIFT	33
+#define SFSTAT_AFSR_MASK	(_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT)
+#define SFSTAT_AFSR_SHIFT	0
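+
+/* Editorial illustration, not part of the original source: hypothetical
+ * accessors unpacking the 64-bit trap argument described above.
+ */
+#define SFSTAT_GET_UDBH(x)	(((x) & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT)
+#define SFSTAT_GET_UDBL(x)	(((x) & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT)
+#define SFSTAT_GET_TT(x)	(((x) & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT)
+#define SFSTAT_GET_AFSR(x)	(((x) & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT)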
+
+/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */
+#define ESTATE_ERR_CE		0x1 /* Correctable errors                    */
+#define ESTATE_ERR_NCE		0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */
+#define ESTATE_ERR_ISAP		0x4 /* System address parity error           */
+#define ESTATE_ERR_ALL		(ESTATE_ERR_CE | \
+				 ESTATE_ERR_NCE | \
+				 ESTATE_ERR_ISAP)
+
+/* The various trap types that report using the above state. */
+#define TRAP_TYPE_IAE		0x09 /* Instruction Access Error             */
+#define TRAP_TYPE_DAE		0x32 /* Data Access Error                    */
+#define TRAP_TYPE_CEE		0x63 /* Correctable ECC Error                */
+
+#endif /* _SPARC64_SFAFSR_H */
diff --git a/arch/sparc/include/asm/sfp-machine.h b/arch/sparc/include/asm/sfp-machine.h
new file mode 100644
index 0000000..4a247b4
--- /dev/null
+++ b/arch/sparc/include/asm/sfp-machine.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_SFP_MACHINE_H
+#define ___ASM_SPARC_SFP_MACHINE_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/sfp-machine_64.h>
+#else
+#include <asm/sfp-machine_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/sfp-machine_32.h b/arch/sparc/include/asm/sfp-machine_32.h
new file mode 100644
index 0000000..838c9d5
--- /dev/null
+++ b/arch/sparc/include/asm/sfp-machine_32.h
@@ -0,0 +1,212 @@
+/* Machine-dependent software floating-point definitions.
+   Sparc userland (_Q_*) version.
+   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz),
+		  David S. Miller (davem@redhat.com) and
+		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _SFP_MACHINE_H
+#define _SFP_MACHINE_H
+
+
+#define _FP_W_TYPE_SIZE		32
+#define _FP_W_TYPE		unsigned long
+#define _FP_WS_TYPE		signed long
+#define _FP_I_TYPE		long
+
+#define _FP_MUL_MEAT_S(R,X,Y)					\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)					\
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)					\
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+
+/* If one NaN is signaling and the other is not,
+ * we choose that one, otherwise we choose X.
+ */
+/* For _Qp_* and _Q_*, this should prefer X, for
+ * CPU instruction emulation this should prefer Y.
+ * (see the SPARCv9 manual, section B.2.2).
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)		\
+	&& !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))	\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    else							\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+/* Some assembly to speed things up. */
+#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
+  __asm__ ("addcc %r7,%8,%2\n\t"					\
+	   "addxcc %r5,%6,%1\n\t"					\
+	   "addx %r3,%4,%0\n"						\
+	   : "=r" (r2),							\
+	     "=&r" (r1),						\
+	     "=&r" (r0)							\
+	   : "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc")
+
+#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)			\
+  __asm__ ("subcc %r7,%8,%2\n\t"					\
+	    "subxcc %r5,%6,%1\n\t"					\
+	    "subx %r3,%4,%0\n"						\
+	   : "=r" (r2),							\
+	     "=&r" (r1),						\
+	     "=&r" (r0)							\
+	   : "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc")
+
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  do {									\
+    /* We need to fool gcc,  as we need to pass more than 10		\
+       input/outputs.  */						\
+    register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
+    __asm__ __volatile__ (						\
+	    "addcc %r8,%9,%1\n\t"					\
+	    "addxcc %r6,%7,%0\n\t"					\
+	    "addxcc %r4,%5,%%g2\n\t"					\
+	    "addx %r2,%3,%%g1\n\t"					\
+	   : "=&r" (r1),						\
+	     "=&r" (r0)							\
+	   : "%rJ" ((USItype)(x3)),					\
+	     "rI" ((USItype)(y3)),					\
+	     "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc", "g1", "g2");						\
+    __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2));			\
+    r3 = _t1; r2 = _t2;							\
+  } while (0)
+
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)		\
+  do {									\
+    /* We need to fool gcc,  as we need to pass more than 10		\
+       input/outputs.  */						\
+    register USItype _t1 __asm__ ("g1"), _t2 __asm__ ("g2");		\
+    __asm__ __volatile__ (						\
+	    "subcc %r8,%9,%1\n\t"					\
+	    "subxcc %r6,%7,%0\n\t"					\
+	    "subxcc %r4,%5,%%g2\n\t"					\
+	    "subx %r2,%3,%%g1\n\t"					\
+	   : "=&r" (r1),						\
+	     "=&r" (r0)							\
+	   : "%rJ" ((USItype)(x3)),					\
+	     "rI" ((USItype)(y3)),					\
+	     "%rJ" ((USItype)(x2)),					\
+	     "rI" ((USItype)(y2)),					\
+	     "%rJ" ((USItype)(x1)),					\
+	     "rI" ((USItype)(y1)),					\
+	     "%rJ" ((USItype)(x0)),					\
+	     "rI" ((USItype)(y0))					\
+	   : "cc", "g1", "g2");						\
+    __asm__ __volatile__ ("" : "=r" (_t1), "=r" (_t2));			\
+    r3 = _t1; r2 = _t2;							\
+  } while (0)
+
+#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) __FP_FRAC_SUB_3(x2,x1,x0,x2,x1,x0,y2,y1,y0)
+
+#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) __FP_FRAC_SUB_4(x3,x2,x1,x0,x3,x2,x1,x0,y3,y2,y1,y0)
+
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
+  __asm__ ("addcc %3,%4,%3\n\t"						\
+	   "addxcc %2,%%g0,%2\n\t"					\
+	   "addxcc %1,%%g0,%1\n\t"					\
+	   "addx %0,%%g0,%0\n\t"					\
+	   : "=&r" (x3),						\
+	     "=&r" (x2),						\
+	     "=&r" (x1),						\
+	     "=&r" (x0)							\
+	   : "rI" ((USItype)(i)),					\
+	     "0" ((USItype)(x3)),					\
+	     "1" ((USItype)(x2)),					\
+	     "2" ((USItype)(x1)),					\
+	     "3" ((USItype)(x0))					\
+	   : "cc")
+
+#ifndef CONFIG_SMP
+extern struct task_struct *last_task_used_math;
+#endif
+
+/* Obtain the current rounding mode. */
+#ifndef FP_ROUNDMODE
+#ifdef CONFIG_SMP
+#define FP_ROUNDMODE	((current->thread.fsr >> 30) & 0x3)
+#else
+#define FP_ROUNDMODE	((last_task_used_math->thread.fsr >> 30) & 0x3)
+#endif
+#endif
+
+/* Exception flags. */
+#define FP_EX_INVALID		(1 << 4)
+#define FP_EX_OVERFLOW		(1 << 3)
+#define FP_EX_UNDERFLOW		(1 << 2)
+#define FP_EX_DIVZERO		(1 << 1)
+#define FP_EX_INEXACT		(1 << 0)
+
+#define FP_HANDLE_EXCEPTIONS return _fex
+
+#ifdef CONFIG_SMP
+#define FP_INHIBIT_RESULTS ((current->thread.fsr >> 23) & _fex)
+#else
+#define FP_INHIBIT_RESULTS ((last_task_used_math->thread.fsr >> 23) & _fex)
+#endif
+
+#ifdef CONFIG_SMP
+#define FP_TRAPPING_EXCEPTIONS ((current->thread.fsr >> 23) & 0x1f)
+#else
+#define FP_TRAPPING_EXCEPTIONS ((last_task_used_math->thread.fsr >> 23) & 0x1f)
+#endif
+
+#endif
diff --git a/arch/sparc/include/asm/sfp-machine_64.h b/arch/sparc/include/asm/sfp-machine_64.h
new file mode 100644
index 0000000..ca913ef
--- /dev/null
+++ b/arch/sparc/include/asm/sfp-machine_64.h
@@ -0,0 +1,93 @@
+/* Machine-dependent software floating-point definitions.
+   Sparc64 kernel version.
+   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@cygnus.com),
+		  Jakub Jelinek (jj@ultra.linux.cz) and
+		  David S. Miller (davem@redhat.com).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _SFP_MACHINE_H
+#define _SFP_MACHINE_H
+
+#define _FP_W_TYPE_SIZE		64
+#define _FP_W_TYPE		unsigned long
+#define _FP_WS_TYPE		signed long
+#define _FP_I_TYPE		long
+
+#define _FP_MUL_MEAT_S(R,X,Y)					\
+  _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y)
+#define _FP_MUL_MEAT_D(R,X,Y)					\
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y)					\
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)	_FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+#define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1)
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+
+/* If one NaN is signaling and the other is not,
+ * we choose that one, otherwise we choose X.
+ */
+/* For _Qp_* and _Q_*, this should prefer X, for
+ * CPU instruction emulation this should prefer Y.
+ * (see the SPARCv9 manual, section B.2.2).
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)		\
+	&& !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs))	\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    else							\
+      {								\
+	R##_s = Y##_s;						\
+	_FP_FRAC_COPY_##wc(R,Y);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+/* Obtain the current rounding mode. */
+#ifndef FP_ROUNDMODE
+#define FP_ROUNDMODE	((current_thread_info()->xfsr[0] >> 30) & 0x3)
+#endif
+
+/* Exception flags. */
+#define FP_EX_INVALID		(1 << 4)
+#define FP_EX_OVERFLOW		(1 << 3)
+#define FP_EX_UNDERFLOW		(1 << 2)
+#define FP_EX_DIVZERO		(1 << 1)
+#define FP_EX_INEXACT		(1 << 0)
+
+#define FP_HANDLE_EXCEPTIONS return _fex
+
+#define FP_INHIBIT_RESULTS ((current_thread_info()->xfsr[0] >> 23) & _fex)
+
+#define FP_TRAPPING_EXCEPTIONS ((current_thread_info()->xfsr[0] >> 23) & 0x1f)
+
+#endif
diff --git a/arch/sparc/include/asm/shmparam.h b/arch/sparc/include/asm/shmparam.h
new file mode 100644
index 0000000..951a452
--- /dev/null
+++ b/arch/sparc/include/asm/shmparam.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_SHMPARAM_H
+#define ___ASM_SPARC_SHMPARAM_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/shmparam_64.h>
+#else
+#include <asm/shmparam_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/shmparam_32.h b/arch/sparc/include/asm/shmparam_32.h
new file mode 100644
index 0000000..9767a8b
--- /dev/null
+++ b/arch/sparc/include/asm/shmparam_32.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASMSPARC_SHMPARAM_H
+#define _ASMSPARC_SHMPARAM_H
+
+#define __ARCH_FORCE_SHMLBA 	1
+
+extern int vac_cache_size;
+#define SHMLBA (vac_cache_size ? vac_cache_size : PAGE_SIZE)
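+
+/* Editorial sketch, not part of the original source: a hypothetical
+ * helper rounding an attach address up to the SHMLBA boundary required
+ * above (division form, since SHMLBA is a runtime value here).
+ */
+#define SHMLBA_ROUND_UP(addr) \
+	((((addr) + SHMLBA - 1) / SHMLBA) * SHMLBA)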
+
+#endif /* _ASMSPARC_SHMPARAM_H */
diff --git a/arch/sparc/include/asm/shmparam_64.h b/arch/sparc/include/asm/shmparam_64.h
new file mode 100644
index 0000000..c0731b5
--- /dev/null
+++ b/arch/sparc/include/asm/shmparam_64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASMSPARC64_SHMPARAM_H
+#define _ASMSPARC64_SHMPARAM_H
+
+#include <asm/spitfire.h>
+
+#define __ARCH_FORCE_SHMLBA	1
+/* attach addr a multiple of this */
+#define	SHMLBA	((PAGE_SIZE > L1DCACHE_SIZE) ? PAGE_SIZE : L1DCACHE_SIZE)
+
+#endif /* _ASMSPARC64_SHMPARAM_H */
diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h
new file mode 100644
index 0000000..ee05f9d
--- /dev/null
+++ b/arch/sparc/include/asm/sigcontext.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_SIGCONTEXT_H
+#define __SPARC_SIGCONTEXT_H
+
+#include <asm/ptrace.h>
+#include <uapi/asm/sigcontext.h>
+
+#ifndef __ASSEMBLY__
+
+#define __SUNOS_MAXWIN   31
+
+/* This is what SunOS does, so shall we, unless we use new 32-bit signals or rt signals. */
+struct sigcontext32 {
+	int sigc_onstack;      /* state to restore */
+	int sigc_mask;         /* sigmask to restore */
+	int sigc_sp;           /* stack pointer */
+	int sigc_pc;           /* program counter */
+	int sigc_npc;          /* next program counter */
+	int sigc_psr;          /* for condition codes etc */
+	int sigc_g1;           /* User uses these two registers */
+	int sigc_o0;           /* within the trampoline code. */
+
+	/* Now comes information regarding the user's window set
+	 * at the time of the signal.
+	 */
+	int sigc_oswins;       /* outstanding windows */
+
+	/* stack ptrs for each regwin buf */
+	unsigned int sigc_spbuf[__SUNOS_MAXWIN];
+
+	/* Windows to restore after signal */
+	struct reg_window32 sigc_wbuf[__SUNOS_MAXWIN];
+};
+
+
+/* This is what we use for 32-bit new non-rt signals. */
+
+typedef struct {
+	struct {
+		unsigned int psr;
+		unsigned int pc;
+		unsigned int npc;
+		unsigned int y;
+		unsigned int u_regs[16]; /* globals and ins */
+	}			si_regs;
+	int			si_mask;
+} __siginfo32_t;
+
+#define __SIGC_MAXWIN	7
+
+typedef struct {
+	unsigned long locals[8];
+	unsigned long ins[8];
+} __siginfo_reg_window;
+
+typedef struct {
+	int			wsaved;
+	__siginfo_reg_window	reg_window[__SIGC_MAXWIN];
+	unsigned long		rwbuf_stkptrs[__SIGC_MAXWIN];
+} __siginfo_rwin_t;
+
+#ifdef CONFIG_SPARC64
+typedef struct {
+	unsigned   int si_float_regs [64];
+	unsigned   long si_fsr;
+	unsigned   long si_gsr;
+	unsigned   long si_fprs;
+} __siginfo_fpu_t;
+
+/* This is what SunOS doesn't have, so we have to write this
+   ourselves and do it properly. */
+struct sigcontext {
+	/* The size of this array has to match SI_MAX_SIZE from siginfo.h */
+	char			sigc_info[128];
+	struct {
+		unsigned long	u_regs[16]; /* globals and ins */
+		unsigned long	tstate;
+		unsigned long	tpc;
+		unsigned long	tnpc;
+		unsigned int	y;
+		unsigned int	fprs;
+	}			sigc_regs;
+	__siginfo_fpu_t *	sigc_fpu_save;
+	struct {
+		void	*	ss_sp;
+		int		ss_flags;
+		unsigned long	ss_size;
+	}			sigc_stack;
+	unsigned long		sigc_mask;
+	__siginfo_rwin_t *	sigc_rwin_save;
+};
+
+#else
+
+typedef struct {
+	unsigned long si_float_regs [32];
+	unsigned long si_fsr;
+	unsigned long si_fpqdepth;
+	struct {
+		unsigned long *insn_addr;
+		unsigned long insn;
+	} si_fpqueue [16];
+} __siginfo_fpu_t;
+#endif /* (CONFIG_SPARC64) */
+
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC_SIGCONTEXT_H) */
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
new file mode 100644
index 0000000..827b73a
--- /dev/null
+++ b/arch/sparc/include/asm/signal.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_SIGNAL_H
+#define __SPARC_SIGNAL_H
+
+#ifndef __ASSEMBLY__
+#include <linux/personality.h>
+#include <linux/types.h>
+#endif
+#include <uapi/asm/signal.h>
+
+#ifndef __ASSEMBLY__
+/*
+ * DJHR
+ * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this
+ * interrupt handler's irq structure should be statically allocated
+ * by the request_irq routine.
+ * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
+ * of interrupt usage and that sucks. Also, without a flag like this,
+ * it may be possible for the free_irq routine to attempt to free
+ * statically allocated data... which is NOT GOOD.
+ *
+ */
+#define SA_STATIC_ALLOC         0x8000
+
+#define __ARCH_HAS_KA_RESTORER
+#define __ARCH_HAS_SA_RESTORER
+
+#endif /* !(__ASSEMBLY__) */
+#endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/smp.h b/arch/sparc/include/asm/smp.h
new file mode 100644
index 0000000..dea59f6
--- /dev/null
+++ b/arch/sparc/include/asm/smp.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_SMP_H
+#define ___ASM_SPARC_SMP_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/smp_64.h>
+#else
+#include <asm/smp_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h
new file mode 100644
index 0000000..8560817
--- /dev/null
+++ b/arch/sparc/include/asm/smp_32.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* smp.h: Sparc specific SMP stuff.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_SMP_H
+#define _SPARC_SMP_H
+
+#include <linux/threads.h>
+#include <asm/head.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cpumask.h>
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_SMP
+
+#ifndef __ASSEMBLY__
+
+#include <asm/ptrace.h>
+#include <asm/asi.h>
+#include <linux/atomic.h>
+
+/*
+ *	Private routines/data
+ */
+
+extern unsigned char boot_cpu_id;
+extern volatile unsigned long cpu_callin_map[NR_CPUS];
+extern cpumask_t smp_commenced_mask;
+extern struct linux_prom_registers smp_penguin_ctable;
+
+typedef void (*smpfunc_t)(unsigned long, unsigned long, unsigned long,
+		       unsigned long, unsigned long);
+
+void cpu_panic(void);
+
+/*
+ *	General functions that each host system must provide.
+ */
+
+void sun4m_init_smp(void);
+void sun4d_init_smp(void);
+
+void smp_callin(void);
+void smp_store_cpu_info(int);
+
+void smp_resched_interrupt(void);
+void smp_call_function_single_interrupt(void);
+void smp_call_function_interrupt(void);
+
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
+struct sparc32_ipi_ops {
+	void (*cross_call)(smpfunc_t func, cpumask_t mask, unsigned long arg1,
+			   unsigned long arg2, unsigned long arg3,
+			   unsigned long arg4);
+	void (*resched)(int cpu);
+	void (*single)(int cpu);
+	void (*mask_one)(int cpu);
+};
+extern const struct sparc32_ipi_ops *sparc32_ipi_ops;
+
+static inline void xc0(smpfunc_t func)
+{
+	sparc32_ipi_ops->cross_call(func, *cpu_online_mask, 0, 0, 0, 0);
+}
+
+static inline void xc1(smpfunc_t func, unsigned long arg1)
+{
+	sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, 0, 0, 0);
+}
+static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2)
+{
+	sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0);
+}
+
+static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2,
+		       unsigned long arg3)
+{
+	sparc32_ipi_ops->cross_call(func, *cpu_online_mask,
+				    arg1, arg2, arg3, 0);
+}
+
+static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2,
+		       unsigned long arg3, unsigned long arg4)
+{
+	sparc32_ipi_ops->cross_call(func, *cpu_online_mask,
+				    arg1, arg2, arg3, arg4);
+}
+
+void arch_send_call_function_single_ipi(int cpu);
+void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+static inline int cpu_logical_map(int cpu)
+{
+	return cpu;
+}
+
+int hard_smp_processor_id(void);
+
+#define raw_smp_processor_id()		(current_thread_info()->cpu)
+
+void smp_setup_cpu_possible_map(void);
+
+#endif /* !(__ASSEMBLY__) */
+
+/* Sparc specific messages. */
+#define MSG_CROSS_CALL         0x0005       /* run func on cpus */
+
+/* Empirical PROM processor mailbox constants.  If the per-cpu mailbox
+ * contains something other than one of these then the ipi is from
+ * Linux's active_kernel_processor.  This facility exists so that
+ * the boot monitor can capture all the other cpus when one catches
+ * a watchdog reset or the user enters the monitor using L1-A keys.
+ */
+#define MBOX_STOPCPU          0xFB
+#define MBOX_IDLECPU          0xFC
+#define MBOX_IDLECPU2         0xFD
+#define MBOX_STOPCPU2         0xFE
+
+#else /* SMP */
+
+#define hard_smp_processor_id()		0
+#define smp_setup_cpu_possible_map() do { } while (0)
+
+#endif /* !(SMP) */
+#endif /* !(_SPARC_SMP_H) */
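Every xcN helper above funnels through the sparc32_ipi_ops table, so
sun4m and sun4d can install different IPI mechanisms at boot without
touching the callers. A standalone sketch of that dispatch pattern
(all names are illustrative, not the kernel's):

	#include <stdio.h>

	typedef void (*demo_func_t)(unsigned long);

	struct demo_ipi_ops {
		void (*cross_call)(demo_func_t func, unsigned long arg1);
	};

	static void demo_cross_call(demo_func_t func, unsigned long arg1)
	{
		/* A real implementation would IPI every online cpu;
		 * here the function just runs locally. */
		func(arg1);
	}

	static const struct demo_ipi_ops demo_ops = {
		.cross_call = demo_cross_call,
	};
	static const struct demo_ipi_ops *ipi_ops = &demo_ops; /* set at "boot" */

	static void xc1_demo(demo_func_t func, unsigned long arg1)
	{
		ipi_ops->cross_call(func, arg1);
	}

	static void flush_something(unsigned long ctx)
	{
		printf("flush for context %lu\n", ctx);
	}

	int main(void)
	{
		xc1_demo(flush_something, 42);
		return 0;
	}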
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
new file mode 100644
index 0000000..e75783b
--- /dev/null
+++ b/arch/sparc/include/asm/smp_64.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* smp.h: Sparc64 specific SMP stuff.
+ *
+ * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _SPARC64_SMP_H
+#define _SPARC64_SMP_H
+
+#include <linux/threads.h>
+#include <asm/asi.h>
+#include <asm/starfire.h>
+#include <asm/spitfire.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/cpumask.h>
+#include <linux/cache.h>
+
+#endif /* !(__ASSEMBLY__) */
+
+#ifdef CONFIG_SMP
+
+#ifndef __ASSEMBLY__
+
+/*
+ *	Private routines/data
+ */
+
+#include <linux/bitops.h>
+#include <linux/atomic.h>
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+extern cpumask_t cpu_core_map[NR_CPUS];
+
+void smp_init_cpu_poke(void);
+void scheduler_poke(void);
+
+void arch_send_call_function_single_ipi(int cpu);
+void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+/*
+ *	General functions that each host system must provide.
+ */
+
+int hard_smp_processor_id(void);
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+void smp_fill_in_cpu_possible_map(void);
+void smp_fill_in_sib_core_maps(void);
+void cpu_play_dead(void);
+
+void smp_fetch_global_regs(void);
+void smp_fetch_global_pmu(void);
+
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
+void smp_callin(void);
+void cpu_panic(void);
+void smp_synchronize_tick_client(void);
+void smp_capture(void);
+void smp_release(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+int __cpu_disable(void);
+void __cpu_die(unsigned int cpu);
+#endif
+
+#endif /* !(__ASSEMBLY__) */
+
+#else
+
+#define hard_smp_processor_id()		0
+#define smp_fill_in_sib_core_maps() do { } while (0)
+#define smp_fetch_global_regs() do { } while (0)
+#define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)
+#define smp_init_cpu_poke() do { } while (0)
+#define scheduler_poke() do { } while (0)
+
+#endif /* !(CONFIG_SMP) */
+
+#endif /* !(_SPARC64_SMP_H) */
diff --git a/arch/sparc/include/asm/sparsemem.h b/arch/sparc/include/asm/sparsemem.h
new file mode 100644
index 0000000..1dd1b61
--- /dev/null
+++ b/arch/sparc/include/asm/sparsemem.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_SPARSEMEM_H
+#define _SPARC64_SPARSEMEM_H
+
+#ifdef __KERNEL__
+
+#include <asm/page.h>
+
+#define SECTION_SIZE_BITS       30
+#define MAX_PHYSADDR_BITS       MAX_PHYS_ADDRESS_BITS
+#define MAX_PHYSMEM_BITS        MAX_PHYS_ADDRESS_BITS
+
+#endif /* !(__KERNEL__) */
+
+#endif /* !(_SPARC64_SPARSEMEM_H) */
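With SECTION_SIZE_BITS set to 30, each sparsemem section spans 1 GB and
a physical address maps to its section by a 30-bit right shift. A tiny
sketch of that arithmetic:

	#include <stdio.h>

	#define DEMO_SECTION_SIZE_BITS 30	/* 1 GB sections */

	int main(void)
	{
		unsigned long long paddr = 0x2C0000000ULL;	/* 11 GB */

		printf("paddr %#llx is in section %llu\n",
		       paddr, paddr >> DEMO_SECTION_SIZE_BITS);
		return 0;
	}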
diff --git a/arch/sparc/include/asm/spinlock.h b/arch/sparc/include/asm/spinlock.h
new file mode 100644
index 0000000..3f4ce55
--- /dev/null
+++ b/arch/sparc/include/asm/spinlock.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_SPINLOCK_H
+#define ___ASM_SPARC_SPINLOCK_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/spinlock_64.h>
+#else
+#include <asm/spinlock_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h
new file mode 100644
index 0000000..bc5aa6f
--- /dev/null
+++ b/arch/sparc/include/asm/spinlock_32.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* spinlock.h: 32-bit Sparc spinlock support.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef __SPARC_SPINLOCK_H
+#define __SPARC_SPINLOCK_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/psr.h>
+#include <asm/barrier.h>
+#include <asm/processor.h> /* for cpu_relax */
+
+#define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	__asm__ __volatile__(
+	"\n1:\n\t"
+	"ldstub	[%0], %%g2\n\t"
+	"orcc	%%g2, 0x0, %%g0\n\t"
+	"bne,a	2f\n\t"
+	" ldub	[%0], %%g2\n\t"
+	".subsection	2\n"
+	"2:\n\t"
+	"orcc	%%g2, 0x0, %%g0\n\t"
+	"bne,a	2b\n\t"
+	" ldub	[%0], %%g2\n\t"
+	"b,a	1b\n\t"
+	".previous\n"
+	: /* no outputs */
+	: "r" (lock)
+	: "g2", "memory", "cc");
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned int result;
+	__asm__ __volatile__("ldstub [%1], %0"
+			     : "=r" (result)
+			     : "r" (lock)
+			     : "memory");
+	return (result == 0);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__asm__ __volatile__("stb %%g0, [%0]" : : "r" (lock) : "memory");
+}
+
+/* Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ *
+ * XXX This might create some problems with my dual spinlock
+ * XXX scheme, deadlocks etc. -DaveM
+ *
+ * Sort of like atomic_t's on Sparc, but even more clever.
+ *
+ *	------------------------------------
+ *	| 24-bit counter           | wlock |  arch_rwlock_t
+ *	------------------------------------
+ *	 31                       8 7     0
+ *
+ * wlock signifies that a writer is in, or that somebody is updating
+ * the counter. If a writer successfully acquires wlock but the
+ * counter is non-zero, it has to release the lock and wait until
+ * both the counter and wlock are zero.
+ *
+ * Unfortunately this scheme limits us to ~16,000,000 cpus.
+ */
+static inline void __arch_read_lock(arch_rwlock_t *rw)
+{
+	register arch_rwlock_t *lp asm("g1");
+	lp = rw;
+	__asm__ __volatile__(
+	"mov	%%o7, %%g4\n\t"
+	"call	___rw_read_enter\n\t"
+	" ldstub	[%%g1 + 3], %%g2\n"
+	: /* no outputs */
+	: "r" (lp)
+	: "g2", "g4", "memory", "cc");
+}
+
+#define arch_read_lock(lock) \
+do {	unsigned long flags; \
+	local_irq_save(flags); \
+	__arch_read_lock(lock); \
+	local_irq_restore(flags); \
+} while(0)
+
+static inline void __arch_read_unlock(arch_rwlock_t *rw)
+{
+	register arch_rwlock_t *lp asm("g1");
+	lp = rw;
+	__asm__ __volatile__(
+	"mov	%%o7, %%g4\n\t"
+	"call	___rw_read_exit\n\t"
+	" ldstub	[%%g1 + 3], %%g2\n"
+	: /* no outputs */
+	: "r" (lp)
+	: "g2", "g4", "memory", "cc");
+}
+
+#define arch_read_unlock(lock) \
+do {	unsigned long flags; \
+	local_irq_save(flags); \
+	__arch_read_unlock(lock); \
+	local_irq_restore(flags); \
+} while(0)
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	register arch_rwlock_t *lp asm("g1");
+	lp = rw;
+	__asm__ __volatile__(
+	"mov	%%o7, %%g4\n\t"
+	"call	___rw_write_enter\n\t"
+	" ldstub	[%%g1 + 3], %%g2\n"
+	: /* no outputs */
+	: "r" (lp)
+	: "g2", "g4", "memory", "cc");
+	*(volatile __u32 *)&lp->lock = ~0U;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *lock)
+{
+	__asm__ __volatile__(
+"	st		%%g0, [%0]"
+	: /* no outputs */
+	: "r" (lock)
+	: "memory");
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	unsigned int val;
+
+	__asm__ __volatile__("ldstub [%1 + 3], %0"
+			     : "=r" (val)
+			     : "r" (&rw->lock)
+			     : "memory");
+
+	if (val == 0) {
+		val = rw->lock & ~0xff;
+		if (val)
+			((volatile u8*)&rw->lock)[3] = 0;
+		else
+			*(volatile u32*)&rw->lock = ~0U;
+	}
+
+	return (val == 0);
+}
+
+static inline int __arch_read_trylock(arch_rwlock_t *rw)
+{
+	register arch_rwlock_t *lp asm("g1");
+	register int res asm("o0");
+	lp = rw;
+	__asm__ __volatile__(
+	"mov	%%o7, %%g4\n\t"
+	"call	___rw_read_try\n\t"
+	" ldstub	[%%g1 + 3], %%g2\n"
+	: "=r" (res)
+	: "r" (lp)
+	: "g2", "g4", "memory", "cc");
+	return res;
+}
+
+#define arch_read_trylock(lock) \
+({	unsigned long flags; \
+	int res; \
+	local_irq_save(flags); \
+	res = __arch_read_trylock(lock); \
+	local_irq_restore(flags); \
+	res; \
+})
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* __SPARC_SPINLOCK_H */
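The word layout documented above (24-bit reader count over an 8-bit
wlock byte, which big-endian sparc reaches as byte offset 3 via ldstub)
is exactly what arch_write_trylock manipulates. A single-threaded
userspace model of that logic, with no atomicity implied:

	#include <stdio.h>
	#include <stdint.h>

	#define WLOCK_MASK   0xffU	/* bits 7:0 */
	#define READER_SHIFT 8		/* counter in bits 31:8 */

	static int try_write_lock(uint32_t *lock)
	{
		if (*lock & WLOCK_MASK)
			return 0;		/* another writer is in */
		*lock |= WLOCK_MASK;		/* ldstub: set wlock byte */
		if (*lock >> READER_SHIFT) {	/* readers still active? */
			*lock &= ~WLOCK_MASK;	/* back off and fail */
			return 0;
		}
		*lock = ~0U;			/* whole word set = write-held */
		return 1;
	}

	int main(void)
	{
		uint32_t lock = 2 << READER_SHIFT;	/* two readers in */

		printf("with readers: trylock=%d\n", try_write_lock(&lock));
		lock = 0;
		printf("uncontended:  trylock=%d (lock=%#x)\n",
		       try_write_lock(&lock), (unsigned int)lock);
		return 0;
	}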
diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h
new file mode 100644
index 0000000..7fc82a2
--- /dev/null
+++ b/arch/sparc/include/asm/spinlock_64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* spinlock.h: 64-bit Sparc spinlock support.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef __SPARC64_SPINLOCK_H
+#define __SPARC64_SPINLOCK_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/processor.h>
+#include <asm/barrier.h>
+#include <asm/qrwlock.h>
+#include <asm/qspinlock.h>
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* !(__SPARC64_SPINLOCK_H) */
diff --git a/arch/sparc/include/asm/spinlock_types.h b/arch/sparc/include/asm/spinlock_types.h
new file mode 100644
index 0000000..ed1d569
--- /dev/null
+++ b/arch/sparc/include/asm/spinlock_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_SPINLOCK_TYPES_H
+#define __SPARC_SPINLOCK_TYPES_H
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#else
+
+typedef struct {
+	volatile unsigned char lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+#endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#ifdef CONFIG_QUEUED_RWLOCKS
+#include <asm-generic/qrwlock_types.h>
+#else
+typedef struct {
+	volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+#endif /* CONFIG_QUEUED_RWLOCKS */
+#endif
diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
new file mode 100644
index 0000000..e9b7d25
--- /dev/null
+++ b/arch/sparc/include/asm/spitfire.h
@@ -0,0 +1,368 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* spitfire.h: SpitFire/BlackBird/Cheetah inline MMU operations.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _SPARC64_SPITFIRE_H
+#define _SPARC64_SPITFIRE_H
+
+#ifdef CONFIG_SPARC64
+
+#include <asm/asi.h>
+
+/* The following register addresses are accessible via ASI_DMMU
+ * and ASI_IMMU, that is there is a distinct and unique copy of
+ * each these registers for each TLB.
+ */
+#define TSB_TAG_TARGET		0x0000000000000000 /* All chips				*/
+#define TLB_SFSR		0x0000000000000018 /* All chips				*/
+#define TSB_REG			0x0000000000000028 /* All chips				*/
+#define TLB_TAG_ACCESS		0x0000000000000030 /* All chips				*/
+#define VIRT_WATCHPOINT		0x0000000000000038 /* All chips				*/
+#define PHYS_WATCHPOINT		0x0000000000000040 /* All chips				*/
+#define TSB_EXTENSION_P		0x0000000000000048 /* Ultra-III and later		*/
+#define TSB_EXTENSION_S		0x0000000000000050 /* Ultra-III and later, D-TLB only	*/
+#define TSB_EXTENSION_N		0x0000000000000058 /* Ultra-III and later		*/
+#define TLB_TAG_ACCESS_EXT	0x0000000000000060 /* Ultra-III+ and later		*/
+
+/* These registers only exist as one entity, and are accessed
+ * via ASI_DMMU only.
+ */
+#define PRIMARY_CONTEXT		0x0000000000000008
+#define SECONDARY_CONTEXT	0x0000000000000010
+#define DMMU_SFAR		0x0000000000000020
+#define VIRT_WATCHPOINT		0x0000000000000038
+#define PHYS_WATCHPOINT		0x0000000000000040
+
+#define SPITFIRE_HIGHEST_LOCKED_TLBENT	(64 - 1)
+#define CHEETAH_HIGHEST_LOCKED_TLBENT	(16 - 1)
+
+#define L1DCACHE_SIZE		0x4000
+
+#define SUN4V_CHIP_INVALID	0x00
+#define SUN4V_CHIP_NIAGARA1	0x01
+#define SUN4V_CHIP_NIAGARA2	0x02
+#define SUN4V_CHIP_NIAGARA3	0x03
+#define SUN4V_CHIP_NIAGARA4	0x04
+#define SUN4V_CHIP_NIAGARA5	0x05
+#define SUN4V_CHIP_SPARC_M6	0x06
+#define SUN4V_CHIP_SPARC_M7	0x07
+#define SUN4V_CHIP_SPARC_M8	0x08
+#define SUN4V_CHIP_SPARC64X	0x8a
+#define SUN4V_CHIP_SPARC_SN	0x8b
+#define SUN4V_CHIP_UNKNOWN	0xff
+
+/*
+ * The following CPU_ID_xxx constants are used
+ * to identify the CPU type in the setup phase
+ * (see head_64.S)
+ */
+#define CPU_ID_NIAGARA1		('1')
+#define CPU_ID_NIAGARA2		('2')
+#define CPU_ID_NIAGARA3		('3')
+#define CPU_ID_NIAGARA4		('4')
+#define CPU_ID_NIAGARA5		('5')
+#define CPU_ID_M6		('6')
+#define CPU_ID_M7		('7')
+#define CPU_ID_M8		('8')
+#define CPU_ID_SONOMA1		('N')
+
+#ifndef __ASSEMBLY__
+
+enum ultra_tlb_layout {
+	spitfire = 0,
+	cheetah = 1,
+	cheetah_plus = 2,
+	hypervisor = 3,
+};
+
+extern enum ultra_tlb_layout tlb_type;
+
+extern int sun4v_chip_type;
+
+extern int cheetah_pcache_forced_on;
+void cheetah_enable_pcache(void);
+
+#define sparc64_highest_locked_tlbent()	\
+	(tlb_type == spitfire ? \
+	 SPITFIRE_HIGHEST_LOCKED_TLBENT : \
+	 CHEETAH_HIGHEST_LOCKED_TLBENT)
+
+extern int num_kernel_image_mappings;
+
+/* The data cache is write through, so this just invalidates the
+ * specified line.
+ */
+static inline void spitfire_put_dcache_tag(unsigned long addr, unsigned long tag)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (tag), "r" (addr), "i" (ASI_DCACHE_TAG));
+}
+
+/* The instruction cache lines are flushed with this, but note that
+ * this does not flush the pipeline.  It is possible for a line to
+ * get flushed but stale instructions to still be in the pipeline;
+ * a flush instruction (to any address) is sufficient to handle
+ * this issue after the line is invalidated.
+ */
+static inline void spitfire_put_icache_tag(unsigned long addr, unsigned long tag)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (tag), "r" (addr), "i" (ASI_IC_TAG));
+}
+
+static inline unsigned long spitfire_get_dtlb_data(int entry)
+{
+	unsigned long data;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (data)
+			     : "r" (entry << 3), "i" (ASI_DTLB_DATA_ACCESS));
+
+	/* Clear TTE diag bits. */
+	data &= ~0x0003fe0000000000UL;
+
+	return data;
+}
+
+static inline unsigned long spitfire_get_dtlb_tag(int entry)
+{
+	unsigned long tag;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (tag)
+			     : "r" (entry << 3), "i" (ASI_DTLB_TAG_READ));
+	return tag;
+}
+
+static inline void spitfire_put_dtlb_data(int entry, unsigned long data)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (data), "r" (entry << 3),
+			       "i" (ASI_DTLB_DATA_ACCESS));
+}
+
+static inline unsigned long spitfire_get_itlb_data(int entry)
+{
+	unsigned long data;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (data)
+			     : "r" (entry << 3), "i" (ASI_ITLB_DATA_ACCESS));
+
+	/* Clear TTE diag bits. */
+	data &= ~0x0003fe0000000000UL;
+
+	return data;
+}
+
+static inline unsigned long spitfire_get_itlb_tag(int entry)
+{
+	unsigned long tag;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (tag)
+			     : "r" (entry << 3), "i" (ASI_ITLB_TAG_READ));
+	return tag;
+}
+
+static inline void spitfire_put_itlb_data(int entry, unsigned long data)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (data), "r" (entry << 3),
+			       "i" (ASI_ITLB_DATA_ACCESS));
+}
+
+static inline void spitfire_flush_dtlb_nucleus_page(unsigned long page)
+{
+	__asm__ __volatile__("stxa	%%g0, [%0] %1\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (page | 0x20), "i" (ASI_DMMU_DEMAP));
+}
+
+static inline void spitfire_flush_itlb_nucleus_page(unsigned long page)
+{
+	__asm__ __volatile__("stxa	%%g0, [%0] %1\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (page | 0x20), "i" (ASI_IMMU_DEMAP));
+}
+
+/* Cheetah has "all non-locked" tlb flushes. */
+static inline void cheetah_flush_dtlb_all(void)
+{
+	__asm__ __volatile__("stxa	%%g0, [%0] %1\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (0x80), "i" (ASI_DMMU_DEMAP));
+}
+
+static inline void cheetah_flush_itlb_all(void)
+{
+	__asm__ __volatile__("stxa	%%g0, [%0] %1\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (0x80), "i" (ASI_IMMU_DEMAP));
+}
+
+/* Cheetah has a 4-tlb layout so direct access is a bit different.
+ * The first two TLBs are fully associative, hold 16 entries, and are
+ * used only for locked and >8K sized translations.  One exists for
+ * data accesses and one for instruction accesses.
+ *
+ * The third TLB is for data accesses to 8K non-locked translations, is
+ * 2 way associative, and holds 512 entries.  The fourth TLB is for
+ * instruction accesses to 8K non-locked translations, is 2 way
+ * associative, and holds 128 entries.
+ *
+ * Cheetah has some bug where bogus data can be returned from
+ * ASI_{D,I}TLB_DATA_ACCESS loads; doing the load twice fixes
+ * the problem for me. -DaveM
+ */
+static inline unsigned long cheetah_get_ldtlb_data(int entry)
+{
+	unsigned long data;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %%g0\n\t"
+			     "ldxa	[%1] %2, %0"
+			     : "=r" (data)
+			     : "r" ((0 << 16) | (entry << 3)),
+			     "i" (ASI_DTLB_DATA_ACCESS));
+
+	return data;
+}
+
+static inline unsigned long cheetah_get_litlb_data(int entry)
+{
+	unsigned long data;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %%g0\n\t"
+			     "ldxa	[%1] %2, %0"
+			     : "=r" (data)
+			     : "r" ((0 << 16) | (entry << 3)),
+			     "i" (ASI_ITLB_DATA_ACCESS));
+
+	return data;
+}
+
+static inline unsigned long cheetah_get_ldtlb_tag(int entry)
+{
+	unsigned long tag;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (tag)
+			     : "r" ((0 << 16) | (entry << 3)),
+			     "i" (ASI_DTLB_TAG_READ));
+
+	return tag;
+}
+
+static inline unsigned long cheetah_get_litlb_tag(int entry)
+{
+	unsigned long tag;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (tag)
+			     : "r" ((0 << 16) | (entry << 3)),
+			     "i" (ASI_ITLB_TAG_READ));
+
+	return tag;
+}
+
+static inline void cheetah_put_ldtlb_data(int entry, unsigned long data)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (data),
+			       "r" ((0 << 16) | (entry << 3)),
+			       "i" (ASI_DTLB_DATA_ACCESS));
+}
+
+static inline void cheetah_put_litlb_data(int entry, unsigned long data)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (data),
+			       "r" ((0 << 16) | (entry << 3)),
+			       "i" (ASI_ITLB_DATA_ACCESS));
+}
+
+static inline unsigned long cheetah_get_dtlb_data(int entry, int tlb)
+{
+	unsigned long data;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %%g0\n\t"
+			     "ldxa	[%1] %2, %0"
+			     : "=r" (data)
+			     : "r" ((tlb << 16) | (entry << 3)), "i" (ASI_DTLB_DATA_ACCESS));
+
+	return data;
+}
+
+static inline unsigned long cheetah_get_dtlb_tag(int entry, int tlb)
+{
+	unsigned long tag;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (tag)
+			     : "r" ((tlb << 16) | (entry << 3)), "i" (ASI_DTLB_TAG_READ));
+	return tag;
+}
+
+static inline void cheetah_put_dtlb_data(int entry, unsigned long data, int tlb)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (data),
+			       "r" ((tlb << 16) | (entry << 3)),
+			       "i" (ASI_DTLB_DATA_ACCESS));
+}
+
+static inline unsigned long cheetah_get_itlb_data(int entry)
+{
+	unsigned long data;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %%g0\n\t"
+			     "ldxa	[%1] %2, %0"
+			     : "=r" (data)
+			     : "r" ((2 << 16) | (entry << 3)),
+                               "i" (ASI_ITLB_DATA_ACCESS));
+
+	return data;
+}
+
+static inline unsigned long cheetah_get_itlb_tag(int entry)
+{
+	unsigned long tag;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=r" (tag)
+			     : "r" ((2 << 16) | (entry << 3)), "i" (ASI_ITLB_TAG_READ));
+	return tag;
+}
+
+static inline void cheetah_put_itlb_data(int entry, unsigned long data)
+{
+	__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+			     "membar	#Sync"
+			     : /* No outputs */
+			     : "r" (data), "r" ((2 << 16) | (entry << 3)),
+			       "i" (ASI_ITLB_DATA_ACCESS));
+}
+
+#endif /* !(__ASSEMBLY__) */
+#endif /* CONFIG_SPARC64 */
+#endif /* !(_SPARC64_SPITFIRE_H) */
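The cheetah accessors above all form their diagnostic addresses as
(tlb << 16) | (entry << 3): the TLB select sits at bit 16 and up, and
each entry is one 8-byte register. A sketch of the address computation:

	#include <stdio.h>

	static unsigned long tlb_diag_addr(int tlb, int entry)
	{
		return ((unsigned long)tlb << 16) | ((unsigned long)entry << 3);
	}

	int main(void)
	{
		/* e.g. entry 5 of TLB select 2, as the itlb accessors use */
		printf("diag addr = %#lx\n", tlb_diag_addr(2, 5));
		return 0;
	}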
diff --git a/arch/sparc/include/asm/stacktrace.h b/arch/sparc/include/asm/stacktrace.h
new file mode 100644
index 0000000..556ec5d
--- /dev/null
+++ b/arch/sparc/include/asm/stacktrace.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_STACKTRACE_H
+#define _SPARC64_STACKTRACE_H
+
+void stack_trace_flush(void);
+
+#endif /* _SPARC64_STACKTRACE_H */
diff --git a/arch/sparc/include/asm/starfire.h b/arch/sparc/include/asm/starfire.h
new file mode 100644
index 0000000..fb1a8c4
--- /dev/null
+++ b/arch/sparc/include/asm/starfire.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * starfire.h: Group all starfire specific code together.
+ *
+ * Copyright (C) 2000 Anton Blanchard (anton@samba.org)
+ */
+
+#ifndef _SPARC64_STARFIRE_H
+#define _SPARC64_STARFIRE_H
+
+#ifndef __ASSEMBLY__
+
+extern int this_is_starfire;
+
+void check_if_starfire(void);
+void starfire_hookup(int);
+unsigned int starfire_translate(unsigned long imap, unsigned int upaid);
+
+#endif
+#endif
diff --git a/arch/sparc/include/asm/string.h b/arch/sparc/include/asm/string.h
new file mode 100644
index 0000000..3d9cd08
--- /dev/null
+++ b/arch/sparc/include/asm/string.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_STRING_H
+#define ___ASM_SPARC_STRING_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/string_64.h>
+#else
+#include <asm/string_32.h>
+#endif
+
+/* First the mem*() things. */
+#define __HAVE_ARCH_MEMMOVE
+void *memmove(void *, const void *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMCPY
+#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count) __builtin_memset(s, c, count)
+
+#define __HAVE_ARCH_MEMSCAN
+
+#define memscan(__arg0, __char, __arg2)						\
+({										\
+	void *__memscan_zero(void *, size_t);					\
+	void *__memscan_generic(void *, int, size_t);				\
+	void *__retval, *__addr = (__arg0);					\
+	size_t __size = (__arg2);						\
+										\
+	if (__builtin_constant_p(__char) && !(__char))				\
+		__retval = __memscan_zero(__addr, __size);			\
+	else									\
+		__retval = __memscan_generic(__addr, (__char), __size);		\
+										\
+	__retval;								\
+})
+
+#define __HAVE_ARCH_MEMCMP
+int memcmp(const void *,const void *,__kernel_size_t);
+
+#define __HAVE_ARCH_STRNCMP
+int strncmp(const char *, const char *, __kernel_size_t);
+
+#endif
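The memscan() macro above uses __builtin_constant_p so the common
scan-for-NUL case resolves to the specialized helper at compile time.
A standalone analogue with stand-in helpers (GNU statement expressions,
as in the original):

	#include <stdio.h>
	#include <stddef.h>

	static void *scan_zero(void *p, size_t n)
	{
		unsigned char *c = p;

		while (n-- && *c)
			c++;
		return c;
	}

	static void *scan_generic(void *p, int ch, size_t n)
	{
		unsigned char *c = p;

		while (n-- && *c != (unsigned char)ch)
			c++;
		return c;
	}

	#define demo_memscan(addr, ch, size)				\
	({								\
		void *__r;						\
		if (__builtin_constant_p(ch) && !(ch))			\
			__r = scan_zero((addr), (size));		\
		else							\
			__r = scan_generic((addr), (ch), (size));	\
		__r;							\
	})

	int main(void)
	{
		char buf[] = "sparc";

		printf("NUL at offset %td\n",
		       (char *)demo_memscan(buf, 0, sizeof(buf)) - buf);
		printf("'a' at offset %td\n",
		       (char *)demo_memscan(buf, 'a', sizeof(buf)) - buf);
		return 0;
	}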
diff --git a/arch/sparc/include/asm/string_32.h b/arch/sparc/include/asm/string_32.h
new file mode 100644
index 0000000..f488946
--- /dev/null
+++ b/arch/sparc/include/asm/string_32.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * string.h: External definitions for optimized assembly string
+ *           routines for the Linux Kernel.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#ifndef __SPARC_STRING_H__
+#define __SPARC_STRING_H__
+
+#include <asm/page.h>
+
+#endif /* !(__SPARC_STRING_H__) */
diff --git a/arch/sparc/include/asm/string_64.h b/arch/sparc/include/asm/string_64.h
new file mode 100644
index 0000000..ee9ba67
--- /dev/null
+++ b/arch/sparc/include/asm/string_64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * string.h: External definitions for optimized assembly string
+ *           routines for the Linux Kernel.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997,1999 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#ifndef __SPARC64_STRING_H__
+#define __SPARC64_STRING_H__
+
+#include <asm/asi.h>
+
+/* Now the str*() stuff... */
+#define __HAVE_ARCH_STRLEN
+__kernel_size_t strlen(const char *);
+
+#endif /* !(__SPARC64_STRING_H__) */
diff --git a/arch/sparc/include/asm/sunbpp.h b/arch/sparc/include/asm/sunbpp.h
new file mode 100644
index 0000000..55de4da
--- /dev/null
+++ b/arch/sparc/include/asm/sunbpp.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/sparc/include/asm/sunbpp.h
+ */
+
+#ifndef _ASM_SPARC_SUNBPP_H
+#define _ASM_SPARC_SUNBPP_H
+
+struct bpp_regs {
+  /* DMA registers */
+  __volatile__ __u32 p_csr;		/* DMA Control/Status Register */
+  __volatile__ __u32 p_addr;		/* Address Register */
+  __volatile__ __u32 p_bcnt;		/* Byte Count Register */
+  __volatile__ __u32 p_tst_csr;		/* Test Control/Status (DMA2 only) */
+  /* Parallel Port registers */
+  __volatile__ __u16 p_hcr;		/* Hardware Configuration Register */
+  __volatile__ __u16 p_ocr;		/* Operation Configuration Register */
+  __volatile__ __u8 p_dr;		/* Parallel Data Register */
+  __volatile__ __u8 p_tcr;		/* Transfer Control Register */
+  __volatile__ __u8 p_or;		/* Output Register */
+  __volatile__ __u8 p_ir;		/* Input Register */
+  __volatile__ __u16 p_icr;		/* Interrupt Control Register */
+};
+
+/* P_HCR. Time is in increments of SBus clock. */
+#define P_HCR_TEST      0x8000      /* Allows buried counters to be read */
+#define P_HCR_DSW       0x7f00      /* Data strobe width (in ticks) */
+#define P_HCR_DDS       0x007f      /* Data setup before strobe (in ticks) */
+
+/* P_OCR. */
+#define P_OCR_MEM_CLR   0x8000
+#define P_OCR_DATA_SRC  0x4000      /* )                  */
+#define P_OCR_DS_DSEL   0x2000      /* )  Bidirectional      */
+#define P_OCR_BUSY_DSEL 0x1000      /* )    selects            */
+#define P_OCR_ACK_DSEL  0x0800      /* )                  */
+#define P_OCR_EN_DIAG   0x0400
+#define P_OCR_BUSY_OP   0x0200      /* Busy operation */
+#define P_OCR_ACK_OP    0x0100      /* Ack operation */
+#define P_OCR_SRST      0x0080      /* Reset state machines. Not selfcleaning. */
+#define P_OCR_IDLE      0x0008      /* PP data transfer state machine is idle */
+#define P_OCR_V_ILCK    0x0002      /* Versatec faded. Zebra only. */
+#define P_OCR_EN_VER    0x0001      /* Enable Versatec (0 - enable). Zebra only. */
+
+/* P_TCR */
+#define P_TCR_DIR       0x08
+#define P_TCR_BUSY      0x04
+#define P_TCR_ACK       0x02
+#define P_TCR_DS        0x01        /* Strobe */
+
+/* P_OR */
+#define P_OR_V3         0x20        /* )                 */
+#define P_OR_V2         0x10        /* ) on Zebra only   */
+#define P_OR_V1         0x08        /* )                 */
+#define P_OR_INIT       0x04
+#define P_OR_AFXN       0x02        /* Auto Feed */
+#define P_OR_SLCT_IN    0x01
+
+/* P_IR */
+#define P_IR_PE         0x04
+#define P_IR_SLCT       0x02
+#define P_IR_ERR        0x01
+
+/* P_ICR */
+#define P_DS_IRQ        0x8000      /* RW1  */
+#define P_ACK_IRQ       0x4000      /* RW1  */
+#define P_BUSY_IRQ      0x2000      /* RW1  */
+#define P_PE_IRQ        0x1000      /* RW1  */
+#define P_SLCT_IRQ      0x0800      /* RW1  */
+#define P_ERR_IRQ       0x0400      /* RW1  */
+#define P_DS_IRQ_EN     0x0200      /* RW   Always on rising edge */
+#define P_ACK_IRQ_EN    0x0100      /* RW   Always on rising edge */
+#define P_BUSY_IRP      0x0080      /* RW   1= rising edge */
+#define P_BUSY_IRQ_EN   0x0040      /* RW   */
+#define P_PE_IRP        0x0020      /* RW   1= rising edge */
+#define P_PE_IRQ_EN     0x0010      /* RW   */
+#define P_SLCT_IRP      0x0008      /* RW   1= rising edge */
+#define P_SLCT_IRQ_EN   0x0004      /* RW   */
+#define P_ERR_IRP       0x0002      /* RW1  1= rising edge */
+#define P_ERR_IRQ_EN    0x0001      /* RW   */
+
+#endif /* !(_ASM_SPARC_SUNBPP_H) */
diff --git a/arch/sparc/include/asm/swift.h b/arch/sparc/include/asm/swift.h
new file mode 100644
index 0000000..96f6526
--- /dev/null
+++ b/arch/sparc/include/asm/swift.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* swift.h: Specific definitions for the _broken_ Swift SRMMU
+ *          MMU module.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_SWIFT_H
+#define _SPARC_SWIFT_H
+
+/* Swift is so brain damaged; here is the mmu control register. */
+#define SWIFT_ST       0x00800000   /* SW tablewalk enable */
+#define SWIFT_WP       0x00400000   /* Watchpoint enable   */
+
+/* Branch folding (buggy, disable on production systems!)  */
+#define SWIFT_BF       0x00200000
+#define SWIFT_PMC      0x00180000   /* Page mode control   */
+#define SWIFT_PE       0x00040000   /* Parity enable       */
+#define SWIFT_PC       0x00020000   /* Parity control      */
+#define SWIFT_AP       0x00010000   /* Graphics page mode control (TCX/SX) */
+#define SWIFT_AC       0x00008000   /* Alternate Cacheability (see viking.h) */
+#define SWIFT_BM       0x00004000   /* Boot mode */
+#define SWIFT_RC       0x00003c00   /* DRAM refresh control */
+#define SWIFT_IE       0x00000200   /* Instruction cache enable */
+#define SWIFT_DE       0x00000100   /* Data cache enable */
+#define SWIFT_SA       0x00000080   /* Store Allocate */
+#define SWIFT_NF       0x00000002   /* No fault mode */
+#define SWIFT_EN       0x00000001   /* MMU enable */
+
+/* Bits [13:5] select one of 512 instruction cache tags */
+static inline void swift_inv_insn_tag(unsigned long addr)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (addr), "i" (ASI_M_TXTC_TAG)
+			     : "memory");
+}
+
+/* Bits [12:4] select one of 512 data cache tags */
+static inline void swift_inv_data_tag(unsigned long addr)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (addr), "i" (ASI_M_DATAC_TAG)
+			     : "memory");
+}
+
+static inline void swift_flush_dcache(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < 0x2000; addr += 0x10)
+		swift_inv_data_tag(addr);
+}
+
+static inline void swift_flush_icache(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < 0x4000; addr += 0x20)
+		swift_inv_insn_tag(addr);
+}
+
+static inline void swift_idflash_clear(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < 0x2000; addr += 0x10) {
+		swift_inv_insn_tag(addr<<1);
+		swift_inv_data_tag(addr);
+	}
+}
+
+/* Swift is so broken, it isn't even safe to use the following. */
+static inline void swift_flush_page(unsigned long page)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (page), "i" (ASI_M_FLUSH_PAGE)
+			     : "memory");
+}
+
+static inline void swift_flush_segment(unsigned long addr)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (addr), "i" (ASI_M_FLUSH_SEG)
+			     : "memory");
+}
+
+static inline void swift_flush_region(unsigned long addr)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (addr), "i" (ASI_M_FLUSH_REGION)
+			     : "memory");
+}
+
+static inline void swift_flush_context(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t"
+			     : /* no outputs */
+			     : "i" (ASI_M_FLUSH_CTX)
+			     : "memory");
+}
+
+#endif /* !(_SPARC_SWIFT_H) */
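The flush loops above visit each tag exactly once: 512 data-cache tags
at a 16-byte stride (bits [12:4]) and 512 instruction-cache tags at a
32-byte stride (bits [13:5]). A trivial check of that arithmetic:

	#include <stdio.h>

	int main(void)
	{
		unsigned long addr, dtags = 0, itags = 0;

		for (addr = 0; addr < 0x2000; addr += 0x10)
			dtags++;	/* data tag stride, bits [12:4] */
		for (addr = 0; addr < 0x4000; addr += 0x20)
			itags++;	/* insn tag stride, bits [13:5] */
		printf("data tags: %lu, insn tags: %lu\n", dtags, itags);
		return 0;
	}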
diff --git a/arch/sparc/include/asm/switch_to.h b/arch/sparc/include/asm/switch_to.h
new file mode 100644
index 0000000..7cf1c5d
--- /dev/null
+++ b/arch/sparc/include/asm/switch_to.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_SWITCH_TO_H
+#define ___ASM_SPARC_SWITCH_TO_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/switch_to_64.h>
+#else
+#include <asm/switch_to_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/switch_to_32.h b/arch/sparc/include/asm/switch_to_32.h
new file mode 100644
index 0000000..42eeafc
--- /dev/null
+++ b/arch/sparc/include/asm/switch_to_32.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_SWITCH_TO_H
+#define __SPARC_SWITCH_TO_H
+
+#include <asm/smp.h>
+
+extern struct thread_info *current_set[NR_CPUS];
+
+/*
+ * Flush windows so that the VM switch which follows
+ * would not pull the stack from under us.
+ *
+ * SWITCH_ENTER and SWITCH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
+ * XXX WTF is the above comment? Found in late teen 2.4.x.
+ */
+#ifdef CONFIG_SMP
+#define SWITCH_ENTER(prv) \
+	do {			\
+	if (test_tsk_thread_flag(prv, TIF_USEDFPU)) { \
+		put_psr(get_psr() | PSR_EF); \
+		fpsave(&(prv)->thread.float_regs[0], &(prv)->thread.fsr, \
+		       &(prv)->thread.fpqueue[0], &(prv)->thread.fpqdepth); \
+		clear_tsk_thread_flag(prv, TIF_USEDFPU); \
+		(prv)->thread.kregs->psr &= ~PSR_EF; \
+	} \
+	} while(0)
+
+#define SWITCH_DO_LAZY_FPU(next)	/* */
+#else
+#define SWITCH_ENTER(prv)		/* */
+#define SWITCH_DO_LAZY_FPU(nxt)	\
+	do {			\
+	if (last_task_used_math != (nxt))		\
+		(nxt)->thread.kregs->psr&=~PSR_EF;	\
+	} while(0)
+#endif
+
+#define prepare_arch_switch(next) do { \
+	__asm__ __volatile__( \
+	".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
+	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
+	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
+	"save %sp, -0x40, %sp\n\t" \
+	"restore; restore; restore; restore; restore; restore; restore"); \
+} while(0)
+
+	/* Much care has gone into this code, do not touch it.
+	 *
+	 * We need to load up regs l0/l1 for the newly forked child
+	 * case because the trap return path relies on those registers
+	 * holding certain values; gcc is told that they are clobbered.
+	 * Gcc needs registers for 3 values in and 1 value out, so we
+	 * clobber every non-fixed-usage register besides l2/l3/o4/o5.  -DaveM
+	 *
+	 * Hey Dave, that do not touch sign is too much of an incentive
+	 * - Anton & Pete
+	 */
+#define switch_to(prev, next, last) do {						\
+	SWITCH_ENTER(prev);								\
+	SWITCH_DO_LAZY_FPU(next);							\
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next->active_mm));		\
+	__asm__ __volatile__(								\
+	"sethi	%%hi(here - 0x8), %%o7\n\t"						\
+	"mov	%%g6, %%g3\n\t"								\
+	"or	%%o7, %%lo(here - 0x8), %%o7\n\t"					\
+	"rd	%%psr, %%g4\n\t"							\
+	"std	%%sp, [%%g6 + %4]\n\t"							\
+	"rd	%%wim, %%g5\n\t"							\
+	"wr	%%g4, 0x20, %%psr\n\t"							\
+	"nop\n\t"									\
+	"std	%%g4, [%%g6 + %3]\n\t"							\
+	"ldd	[%2 + %3], %%g4\n\t"							\
+	"mov	%2, %%g6\n\t"								\
+	".globl	patchme_store_new_current\n"						\
+"patchme_store_new_current:\n\t"							\
+	"st	%2, [%1]\n\t"								\
+	"wr	%%g4, 0x20, %%psr\n\t"							\
+	"nop\n\t"									\
+	"nop\n\t"									\
+	"nop\n\t"	/* LEON needs all 3 nops: load to %sp depends on CWP. */		\
+	"ldd	[%%g6 + %4], %%sp\n\t"							\
+	"wr	%%g5, 0x0, %%wim\n\t"							\
+	"ldd	[%%sp + 0x00], %%l0\n\t"						\
+	"ldd	[%%sp + 0x38], %%i6\n\t"						\
+	"wr	%%g4, 0x0, %%psr\n\t"							\
+	"nop\n\t"									\
+	"nop\n\t"									\
+	"jmpl	%%o7 + 0x8, %%g0\n\t"							\
+	" ld	[%%g3 + %5], %0\n\t"							\
+	"here:\n"									\
+        : "=&r" (last)									\
+        : "r" (&(current_set[hard_smp_processor_id()])),	\
+	  "r" (task_thread_info(next)),				\
+	  "i" (TI_KPSR),					\
+	  "i" (TI_KSP),						\
+	  "i" (TI_TASK)						\
+	:       "g1", "g2", "g3", "g4", "g5",       "g7",	\
+	  "l0", "l1",       "l3", "l4", "l5", "l6", "l7",	\
+	  "i0", "i1", "i2", "i3", "i4", "i5",			\
+	  "o0", "o1", "o2", "o3",                   "o7");	\
+	} while(0)
+
+void fpsave(unsigned long *fpregs, unsigned long *fsr,
+	    void *fpqueue, unsigned long *fpqdepth);
+void synchronize_user_stack(void);
+
+#endif /* __SPARC_SWITCH_TO_H */
diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h
new file mode 100644
index 0000000..b1d4e2e
--- /dev/null
+++ b/arch/sparc/include/asm/switch_to_64.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC64_SWITCH_TO_64_H
+#define __SPARC64_SWITCH_TO_64_H
+
+#include <asm/visasm.h>
+
+#define prepare_arch_switch(next)		\
+do {						\
+	flushw_all();				\
+} while (0)
+
+	/* See what happens when you design the chip correctly?
+	 *
+	 * We tell gcc we clobber all non-fixed-usage registers except
+	 * for l0/l1.  It will use one for 'next' and the other to hold
+	 * the output value of 'last'.  'next' is not referenced again
+	 * past the invocation of switch_to in the scheduler, so we need
+	 * not preserve it's value.  Hairy, but it lets us remove 2 loads
+	 * and 2 stores in this critical code path.  -DaveM
+	 */
+#define switch_to(prev, next, last)					\
+do {	save_and_clear_fpu();						\
+	/* If you are tempted to conditionalize the following */	\
+	/* so that ASI is only written if it changes, think again. */	\
+	__asm__ __volatile__("wr %%g0, %0, %%asi"			\
+	: : "r" (task_thread_info(next)->current_ds));\
+	trap_block[current_thread_info()->cpu].thread =			\
+		task_thread_info(next);					\
+	__asm__ __volatile__(						\
+	"mov	%%g4, %%g7\n\t"						\
+	"stx	%%i6, [%%sp + 2047 + 0x70]\n\t"				\
+	"stx	%%i7, [%%sp + 2047 + 0x78]\n\t"				\
+	"rdpr	%%wstate, %%o5\n\t"					\
+	"stx	%%o6, [%%g6 + %6]\n\t"					\
+	"stb	%%o5, [%%g6 + %5]\n\t"					\
+	"rdpr	%%cwp, %%o5\n\t"					\
+	"stb	%%o5, [%%g6 + %8]\n\t"					\
+	"wrpr	%%g0, 15, %%pil\n\t"					\
+	"mov	%4, %%g6\n\t"						\
+	"ldub	[%4 + %8], %%g1\n\t"					\
+	"wrpr	%%g1, %%cwp\n\t"					\
+	"ldx	[%%g6 + %6], %%o6\n\t"					\
+	"ldub	[%%g6 + %5], %%o5\n\t"					\
+	"ldub	[%%g6 + %7], %%o7\n\t"					\
+	"wrpr	%%o5, 0x0, %%wstate\n\t"				\
+	"ldx	[%%sp + 2047 + 0x70], %%i6\n\t"				\
+	"ldx	[%%sp + 2047 + 0x78], %%i7\n\t"				\
+	"ldx	[%%g6 + %9], %%g4\n\t"					\
+	"wrpr	%%g0, 14, %%pil\n\t"					\
+	"brz,pt %%o7, switch_to_pc\n\t"					\
+	" mov	%%g7, %0\n\t"						\
+	"sethi	%%hi(ret_from_fork), %%g1\n\t"				\
+	"jmpl	%%g1 + %%lo(ret_from_fork), %%g0\n\t"			\
+	" nop\n\t"							\
+	".globl switch_to_pc\n\t"					\
+	"switch_to_pc:\n\t"						\
+	: "=&r" (last), "=r" (current), "=r" (current_thread_info_reg),	\
+	  "=r" (__local_per_cpu_offset)					\
+	: "0" (task_thread_info(next)),					\
+	  "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_NEW_CHILD),            \
+	  "i" (TI_CWP), "i" (TI_TASK)					\
+	: "cc",								\
+	        "g1", "g2", "g3",                   "g7",		\
+	        "l1", "l2", "l3", "l4", "l5", "l6", "l7",		\
+	  "i0", "i1", "i2", "i3", "i4", "i5",				\
+	  "o0", "o1", "o2", "o3", "o4", "o5",       "o7");		\
+} while(0)
+
+void synchronize_user_stack(void);
+struct pt_regs;
+void fault_in_user_windows(struct pt_regs *);
+
+#endif /* __SPARC64_SWITCH_TO_64_H */
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
new file mode 100644
index 0000000..053989e
--- /dev/null
+++ b/arch/sparc/include/asm/syscall.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SPARC_SYSCALL_H
+#define __ASM_SPARC_SYSCALL_H
+
+#include <uapi/linux/audit.h>
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/thread_info.h>
+
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB.  So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
+/* The system call number is given by the user in %g1 */
+static inline long syscall_get_nr(struct task_struct *task,
+				  struct pt_regs *regs)
+{
+	int syscall_p = pt_regs_is_syscall(regs);
+
+	return (syscall_p ? regs->u_regs[UREG_G1] : -1L);
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+				    struct pt_regs *regs)
+{
+	/* XXX This needs some thought.  On Sparc we don't
+	 * XXX save away the original %o0 value somewhere.
+	 * XXX Instead we hold it in register %l5 at the top
+	 * XXX level trap frame and pass this down to the signal
+	 * XXX dispatch code which is the only place that value
+	 * XXX ever was needed.
+	 */
+}
+
+#ifdef CONFIG_SPARC32
+static inline bool syscall_has_error(struct pt_regs *regs)
+{
+	return (regs->psr & PSR_C) ? true : false;
+}
+static inline void syscall_set_error(struct pt_regs *regs)
+{
+	regs->psr |= PSR_C;
+}
+static inline void syscall_clear_error(struct pt_regs *regs)
+{
+	regs->psr &= ~PSR_C;
+}
+#else
+static inline bool syscall_has_error(struct pt_regs *regs)
+{
+	return (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY)) ? true : false;
+}
+static inline void syscall_set_error(struct pt_regs *regs)
+{
+	regs->tstate |= (TSTATE_XCARRY | TSTATE_ICARRY);
+}
+static inline void syscall_clear_error(struct pt_regs *regs)
+{
+	regs->tstate &= ~(TSTATE_XCARRY | TSTATE_ICARRY);
+}
+#endif
+
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	long val = regs->u_regs[UREG_I0];
+
+	return (syscall_has_error(regs) ? -val : 0);
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+					    struct pt_regs *regs)
+{
+	long val = regs->u_regs[UREG_I0];
+
+	return val;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+					    struct pt_regs *regs,
+					    int error, long val)
+{
+	if (error) {
+		syscall_set_error(regs);
+		regs->u_regs[UREG_I0] = -error;
+	} else {
+		syscall_clear_error(regs);
+		regs->u_regs[UREG_I0] = val;
+	}
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 unsigned long *args)
+{
+	int zero_extend = 0;
+	unsigned int j;
+
+#ifdef CONFIG_SPARC64
+	if (test_tsk_thread_flag(task, TIF_32BIT))
+		zero_extend = 1;
+#endif
+
+	for (j = 0; j < n; j++) {
+		unsigned long val = regs->u_regs[UREG_I0 + i + j];
+
+		if (zero_extend)
+			args[j] = (u32) val;
+		else
+			args[j] = val;
+	}
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+					 struct pt_regs *regs,
+					 unsigned int i, unsigned int n,
+					 const unsigned long *args)
+{
+	unsigned int j;
+
+	for (j = 0; j < n; j++)
+		regs->u_regs[UREG_I0 + i + j] = args[j];
+}
+
+static inline int syscall_get_arch(void)
+{
+#if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT)
+	return in_compat_syscall() ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64;
+#elif defined(CONFIG_SPARC64)
+	return AUDIT_ARCH_SPARC64;
+#else
+	return AUDIT_ARCH_SPARC;
+#endif
+}
+
+#endif /* __ASM_SPARC_SYSCALL_H */
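Sparc reports syscall failure through a condition-code carry bit rather
than a negative return value: syscall_set_return_value above stores a
positive errno in the return register and sets the carry. A userspace
model with a stand-in for pt_regs (struct and flag names illustrative):

	#include <stdio.h>

	#define DEMO_CARRY 0x1UL	/* stands in for PSR_C / TSTATE carry */

	struct demo_regs {
		unsigned long state;	/* stands in for psr or tstate */
		long o0;		/* stands in for u_regs[UREG_I0] */
	};

	static void set_return_value(struct demo_regs *r, int error, long val)
	{
		if (error) {
			r->state |= DEMO_CARRY;
			r->o0 = -error;	/* error is -errno; store it positive */
		} else {
			r->state &= ~DEMO_CARRY;
			r->o0 = val;
		}
	}

	static long get_error(const struct demo_regs *r)
	{
		return (r->state & DEMO_CARRY) ? -r->o0 : 0;
	}

	int main(void)
	{
		struct demo_regs r = { 0, 0 };

		set_return_value(&r, -2 /* -ENOENT */, 0);
		printf("error path:   carry=%lu errno=%ld\n",
		       r.state & DEMO_CARRY, get_error(&r));
		set_return_value(&r, 0, 42);
		printf("success path: carry=%lu value=%ld\n",
		       r.state & DEMO_CARRY, r.o0);
		return 0;
	}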
diff --git a/arch/sparc/include/asm/syscalls.h b/arch/sparc/include/asm/syscalls.h
new file mode 100644
index 0000000..1d819f5
--- /dev/null
+++ b/arch/sparc/include/asm/syscalls.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_SYSCALLS_H
+#define _SPARC64_SYSCALLS_H
+
+struct pt_regs;
+
+asmlinkage long sparc_do_fork(unsigned long clone_flags,
+			      unsigned long stack_start,
+			      struct pt_regs *regs,
+			      unsigned long stack_size);
+
+#endif /* _SPARC64_SYSCALLS_H */
diff --git a/arch/sparc/include/asm/termbits.h b/arch/sparc/include/asm/termbits.h
new file mode 100644
index 0000000..fa9de4a
--- /dev/null
+++ b/arch/sparc/include/asm/termbits.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_TERMBITS_H
+#define _SPARC_TERMBITS_H
+
+#include <uapi/asm/termbits.h>
+
+#define VMIN     16
+#define VTIME    17
+#endif /* !(_SPARC_TERMBITS_H) */
diff --git a/arch/sparc/include/asm/termios.h b/arch/sparc/include/asm/termios.h
new file mode 100644
index 0000000..4a558ef
--- /dev/null
+++ b/arch/sparc/include/asm/termios.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_TERMIOS_H
+#define _SPARC_TERMIOS_H
+
+#include <uapi/asm/termios.h>
+
+
+/*
+ * c_cc characters in the termio structure.  Oh, how I love being
+ * backwardly compatible.  Notice that character 4 and 5 are
+ * interpreted differently depending on whether ICANON is set in
+ * c_lflag.  If it's set, they are used as _VEOF and _VEOL, otherwise
+ * as _VMIN and _VTIME.  This is for compatibility with OSF/1 (which
+ * is compatible with sysV)...
+ */
+#define _VMIN	4
+#define _VTIME	5
+
+/*	intr=^C		quit=^\		erase=del	kill=^U
+	eof=^D		eol=\0		eol2=\0		sxtc=\0
+	start=^Q	stop=^S		susp=^Z		dsusp=^Y
+	reprint=^R	discard=^U	werase=^W	lnext=^V
+	vmin=\1         vtime=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\000\000\000\021\023\032\031\022\025\027\026\001"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define user_termio_to_kernel_termios(termios, termio) \
+({ \
+	unsigned short tmp; \
+	int err; \
+	err = get_user(tmp, &(termio)->c_iflag); \
+	(termios)->c_iflag = (0xffff0000 & ((termios)->c_iflag)) | tmp; \
+	err |= get_user(tmp, &(termio)->c_oflag); \
+	(termios)->c_oflag = (0xffff0000 & ((termios)->c_oflag)) | tmp; \
+	err |= get_user(tmp, &(termio)->c_cflag); \
+	(termios)->c_cflag = (0xffff0000 & ((termios)->c_cflag)) | tmp; \
+	err |= get_user(tmp, &(termio)->c_lflag); \
+	(termios)->c_lflag = (0xffff0000 & ((termios)->c_lflag)) | tmp; \
+	err |= copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
+	err; \
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ *
+ * Note the "fun" _VMIN overloading.
+ */
+#define kernel_termios_to_user_termio(termio, termios) \
+({ \
+	int err; \
+	err  = put_user((termios)->c_iflag, &(termio)->c_iflag); \
+	err |= put_user((termios)->c_oflag, &(termio)->c_oflag); \
+	err |= put_user((termios)->c_cflag, &(termio)->c_cflag); \
+	err |= put_user((termios)->c_lflag, &(termio)->c_lflag); \
+	err |= put_user((termios)->c_line,  &(termio)->c_line); \
+	err |= copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
+	if (!((termios)->c_lflag & ICANON)) { \
+		err |= put_user((termios)->c_cc[VMIN], &(termio)->c_cc[_VMIN]); \
+		err |= put_user((termios)->c_cc[VTIME], &(termio)->c_cc[_VTIME]); \
+	} \
+	err; \
+})
+
+#define user_termios_to_kernel_termios(k, u) \
+({ \
+	int err; \
+	err  = get_user((k)->c_iflag, &(u)->c_iflag); \
+	err |= get_user((k)->c_oflag, &(u)->c_oflag); \
+	err |= get_user((k)->c_cflag, &(u)->c_cflag); \
+	err |= get_user((k)->c_lflag, &(u)->c_lflag); \
+	err |= get_user((k)->c_line,  &(u)->c_line); \
+	err |= copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \
+	if ((k)->c_lflag & ICANON) { \
+		err |= get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
+		err |= get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
+	} else { \
+		err |= get_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
+		err |= get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
+	} \
+	err |= get_user((k)->c_ispeed,  &(u)->c_ispeed); \
+	err |= get_user((k)->c_ospeed,  &(u)->c_ospeed); \
+	err; \
+})
+
+#define kernel_termios_to_user_termios(u, k) \
+({ \
+	int err; \
+	err  = put_user((k)->c_iflag, &(u)->c_iflag); \
+	err |= put_user((k)->c_oflag, &(u)->c_oflag); \
+	err |= put_user((k)->c_cflag, &(u)->c_cflag); \
+	err |= put_user((k)->c_lflag, &(u)->c_lflag); \
+	err |= put_user((k)->c_line, &(u)->c_line); \
+	err |= copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \
+	if (!((k)->c_lflag & ICANON)) { \
+		err |= put_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
+		err |= put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
+	} else { \
+		err |= put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
+		err |= put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
+	} \
+	err |= put_user((k)->c_ispeed, &(u)->c_ispeed); \
+	err |= put_user((k)->c_ospeed, &(u)->c_ospeed); \
+	err; \
+})
+
+#define user_termios_to_kernel_termios_1(k, u) \
+({ \
+	int err; \
+	err  = get_user((k)->c_iflag, &(u)->c_iflag); \
+	err |= get_user((k)->c_oflag, &(u)->c_oflag); \
+	err |= get_user((k)->c_cflag, &(u)->c_cflag); \
+	err |= get_user((k)->c_lflag, &(u)->c_lflag); \
+	err |= get_user((k)->c_line,  &(u)->c_line); \
+	err |= copy_from_user((k)->c_cc, (u)->c_cc, NCCS); \
+	if ((k)->c_lflag & ICANON) { \
+		err |= get_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
+		err |= get_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
+	} else { \
+		err |= get_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
+		err |= get_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
+	} \
+	err; \
+})
+
+#define kernel_termios_to_user_termios_1(u, k) \
+({ \
+	int err; \
+	err  = put_user((k)->c_iflag, &(u)->c_iflag); \
+	err |= put_user((k)->c_oflag, &(u)->c_oflag); \
+	err |= put_user((k)->c_cflag, &(u)->c_cflag); \
+	err |= put_user((k)->c_lflag, &(u)->c_lflag); \
+	err |= put_user((k)->c_line, &(u)->c_line); \
+	err |= copy_to_user((u)->c_cc, (k)->c_cc, NCCS); \
+	if (!((k)->c_lflag & ICANON)) { \
+		err |= put_user((k)->c_cc[VMIN],  &(u)->c_cc[_VMIN]); \
+		err |= put_user((k)->c_cc[VTIME], &(u)->c_cc[_VTIME]); \
+	} else { \
+		err |= put_user((k)->c_cc[VEOF], &(u)->c_cc[VEOF]); \
+		err |= put_user((k)->c_cc[VEOL], &(u)->c_cc[VEOL]); \
+	} \
+	err; \
+})
+
+#endif /* _SPARC_TERMIOS_H */
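The termio conversion macros above widen each 16-bit legacy flag field
into the low half of the corresponding 32-bit termios field while
preserving the upper half. A standalone model of that merge:

	#include <stdio.h>

	static unsigned int merge_flag(unsigned int termios_flag,
				       unsigned short termio_flag)
	{
		/* keep bits 31:16, replace bits 15:0 */
		return (termios_flag & 0xffff0000u) | termio_flag;
	}

	int main(void)
	{
		unsigned int c_iflag = 0xabcd1234u;	/* current kernel value */
		unsigned short legacy = 0x0406;		/* from old struct termio */

		printf("merged c_iflag = %#x\n", merge_flag(c_iflag, legacy));
		return 0;
	}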
diff --git a/arch/sparc/include/asm/thread_info.h b/arch/sparc/include/asm/thread_info.h
new file mode 100644
index 0000000..740b174
--- /dev/null
+++ b/arch/sparc/include/asm/thread_info.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_THREAD_INFO_H
+#define ___ASM_SPARC_THREAD_INFO_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/thread_info_64.h>
+#else
+#include <asm/thread_info_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
new file mode 100644
index 0000000..548b366
--- /dev/null
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * thread_info.h: sparc low-level thread information
+ * adapted from the ppc version by Pete Zaitcev, which was
+ * adapted from the i386 version by Paul Mackerras
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * Copyright (c) 2002  Pete Zaitcev (zaitcev@yahoo.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+
+#include <asm/ptrace.h>
+#include <asm/page.h>
+
+/*
+ * Low level task data.
+ *
+ * If you change this, change the TI_* offsets below to match.
+ */
+#define NSWINS 8
+struct thread_info {
+	unsigned long		uwinmask;
+	struct task_struct	*task;		/* main task structure */
+	unsigned long		flags;		/* low level flags */
+	int			cpu;		/* cpu we're on */
+	int			preempt_count;	/* 0 => preemptable,
+						   <0 => BUG */
+	int			softirq_count;
+	int			hardirq_count;
+
+	u32 __unused;
+
+	/* Context switch saved kernel state. */
+	unsigned long ksp;	/* ... ksp __attribute__ ((aligned (8))); */
+	unsigned long kpc;
+	unsigned long kpsr;
+	unsigned long kwim;
+
+	/* A place to store user windows and stack pointers
+	 * when the stack needs inspection.
+	 */
+	struct reg_window32	reg_window[NSWINS];	/* align for ldd! */
+	unsigned long		rwbuf_stkptrs[NSWINS];
+	unsigned long		w_saved;
+};
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#define INIT_THREAD_INFO(tsk)				\
+{							\
+	.uwinmask	=	0,			\
+	.task		=	&tsk,			\
+	.flags		=	0,			\
+	.cpu		=	0,			\
+	.preempt_count	=	INIT_PREEMPT_COUNT,	\
+}
+
+/* how to get the thread information struct from C */
+register struct thread_info *current_thread_info_reg asm("g6");
+#define current_thread_info()   (current_thread_info_reg)
+
+/*
+ * thread information allocation
+ */
+#define THREAD_SIZE_ORDER  1
+
+#endif /* __ASSEMBLY__ */
+
+/* Size of kernel stack for each process */
+#define THREAD_SIZE		(2 * PAGE_SIZE)
+
+/*
+ * Offsets in thread_info structure, used in assembly code
+ * The "#define REGWIN_SZ 0x40" was abolished, so no multiplications.
+ */
+#define TI_UWINMASK	0x00	/* uwinmask */
+#define TI_TASK		0x04
+#define TI_FLAGS	0x08
+#define TI_CPU		0x0c
+#define TI_PREEMPT	0x10	/* preempt_count */
+#define TI_SOFTIRQ	0x14	/* softirq_count */
+#define TI_HARDIRQ	0x18	/* hardirq_count */
+#define TI_KSP		0x20	/* ksp */
+#define TI_KPC		0x24	/* kpc (ldd'ed with kpc) */
+#define TI_KPSR		0x28	/* kpsr */
+#define TI_KWIM		0x2c	/* kwim (ldd'ed with kpsr) */
+#define TI_REG_WINDOW	0x30
+#define TI_RWIN_SPTRS	0x230
+#define TI_W_SAVED	0x250
+
+/*
+ * thread information flag bit numbers
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+#define TIF_RESTORE_SIGMASK	4	/* restore signal mask in do_signal() */
+#define TIF_USEDFPU		8	/* FPU was used by this task
+					 * this quantum (SMP) */
+#define TIF_POLLING_NRFLAG	9	/* true if poll_idle() is polling
+					 * TIF_NEED_RESCHED */
+#define TIF_MEMDIE		10	/* is terminating due to OOM killer */
+
+/* as above, but as bit values */
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_USEDFPU		(1<<TIF_USEDFPU)
+#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+
+#define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | \
+					 _TIF_SIGPENDING)
+
+#define is_32bit_task()	(1)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_THREAD_INFO_H */
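
The TI_* offsets in this file are maintained by hand against the struct layout. A minimal host-side sketch that cross-checks them with offsetof(), assuming the sparc32 ABI (4-byte longs and pointers) and modeling struct reg_window32 as eight locals plus eight ins of 4 bytes each; the _mock types are illustrative stand-ins, not kernel code, and use fixed-width types so the check runs on any host:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct reg_window32_mock { uint32_t locals[8]; uint32_t ins[8]; };

struct thread_info_mock {
	uint32_t uwinmask;
	uint32_t task;			/* pointer is 4 bytes on sparc32 */
	uint32_t flags;
	int32_t  cpu;
	int32_t  preempt_count;
	int32_t  softirq_count;
	int32_t  hardirq_count;
	uint32_t __unused;
	uint32_t ksp, kpc, kpsr, kwim;
	struct reg_window32_mock reg_window[8];	/* NSWINS */
	uint32_t rwbuf_stkptrs[8];
	uint32_t w_saved;
};

int main(void)
{
	assert(offsetof(struct thread_info_mock, flags)         == 0x08);  /* TI_FLAGS */
	assert(offsetof(struct thread_info_mock, ksp)           == 0x20);  /* TI_KSP */
	assert(offsetof(struct thread_info_mock, reg_window)    == 0x30);  /* TI_REG_WINDOW */
	assert(offsetof(struct thread_info_mock, rwbuf_stkptrs) == 0x230); /* TI_RWIN_SPTRS */
	assert(offsetof(struct thread_info_mock, w_saved)       == 0x250); /* TI_W_SAVED */
	return 0;
}
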
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
new file mode 100644
index 0000000..7fb6763
--- /dev/null
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* thread_info.h: sparc64 low-level thread information
+ *
+ * Copyright (C) 2002  David S. Miller (davem@redhat.com)
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#define NSWINS		7
+
+#define TI_FLAG_BYTE_FAULT_CODE		0
+#define TI_FLAG_FAULT_CODE_SHIFT	56
+#define TI_FLAG_BYTE_WSTATE		1
+#define TI_FLAG_WSTATE_SHIFT		48
+#define TI_FLAG_BYTE_NOERROR		2
+#define TI_FLAG_BYTE_NOERROR_SHIFT	40
+#define TI_FLAG_BYTE_FPDEPTH		3
+#define TI_FLAG_FPDEPTH_SHIFT		32
+#define TI_FLAG_BYTE_CWP		4
+#define TI_FLAG_CWP_SHIFT		24
+#define TI_FLAG_BYTE_WSAVED		5
+#define TI_FLAG_WSAVED_SHIFT		16
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+
+#include <asm/ptrace.h>
+#include <asm/types.h>
+
+struct task_struct;
+
+struct thread_info {
+	/* D$ line 1 */
+	struct task_struct	*task;
+	unsigned long		flags;
+	__u8			fpsaved[7];
+	__u8			status;
+	unsigned long		ksp;
+
+	/* D$ line 2 */
+	unsigned long		fault_address;
+	struct pt_regs		*kregs;
+	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
+	__u8			new_child;
+	__u8			current_ds;
+	__u16			cpu;
+
+	unsigned long		*utraps;
+
+	struct reg_window 	reg_window[NSWINS];
+	unsigned long 		rwbuf_stkptrs[NSWINS];
+
+	unsigned long		gsr[7];
+	unsigned long		xfsr[7];
+
+	struct pt_regs		*kern_una_regs;
+	unsigned int		kern_una_insn;
+
+	unsigned long		fpregs[(7 * 256) / sizeof(unsigned long)]
+		__attribute__ ((aligned(64)));
+};
+
+#endif /* !(__ASSEMBLY__) */
+
+/* offsets into the thread_info struct for assembly code access */
+#define TI_TASK		0x00000000
+#define TI_FLAGS	0x00000008
+#define TI_FAULT_CODE	(TI_FLAGS + TI_FLAG_BYTE_FAULT_CODE)
+#define TI_WSTATE	(TI_FLAGS + TI_FLAG_BYTE_WSTATE)
+#define TI_CWP		(TI_FLAGS + TI_FLAG_BYTE_CWP)
+#define TI_FPDEPTH	(TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
+#define TI_WSAVED	(TI_FLAGS + TI_FLAG_BYTE_WSAVED)
+#define TI_SYS_NOERROR	(TI_FLAGS + TI_FLAG_BYTE_NOERROR)
+#define TI_FPSAVED	0x00000010
+#define TI_KSP		0x00000018
+#define TI_FAULT_ADDR	0x00000020
+#define TI_KREGS	0x00000028
+#define TI_PRE_COUNT	0x00000030
+#define TI_NEW_CHILD	0x00000034
+#define TI_CURRENT_DS	0x00000035
+#define TI_CPU		0x00000036
+#define TI_UTRAPS	0x00000038
+#define TI_REG_WINDOW	0x00000040
+#define TI_RWIN_SPTRS	0x000003c0
+#define TI_GSR		0x000003f8
+#define TI_XFSR		0x00000430
+#define TI_KUNA_REGS	0x00000468
+#define TI_KUNA_INSN	0x00000470
+#define TI_FPREGS	0x00000480
+
+/* We embed this in the uppermost byte of thread_info->flags */
+#define FAULT_CODE_WRITE	0x01	/* Write access, implies D-TLB	   */
+#define FAULT_CODE_DTLB		0x02	/* Miss happened in D-TLB	   */
+#define FAULT_CODE_ITLB		0x04	/* Miss happened in I-TLB	   */
+#define FAULT_CODE_WINFIXUP	0x08	/* Miss happened during spill/fill */
+#define FAULT_CODE_BLKCOMMIT	0x10	/* Use blk-commit ASI in copy_page */
+#define	FAULT_CODE_BAD_RA	0x20	/* Bad RA for sun4v		   */
+
+#if PAGE_SHIFT == 13
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define THREAD_SHIFT (PAGE_SHIFT + 1)
+#else /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE PAGE_SIZE
+#define THREAD_SHIFT PAGE_SHIFT
+#endif /* PAGE_SHIFT == 13 */
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)				\
+{							\
+	.task		=	&tsk,			\
+	.current_ds	=	ASI_P,			\
+	.preempt_count	=	INIT_PREEMPT_COUNT,	\
+}
+
+/* how to get the thread information struct from C */
+register struct thread_info *current_thread_info_reg asm("g6");
+#define current_thread_info()	(current_thread_info_reg)
+
+/* thread information allocation */
+#if PAGE_SHIFT == 13
+#define THREAD_SIZE_ORDER	1
+#else /* PAGE_SHIFT == 13 */
+#define THREAD_SIZE_ORDER	0
+#endif /* PAGE_SHIFT == 13 */
+
+#define __thread_flag_byte_ptr(ti)	\
+	((unsigned char *)(&((ti)->flags)))
+#define __cur_thread_flag_byte_ptr	__thread_flag_byte_ptr(current_thread_info())
+
+#define get_thread_fault_code()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FAULT_CODE])
+#define set_thread_fault_code(val)	(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FAULT_CODE] = (val))
+#define get_thread_wstate()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSTATE])
+#define set_thread_wstate(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSTATE] = (val))
+#define get_thread_cwp()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP])
+#define set_thread_cwp(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP] = (val))
+#define get_thread_noerror()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_NOERROR])
+#define set_thread_noerror(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_NOERROR] = (val))
+#define get_thread_fpdepth()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH])
+#define set_thread_fpdepth(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH] = (val))
+#define get_thread_wsaved()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED])
+#define set_thread_wsaved(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED] = (val))
+#endif /* !(__ASSEMBLY__) */
+
+/*
+ * Thread information flags; only 16 bits are available, as we encode
+ * other values into the upper 6 bytes.
+ *
+ * On trap return we need to test several values:
+ *
+ * user:	need_resched, notify_resume, sigpending, wsaved
+ * kernel:	fpdepth
+ *
+ * So to check for work in the kernel case we simply load the fpdepth
+ * byte out of the flags and test it.  For the user case we encode the
+ * lower 3 bytes of flags as follows:
+ *	----------------------------------------
+ *	| wsaved | flags byte 1 | flags byte 2 |
+ *	----------------------------------------
+ * This optimizes the user test into:
+ *	ldx		[%g6 + TI_FLAGS], REG1
+ *	sethi		%hi(_TIF_USER_WORK_MASK), REG2
+ *	or		REG2, %lo(_TIF_USER_WORK_MASK), REG2
+ *	andcc		REG1, REG2, %g0
+ *	be,pt		no_work_to_do
+ *	 nop
+ */
+#define TIF_SYSCALL_TRACE	0	/* syscall trace active */
+#define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
+#define TIF_SIGPENDING		2	/* signal pending */
+#define TIF_NEED_RESCHED	3	/* rescheduling necessary */
+/* flag bit 4 is available */
+#define TIF_UNALIGNED		5	/* allowed to do unaligned accesses */
+#define TIF_UPROBE		6	/* breakpointed or singlestepped */
+#define TIF_32BIT		7	/* 32-bit binary */
+#define TIF_NOHZ		8	/* in adaptive nohz mode */
+#define TIF_SECCOMP		9	/* secure computing */
+#define TIF_SYSCALL_AUDIT	10	/* syscall auditing active */
+#define TIF_SYSCALL_TRACEPOINT	11	/* syscall tracepoint instrumentation */
+/* NOTE: Thread flags >= 12 should be ones we have no interest
+ *       in using in assembly, else we can't use the mask as
+ *       an immediate value in instructions such as andcc.
+ */
+#define TIF_MCDPER		12	/* Precise MCD exception */
+#define TIF_MEMDIE		13	/* is terminating due to OOM killer */
+#define TIF_POLLING_NRFLAG	14
+
+#define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+#define _TIF_UNALIGNED		(1<<TIF_UNALIGNED)
+#define _TIF_UPROBE		(1<<TIF_UPROBE)
+#define _TIF_32BIT		(1<<TIF_32BIT)
+#define _TIF_NOHZ		(1<<TIF_NOHZ)
+#define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+
+#define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
+				 _TIF_DO_NOTIFY_RESUME_MASK | \
+				 _TIF_NEED_RESCHED)
+#define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | \
+					 _TIF_SIGPENDING | _TIF_UPROBE)
+
+#define is_32bit_task()	(test_thread_flag(TIF_32BIT))
+
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ *
+ * Note that there are only 8 bits available.
+ */
+
+#ifndef __ASSEMBLY__
+
+#define thread32_stack_is_64bit(__SP) (((__SP) & 0x1) != 0)
+#define test_thread_64bit_stack(__SP) \
+	((test_thread_flag(TIF_32BIT) && !thread32_stack_is_64bit(__SP)) ? \
+	 false : true)
+
+#endif	/* !__ASSEMBLY__ */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_THREAD_INFO_H */
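
On big-endian sparc64, byte N of the 64-bit flags word holds bits (63 - 8*N)..(56 - 8*N), which is why TI_FLAG_BYTE_FAULT_CODE (byte 0) pairs with shift 56 and TI_FLAG_BYTE_WSAVED (byte 5) with shift 16, and why the char-pointer accessors above work as written only on a big-endian target. A small host-neutral sketch of the same accessor pattern using explicit shifts; the helper names and test values are illustrative:

#include <assert.h>
#include <stdint.h>

#define FAULT_CODE_SHIFT	56	/* byte 0 on a big-endian target */
#define WSAVED_SHIFT		16	/* byte 5 */

static uint8_t get_flag_byte(uint64_t flags, unsigned int shift)
{
	return (uint8_t)(flags >> shift);
}

static uint64_t set_flag_byte(uint64_t flags, unsigned int shift, uint8_t val)
{
	flags &= ~((uint64_t)0xff << shift);
	return flags | ((uint64_t)val << shift);
}

int main(void)
{
	uint64_t flags = 0;

	flags = set_flag_byte(flags, FAULT_CODE_SHIFT, 0x04);	/* FAULT_CODE_ITLB */
	flags = set_flag_byte(flags, WSAVED_SHIFT, 2);

	assert(get_flag_byte(flags, FAULT_CODE_SHIFT) == 0x04);
	assert(get_flag_byte(flags, WSAVED_SHIFT) == 2);
	/* the low 16 bits stay free for the TIF_* bits tested with andcc */
	assert((flags & 0xffff) == 0);
	return 0;
}
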
diff --git a/arch/sparc/include/asm/timer.h b/arch/sparc/include/asm/timer.h
new file mode 100644
index 0000000..eaf6195
--- /dev/null
+++ b/arch/sparc/include/asm/timer.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_TIMER_H
+#define ___ASM_SPARC_TIMER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/timer_64.h>
+#else
+#include <asm/timer_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/timer_32.h b/arch/sparc/include/asm/timer_32.h
new file mode 100644
index 0000000..eecd269
--- /dev/null
+++ b/arch/sparc/include/asm/timer_32.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * timer.h:  Definitions for the timer chips on the Sparc.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+
+#ifndef _SPARC_TIMER_H
+#define _SPARC_TIMER_H
+
+#include <linux/clocksource.h>
+#include <linux/irqreturn.h>
+
+#include <asm-generic/percpu.h>
+
+#include <asm/cpu_type.h>  /* For SUN4M_NCPUS */
+
+#define SBUS_CLOCK_RATE   2000000 /* 2MHz */
+#define TIMER_VALUE_SHIFT 9
+#define TIMER_VALUE_MASK  0x3fffff
+#define TIMER_LIMIT_BIT   (1 << 31)  /* Bit 31 in Counter-Timer register */
+
+/* The counter timer register has the value offset by 9 bits.
+ * From the sun4m manual:
+ * When a counter reaches the value in the corresponding limit register,
+ * the Limit bit is set and the counter is set to 500 ns (i.e. 0x00000200).
+ *
+ * To compensate for this, add one to the value.
+ */
+static inline unsigned int timer_value(unsigned int value)
+{
+	return (value + 1) << TIMER_VALUE_SHIFT;
+}
+
+extern volatile u32 __iomem *master_l10_counter;
+
+irqreturn_t notrace timer_interrupt(int dummy, void *dev_id);
+
+#ifdef CONFIG_SMP
+DECLARE_PER_CPU(struct clock_event_device, sparc32_clockevent);
+void register_percpu_ce(int cpu);
+#endif
+
+#endif /* !(_SPARC_TIMER_H) */
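
A worked example of the offset-by-9 encoding that timer_value() implements; the 10 ms period is an arbitrary choice for illustration:

#include <assert.h>

#define SBUS_CLOCK_RATE   2000000	/* 2 MHz */
#define TIMER_VALUE_SHIFT 9
#define TIMER_VALUE_MASK  0x3fffff

static unsigned int timer_value(unsigned int value)
{
	return (value + 1) << TIMER_VALUE_SHIFT;
}

int main(void)
{
	/* a hypothetical 10 ms period at 2 MHz is 20000 counts */
	unsigned int counts = SBUS_CLOCK_RATE / 100;
	unsigned int limit = timer_value(counts);

	/* Decoding the register field recovers counts + 1, because the
	 * hardware restarts the count at 0x200 (one tick, 500 ns), not 0. */
	assert(((limit >> TIMER_VALUE_SHIFT) & TIMER_VALUE_MASK) == counts + 1);
	return 0;
}
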
diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h
new file mode 100644
index 0000000..c7e4fb6
--- /dev/null
+++ b/arch/sparc/include/asm/timer_64.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* timer.h: System timer definitions for sun5.
+ *
+ * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _SPARC64_TIMER_H
+#define _SPARC64_TIMER_H
+
+#include <linux/types.h>
+#include <linux/init.h>
+
+/* The most frequently accessed fields should come first,
+ * so that they fit into the same cache line.
+ */
+struct sparc64_tick_ops {
+	unsigned long ticks_per_nsec_quotient;
+	unsigned long offset;
+	unsigned long long (*get_tick)(void);
+	int (*add_compare)(unsigned long);
+	unsigned long softint_mask;
+	void (*disable_irq)(void);
+
+	void (*init_tick)(void);
+	unsigned long (*add_tick)(unsigned long);
+	unsigned long (*get_frequency)(void);
+	unsigned long frequency;
+
+	char *name;
+};
+
+extern struct sparc64_tick_ops *tick_ops;
+
+unsigned long sparc64_get_clock_tick(unsigned int cpu);
+void setup_sparc64_timer(void);
+void __init time_init(void);
+
+#define TICK_PRIV_BIT		BIT(63)
+#define TICKCMP_IRQ_BIT		BIT(63)
+
+#define HBIRD_STICKCMP_ADDR	0x1fe0000f060UL
+#define HBIRD_STICK_ADDR	0x1fe0000f070UL
+
+#define GET_TICK_NINSTR		13
+struct get_tick_patch {
+	unsigned int	addr;
+	unsigned int	tick[GET_TICK_NINSTR];
+	unsigned int	stick[GET_TICK_NINSTR];
+};
+
+extern struct get_tick_patch __get_tick_patch;
+extern struct get_tick_patch __get_tick_patch_end;
+
+static inline unsigned long get_tick(void)
+{
+	unsigned long tick, tmp1, tmp2;
+
+	__asm__ __volatile__(
+	/* read hbtick 13 instructions */
+	"661:\n"
+	"	mov	0x1fe, %1\n"
+	"	sllx	%1, 0x20, %1\n"
+	"	sethi	%%hi(0xf000), %2\n"
+	"	or	%2, 0x70, %2\n"
+	"	or	%1, %2, %1\n"	/* %1 = HBIRD_STICK_ADDR */
+	"	add	%1, 8, %2\n"
+	"	ldxa	[%2]%3, %0\n"
+	"	ldxa	[%1]%3, %1\n"
+	"	ldxa	[%2]%3, %2\n"
+	"	sub	%2, %0, %0\n"	/* don't modify %xcc */
+	"	brnz,pn	%0, 661b\n"	/* restart to save one register */
+	"	 sllx	%2, 32, %2\n"
+	"	or	%2, %1, %0\n"
+	/* Common/not patched code */
+	"	sllx	%0, 1, %0\n"
+	"	srlx	%0, 1, %0\n"	/* Clear TICK_PRIV_BIT */
+	/* Beginning of patch section */
+	"	.section .get_tick_patch, \"ax\"\n"
+	"	.word	661b\n"
+	/* read tick 2 instructions and 11 skipped */
+	"	ba	1f\n"
+	"	 rd	%%tick, %0\n"
+	"	.skip	4 * (%4 - 2)\n"
+	"1:\n"
+	/* read stick 2 instructions and 11 skipped */
+	"	ba	1f\n"
+	"	 rd	%%asr24, %0\n"
+	"	.skip	4 * (%4 - 2)\n"
+	"1:\n"
+	/* End of patch section */
+	"	.previous\n"
+	: "=&r" (tick), "=&r" (tmp1), "=&r" (tmp2)
+	: "i" (ASI_PHYS_BYPASS_EC_E), "i" (GET_TICK_NINSTR));
+
+	return tick;
+}
+
+#endif /* _SPARC64_TIMER_H */
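
The patch table above pairs each patched call site with two alternative 13-instruction bodies, one reading %tick and one reading %stick. A minimal sketch of how a boot-time pass might consume it; the offset-based text buffer, the use_stick flag, and the omitted I-cache flush are all simplifying assumptions of this sketch (a real patcher would write to the recorded address and flush each word):

#include <stdint.h>
#include <string.h>

#define GET_TICK_NINSTR	13

struct get_tick_patch_mock {
	uint32_t addr;			/* offset into text[] in this sketch */
	uint32_t tick[GET_TICK_NINSTR];
	uint32_t stick[GET_TICK_NINSTR];
};

/* Overwrite the default 13-instruction hbtick reader with either the
 * %tick or the %stick variant for every entry in the table. */
static void apply_get_tick_patches(uint32_t *text,
				   const struct get_tick_patch_mock *p,
				   const struct get_tick_patch_mock *end,
				   int use_stick)
{
	for (; p < end; p++)
		memcpy(text + p->addr,
		       use_stick ? p->stick : p->tick,
		       GET_TICK_NINSTR * sizeof(uint32_t));
}

int main(void)
{
	uint32_t text[32] = { 0 };
	struct get_tick_patch_mock p = { .addr = 4, .tick = { 1 }, .stick = { 2 } };

	apply_get_tick_patches(text, &p, &p + 1, 1);
	return text[4] == 2 ? 0 : 1;
}
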
diff --git a/arch/sparc/include/asm/timex.h b/arch/sparc/include/asm/timex.h
new file mode 100644
index 0000000..9aac26b
--- /dev/null
+++ b/arch/sparc/include/asm/timex.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_TIMEX_H
+#define ___ASM_SPARC_TIMEX_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/timex_64.h>
+#else
+#include <asm/timex_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/timex_32.h b/arch/sparc/include/asm/timex_32.h
new file mode 100644
index 0000000..542915b
--- /dev/null
+++ b/arch/sparc/include/asm/timex_32.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/include/asm/timex.h
+ *
+ * sparc architecture timex specifications
+ */
+#ifndef _ASMsparc_TIMEX_H
+#define _ASMsparc_TIMEX_H
+
+#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
+
+/* XXX Maybe do something better at some point... -DaveM */
+typedef unsigned long cycles_t;
+#define get_cycles()	(0)
+
+#endif
diff --git a/arch/sparc/include/asm/timex_64.h b/arch/sparc/include/asm/timex_64.h
new file mode 100644
index 0000000..076c44f
--- /dev/null
+++ b/arch/sparc/include/asm/timex_64.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/include/asm/timex.h
+ *
+ * sparc64 architecture timex specifications
+ */
+#ifndef _ASMsparc64_TIMEX_H
+#define _ASMsparc64_TIMEX_H
+
+#include <asm/timer.h>
+
+#define CLOCK_TICK_RATE	1193180 /* Underlying HZ */
+
+/* Reading the cycle counter on sparc64. */
+typedef unsigned long cycles_t;
+#define get_cycles()	tick_ops->get_tick()
+
+#define ARCH_HAS_READ_CURRENT_TIMER
+
+#endif
diff --git a/arch/sparc/include/asm/tlb.h b/arch/sparc/include/asm/tlb.h
new file mode 100644
index 0000000..7146a57
--- /dev/null
+++ b/arch/sparc/include/asm/tlb.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_TLB_H
+#define ___ASM_SPARC_TLB_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/tlb_64.h>
+#else
+#include <asm/tlb_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h
new file mode 100644
index 0000000..343cea1
--- /dev/null
+++ b/arch/sparc/include/asm/tlb_32.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_TLB_H
+#define _SPARC_TLB_H
+
+#define tlb_start_vma(tlb, vma) \
+do {								\
+	flush_cache_range(vma, vma->vm_start, vma->vm_end);	\
+} while (0)
+
+#define tlb_end_vma(tlb, vma) \
+do {								\
+	flush_tlb_range(vma, vma->vm_start, vma->vm_end);	\
+} while (0)
+
+#define __tlb_remove_tlb_entry(tlb, pte, address) \
+	do { } while (0)
+
+#define tlb_flush(tlb) \
+do {								\
+	flush_tlb_mm((tlb)->mm);				\
+} while (0)
+
+#include <asm-generic/tlb.h>
+
+#endif /* _SPARC_TLB_H */
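
The two vma hooks above bracket a teardown: the cache flush runs before the page tables are torn down, while translations are still valid, and the TLB flush runs after. A sketch of that ordering, with hypothetical _mock callbacks standing in for the sparc32_cachetlb_ops calls and the unmap step:

#include <stdio.h>

struct vma_mock { unsigned long vm_start, vm_end; };

static void flush_cache_range_mock(struct vma_mock *v)
{
	printf("cache flush %lx-%lx\n", v->vm_start, v->vm_end);
}

static void unmap_ptes_mock(struct vma_mock *v)
{
	printf("unmap       %lx-%lx\n", v->vm_start, v->vm_end);
}

static void flush_tlb_range_mock(struct vma_mock *v)
{
	printf("tlb flush   %lx-%lx\n", v->vm_start, v->vm_end);
}

int main(void)
{
	struct vma_mock v = { 0x10000, 0x20000 };

	flush_cache_range_mock(&v);	/* tlb_start_vma: before teardown */
	unmap_ptes_mock(&v);
	flush_tlb_range_mock(&v);	/* tlb_end_vma: after teardown */
	return 0;
}
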
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
new file mode 100644
index 0000000..a2f3fa6
--- /dev/null
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_TLB_H
+#define _SPARC64_TLB_H
+
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+
+#ifdef CONFIG_SMP
+void smp_flush_tlb_pending(struct mm_struct *,
+				  unsigned long, unsigned long *);
+#endif
+
+#ifdef CONFIG_SMP
+void smp_flush_tlb_mm(struct mm_struct *mm);
+#define do_flush_tlb_mm(mm) smp_flush_tlb_mm(mm)
+#else
+#define do_flush_tlb_mm(mm) __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT)
+#endif
+
+void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
+void flush_tlb_pending(void);
+
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma)	do { } while (0)
+#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush(tlb)	flush_tlb_pending()
+
+#include <asm-generic/tlb.h>
+
+#endif /* _SPARC64_TLB_H */
diff --git a/arch/sparc/include/asm/tlbflush.h b/arch/sparc/include/asm/tlbflush.h
new file mode 100644
index 0000000..30ae3ea
--- /dev/null
+++ b/arch/sparc/include/asm/tlbflush.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_TLBFLUSH_H
+#define ___ASM_SPARC_TLBFLUSH_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/tlbflush_64.h>
+#else
+#include <asm/tlbflush_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/tlbflush_32.h b/arch/sparc/include/asm/tlbflush_32.h
new file mode 100644
index 0000000..4705319
--- /dev/null
+++ b/arch/sparc/include/asm/tlbflush_32.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_TLBFLUSH_H
+#define _SPARC_TLBFLUSH_H
+
+#include <asm/cachetlb_32.h>
+
+#define flush_tlb_all() \
+	sparc32_cachetlb_ops->tlb_all()
+#define flush_tlb_mm(mm) \
+	sparc32_cachetlb_ops->tlb_mm(mm)
+#define flush_tlb_range(vma, start, end) \
+	sparc32_cachetlb_ops->tlb_range(vma, start, end)
+#define flush_tlb_page(vma, addr) \
+	sparc32_cachetlb_ops->tlb_page(vma, addr)
+
+/*
+ * This is a kludge, until I know better. --zaitcev XXX
+ */
+static inline void flush_tlb_kernel_range(unsigned long start,
+					  unsigned long end)
+{
+	flush_tlb_all();
+}
+
+#endif /* _SPARC_TLBFLUSH_H */
diff --git a/arch/sparc/include/asm/tlbflush_64.h b/arch/sparc/include/asm/tlbflush_64.h
new file mode 100644
index 0000000..8b8cdaa
--- /dev/null
+++ b/arch/sparc/include/asm/tlbflush_64.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_TLBFLUSH_H
+#define _SPARC64_TLBFLUSH_H
+
+#include <asm/mmu_context.h>
+
+/* TSB flush operations. */
+
+#define TLB_BATCH_NR	192
+
+struct tlb_batch {
+	unsigned int hugepage_shift;
+	struct mm_struct *mm;
+	unsigned long tlb_nr;
+	unsigned long active;
+	unsigned long vaddrs[TLB_BATCH_NR];
+};
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end);
+void flush_tsb_user(struct tlb_batch *tb);
+void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
+			 unsigned int hugepage_shift);
+
+/* TLB flush operations. */
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long vmaddr)
+{
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end)
+{
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+
+void flush_tlb_pending(void);
+void arch_enter_lazy_mmu_mode(void);
+void arch_leave_lazy_mmu_mode(void);
+#define arch_flush_lazy_mmu_mode()      do {} while (0)
+
+/* Local cpu only.  */
+void __flush_tlb_all(void);
+void __flush_tlb_page(unsigned long context, unsigned long vaddr);
+void __flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+#ifndef CONFIG_SMP
+
+static inline void global_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
+{
+	__flush_tlb_page(CTX_HWBITS(mm->context), vaddr);
+}
+
+#else /* CONFIG_SMP */
+
+void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end);
+void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr);
+
+#define global_flush_tlb_page(mm, vaddr) \
+	smp_flush_tlb_page(mm, vaddr)
+
+#endif /* ! CONFIG_SMP */
+
+#endif /* _SPARC64_TLBFLUSH_H */
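
flush_tlb_mm/page/range can be empty inlines here because sparc64 defers user TLB flushes: PTE updates made under lazy MMU mode accumulate virtual addresses in a struct tlb_batch, and flush_tlb_pending() drains the batch. A compact sketch of that accumulate-then-drain pattern; flush_one_batch() is a hypothetical stand-in for __flush_tlb_pending():

#include <assert.h>

#define TLB_BATCH_NR	192

struct tlb_batch_mock {
	unsigned long tlb_nr;
	unsigned long vaddrs[TLB_BATCH_NR];
};

static unsigned long flushes;

static void flush_one_batch(struct tlb_batch_mock *tb)
{
	flushes++;			/* stand-in for __flush_tlb_pending() */
	tb->tlb_nr = 0;
}

static void queue_tlb_flush(struct tlb_batch_mock *tb, unsigned long vaddr)
{
	tb->vaddrs[tb->tlb_nr++] = vaddr;
	if (tb->tlb_nr == TLB_BATCH_NR)
		flush_one_batch(tb);	/* batch full: drain to hardware */
}

int main(void)
{
	struct tlb_batch_mock tb = { 0 };
	unsigned long va;

	for (va = 0; va < 2 * TLB_BATCH_NR; va++)
		queue_tlb_flush(&tb, va << 13);
	flush_one_batch(&tb);		/* as arch_leave_lazy_mmu_mode() would */
	assert(flushes == 3);
	return 0;
}
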
diff --git a/arch/sparc/include/asm/topology.h b/arch/sparc/include/asm/topology.h
new file mode 100644
index 0000000..ba7b9d9
--- /dev/null
+++ b/arch/sparc/include/asm/topology.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_TOPOLOGY_H
+#define ___ASM_SPARC_TOPOLOGY_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/topology_64.h>
+#else
+#include <asm/topology_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/topology_32.h b/arch/sparc/include/asm/topology_32.h
new file mode 100644
index 0000000..66c4f9f
--- /dev/null
+++ b/arch/sparc/include/asm/topology_32.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_TOPOLOGY_H
+#define _ASM_SPARC_TOPOLOGY_H
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_SPARC_TOPOLOGY_H */
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
new file mode 100644
index 0000000..34c628a
--- /dev/null
+++ b/arch/sparc/include/asm/topology_64.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC64_TOPOLOGY_H
+#define _ASM_SPARC64_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/mmzone.h>
+
+static inline int cpu_to_node(int cpu)
+{
+	return numa_cpu_lookup_table[cpu];
+}
+
+#define cpumask_of_node(node) ((node) == -1 ?				\
+			       cpu_all_mask :				\
+			       &numa_cpumask_lookup_table[node])
+
+struct pci_bus;
+#ifdef CONFIG_PCI
+int pcibus_to_node(struct pci_bus *pbus);
+#else
+static inline int pcibus_to_node(struct pci_bus *pbus)
+{
+	return -1;
+}
+#endif
+
+#define cpumask_of_pcibus(bus)	\
+	(pcibus_to_node(bus) == -1 ? \
+	 cpu_all_mask : \
+	 cpumask_of_node(pcibus_to_node(bus)))
+
+int __node_distance(int, int);
+#define node_distance(a, b) __node_distance(a, b)
+
+#else /* CONFIG_NUMA */
+
+#include <asm-generic/topology.h>
+
+#endif /* !(CONFIG_NUMA) */
+
+#ifdef CONFIG_SMP
+
+#include <asm/cpudata.h>
+
+#define topology_physical_package_id(cpu)	(cpu_data(cpu).proc_id)
+#define topology_core_id(cpu)			(cpu_data(cpu).core_id)
+#define topology_core_cpumask(cpu)		(&cpu_core_sib_map[cpu])
+#define topology_core_cache_cpumask(cpu)	(&cpu_core_sib_cache_map[cpu])
+#define topology_sibling_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
+#endif /* CONFIG_SMP */
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
+
+/**
+ * Return the cores that share the last level cache.
+ */
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_core_sib_cache_map[cpu];
+}
+
+#endif /* _ASM_SPARC64_TOPOLOGY_H */
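
A small sketch of the -1 fallback convention used by cpumask_of_node() and cpumask_of_pcibus() above: when no node is known, callers get a mask covering every cpu rather than an error. The types and lookup tables are simplified stand-ins for this illustration:

#include <assert.h>

typedef struct { unsigned long bits; } cpumask_mock;

static cpumask_mock all_cpus = { ~0UL };
static cpumask_mock node_cpus[2] = { { 0x0fUL }, { 0xf0UL } };

static const cpumask_mock *cpumask_of_node_mock(int node)
{
	return node == -1 ? &all_cpus : &node_cpus[node];
}

int main(void)
{
	assert(cpumask_of_node_mock(-1) == &all_cpus);	/* unknown node */
	assert(cpumask_of_node_mock(1)->bits == 0xf0UL);
	return 0;
}
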
diff --git a/arch/sparc/include/asm/trap_block.h b/arch/sparc/include/asm/trap_block.h
new file mode 100644
index 0000000..0f6d0c4
--- /dev/null
+++ b/arch/sparc/include/asm/trap_block.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_TRAP_BLOCK_H
+#define _SPARC_TRAP_BLOCK_H
+
+#include <asm/hypervisor.h>
+#include <asm/asi.h>
+
+#ifndef __ASSEMBLY__
+
+/* Trap handling code needs to get at a few critical values upon
+ * trap entry and to process TSB misses.  These cannot be in the
+ * per_cpu() area as we really need to lock them into the TLB and
+ * thus make them part of the main kernel image.  As a result we
+ * try to make this as small as possible.
+ *
+ * This is padded out and aligned to 64 bytes to avoid false sharing
+ * on SMP.
+ */
+
+/* If you modify the size of this structure, please update
+ * TRAP_BLOCK_SZ_SHIFT below.
+ */
+struct thread_info;
+struct trap_per_cpu {
+/* D-cache line 1: Basic thread information, cpu and device mondo queues */
+	struct thread_info	*thread;
+	unsigned long		pgd_paddr;
+	unsigned long		cpu_mondo_pa;
+	unsigned long		dev_mondo_pa;
+
+/* D-cache line 2: Error Mondo Queue and kernel buffer pointers */
+	unsigned long		resum_mondo_pa;
+	unsigned long		resum_kernel_buf_pa;
+	unsigned long		nonresum_mondo_pa;
+	unsigned long		nonresum_kernel_buf_pa;
+
+/* Dcache lines 3, 4, 5, and 6: Hypervisor Fault Status */
+	struct hv_fault_status	fault_info;
+
+/* Dcache line 7: Physical addresses of CPU send mondo block and CPU list.  */
+	unsigned long		cpu_mondo_block_pa;
+	unsigned long		cpu_list_pa;
+	unsigned long		tsb_huge;
+	unsigned long		tsb_huge_temp;
+
+/* Dcache line 8: IRQ work list, and keep trap_block a power-of-2 in size.  */
+	unsigned long		irq_worklist_pa;
+	unsigned int		cpu_mondo_qmask;
+	unsigned int		dev_mondo_qmask;
+	unsigned int		resum_qmask;
+	unsigned int		nonresum_qmask;
+	unsigned long		__per_cpu_base;
+} __attribute__((aligned(64)));
+extern struct trap_per_cpu trap_block[NR_CPUS];
+void init_cur_cpu_trap(struct thread_info *);
+void setup_tba(void);
+extern int ncpus_probed;
+extern u64 cpu_mondo_counter[NR_CPUS];
+
+unsigned long real_hard_smp_processor_id(void);
+
+struct cpuid_patch_entry {
+	unsigned int	addr;
+	unsigned int	cheetah_safari[4];
+	unsigned int	cheetah_jbus[4];
+	unsigned int	starfire[4];
+	unsigned int	sun4v[4];
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+
+struct sun4v_1insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insn;
+};
+extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
+	__sun4v_1insn_patch_end;
+extern struct sun4v_1insn_patch_entry __fast_win_ctrl_1insn_patch,
+	__fast_win_ctrl_1insn_patch_end;
+extern struct sun4v_1insn_patch_entry __sun_m7_1insn_patch,
+	__sun_m7_1insn_patch_end;
+
+struct sun4v_2insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[2];
+};
+extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
+	__sun4v_2insn_patch_end;
+extern struct sun4v_2insn_patch_entry __sun_m7_2insn_patch,
+	__sun_m7_2insn_patch_end;
+
+
+#endif /* !(__ASSEMBLY__) */
+
+#define TRAP_PER_CPU_THREAD		0x00
+#define TRAP_PER_CPU_PGD_PADDR		0x08
+#define TRAP_PER_CPU_CPU_MONDO_PA	0x10
+#define TRAP_PER_CPU_DEV_MONDO_PA	0x18
+#define TRAP_PER_CPU_RESUM_MONDO_PA	0x20
+#define TRAP_PER_CPU_RESUM_KBUF_PA	0x28
+#define TRAP_PER_CPU_NONRESUM_MONDO_PA	0x30
+#define TRAP_PER_CPU_NONRESUM_KBUF_PA	0x38
+#define TRAP_PER_CPU_FAULT_INFO		0x40
+#define TRAP_PER_CPU_CPU_MONDO_BLOCK_PA	0xc0
+#define TRAP_PER_CPU_CPU_LIST_PA	0xc8
+#define TRAP_PER_CPU_TSB_HUGE		0xd0
+#define TRAP_PER_CPU_TSB_HUGE_TEMP	0xd8
+#define TRAP_PER_CPU_IRQ_WORKLIST_PA	0xe0
+#define TRAP_PER_CPU_CPU_MONDO_QMASK	0xe8
+#define TRAP_PER_CPU_DEV_MONDO_QMASK	0xec
+#define TRAP_PER_CPU_RESUM_QMASK	0xf0
+#define TRAP_PER_CPU_NONRESUM_QMASK	0xf4
+#define TRAP_PER_CPU_PER_CPU_BASE	0xf8
+
+#define TRAP_BLOCK_SZ_SHIFT		8
+
+#include <asm/scratchpad.h>
+
+#define __GET_CPUID(REG)				\
+	/* Spitfire implementation (default). */	\
+661:	ldxa		[%g0] ASI_UPA_CONFIG, REG;	\
+	srlx		REG, 17, REG;			\
+	 and		REG, 0x1f, REG;			\
+	nop;						\
+	.section	.cpuid_patch, "ax";		\
+	/* Instruction location. */			\
+	.word		661b;				\
+	/* Cheetah Safari implementation. */		\
+	ldxa		[%g0] ASI_SAFARI_CONFIG, REG;	\
+	srlx		REG, 17, REG;			\
+	and		REG, 0x3ff, REG;		\
+	nop;						\
+	/* Cheetah JBUS implementation. */		\
+	ldxa		[%g0] ASI_JBUS_CONFIG, REG;	\
+	srlx		REG, 17, REG;			\
+	and		REG, 0x1f, REG;			\
+	nop;						\
+	/* Starfire implementation. */			\
+	sethi		%hi(0x1fff40000d0 >> 9), REG;	\
+	sllx		REG, 9, REG;			\
+	or		REG, 0xd0, REG;			\
+	lduwa		[REG] ASI_PHYS_BYPASS_EC_E, REG;\
+	/* sun4v implementation. */			\
+	mov		SCRATCHPAD_CPUID, REG;		\
+	ldxa		[REG] ASI_SCRATCHPAD, REG;	\
+	nop;						\
+	nop;						\
+	.previous;
+
+#ifdef CONFIG_SMP
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	__GET_CPUID(TMP)			\
+	sethi	%hi(trap_block), DEST;		\
+	sllx	TMP, TRAP_BLOCK_SZ_SHIFT, TMP;	\
+	or	DEST, %lo(trap_block), DEST;	\
+	add	DEST, TMP, DEST;		\
+
+/* Clobbers TMP, current address space PGD phys address into DEST.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
+#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)	\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	add	DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
+
+/* Clobbers TMP, loads DEST with current thread info pointer.  */
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* Given the current thread info pointer in THR, load the per-cpu
+ * area base of the current processor into DEST.  REG1, REG2, and REG3 are
+ * clobbered.
+ *
+ * You absolutely cannot use DEST as a temporary in this code.  The
+ * reason is that traps can happen during execution, and return from
+ * trap will load the fully resolved DEST per-cpu base.  This can corrupt
+ * the calculations done by the macro mid-stream.
+ */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
+	lduh	[THR + TI_CPU], REG1;			\
+	sethi	%hi(trap_block), REG2;			\
+	sllx	REG1, TRAP_BLOCK_SZ_SHIFT, REG1;	\
+	or	REG2, %lo(trap_block), REG2;		\
+	add	REG2, REG1, REG2;			\
+	ldx	[REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST;
+
+#else
+
+#define TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	sethi	%hi(trap_block), DEST;		\
+	or	DEST, %lo(trap_block), DEST;	\
+
+/* Uniprocessor versions, we know the cpuid is zero.  */
+#define TRAP_LOAD_PGD_PHYS(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_PGD_PADDR], DEST;
+
+/* Clobbers TMP, loads local processor's IRQ work area into DEST.  */
+#define TRAP_LOAD_IRQ_WORK_PA(DEST, TMP)	\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	add	DEST, TRAP_PER_CPU_IRQ_WORKLIST_PA, DEST;
+
+#define TRAP_LOAD_THREAD_REG(DEST, TMP)		\
+	TRAP_LOAD_TRAP_BLOCK(DEST, TMP)		\
+	ldx	[DEST + TRAP_PER_CPU_THREAD], DEST;
+
+/* No per-cpu areas on uniprocessor, so no need to load DEST.  */
+#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)
+
+#endif /* !(CONFIG_SMP) */
+
+#endif /* _SPARC_TRAP_BLOCK_H */
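
The TRAP_PER_CPU_* offsets are hand-maintained against struct trap_per_cpu, and the `cpuid << TRAP_BLOCK_SZ_SHIFT` indexing in TRAP_LOAD_TRAP_BLOCK only works if the struct is exactly 256 bytes. A host-side sketch that checks both, modeling hv_fault_status as the 128-byte region the 0x40..0xc0 offsets imply; the _mock struct is illustrative, not kernel code:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct trap_per_cpu_mock {
	uint64_t thread, pgd_paddr, cpu_mondo_pa, dev_mondo_pa;
	uint64_t resum_mondo_pa, resum_kernel_buf_pa;
	uint64_t nonresum_mondo_pa, nonresum_kernel_buf_pa;
	uint8_t  fault_info[0x80];	/* stand-in for hv_fault_status */
	uint64_t cpu_mondo_block_pa, cpu_list_pa, tsb_huge, tsb_huge_temp;
	uint64_t irq_worklist_pa;
	uint32_t cpu_mondo_qmask, dev_mondo_qmask, resum_qmask, nonresum_qmask;
	uint64_t __per_cpu_base;
} __attribute__((aligned(64)));

int main(void)
{
	assert(offsetof(struct trap_per_cpu_mock, fault_info)         == 0x40);
	assert(offsetof(struct trap_per_cpu_mock, cpu_mondo_block_pa) == 0xc0);
	assert(offsetof(struct trap_per_cpu_mock, __per_cpu_base)     == 0xf8);
	/* cpuid << TRAP_BLOCK_SZ_SHIFT must step exactly one element */
	assert(sizeof(struct trap_per_cpu_mock) == (1 << 8));
	return 0;
}
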
diff --git a/arch/sparc/include/asm/traps.h b/arch/sparc/include/asm/traps.h
new file mode 100644
index 0000000..2fba260
--- /dev/null
+++ b/arch/sparc/include/asm/traps.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * traps.h:  Format of entries for the Sparc trap table.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_TRAPS_H
+#define _SPARC_TRAPS_H
+
+#include <uapi/asm/traps.h>
+
+#ifndef __ASSEMBLY__
+/* This is for V8 compliant Sparc CPUS */
+struct tt_entry {
+	unsigned long inst_one;
+	unsigned long inst_two;
+	unsigned long inst_three;
+	unsigned long inst_four;
+};
+
+/* We set this to _start in system setup. */
+extern struct tt_entry *sparc_ttable;
+
+#endif /* !(__ASSEMBLY__) */
+#endif /* !(_SPARC_TRAPS_H) */
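
Per the V8 trap model, the trap base register vectors to a table of 4-instruction entries, so trap type tt lands at sparc_ttable plus tt * 16 bytes, which is exactly the stride struct tt_entry encodes. A sketch of that addressing, using fixed-width fields since `unsigned long` is 4 bytes on sparc32:

#include <assert.h>
#include <stdint.h>

struct tt_entry_mock {
	uint32_t inst_one, inst_two, inst_three, inst_four;
};

/* Return the first instruction slot of the entry for trap type tt. */
static uint32_t *trap_entry(struct tt_entry_mock *ttable, unsigned int tt)
{
	return &ttable[tt].inst_one;
}

int main(void)
{
	struct tt_entry_mock table[256];

	assert(sizeof(struct tt_entry_mock) == 16);
	assert((char *)trap_entry(table, 2) - (char *)table == 32);
	return 0;
}
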
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
new file mode 100644
index 0000000..522a677
--- /dev/null
+++ b/arch/sparc/include/asm/tsb.h
@@ -0,0 +1,380 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_TSB_H
+#define _SPARC64_TSB_H
+
+/* The sparc64 TSB is similar to the powerpc hashtables.  It's a
+ * power-of-2 sized table of TAG/PTE pairs.  The cpu precomputes
+ * pointers into this table for 8K and 64K page sizes, and also a
+ * comparison TAG based upon the faulting virtual address and
+ * context.
+ *
+ * TLB miss trap handler software does the actual lookup via something
+ * of the form:
+ *
+ * 	ldxa		[%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1
+ * 	ldxa		[%g0] ASI_{D,I}MMU, %g6
+ *	sllx		%g6, 22, %g6
+ *	srlx		%g6, 22, %g6
+ * 	ldda		[%g1] ASI_NUCLEUS_QUAD_LDD, %g4
+ * 	cmp		%g4, %g6
+ * 	bne,pn	%xcc, tsb_miss_{d,i}tlb
+ * 	 mov		FAULT_CODE_{D,I}TLB, %g3
+ * 	stxa		%g5, [%g0] ASI_{D,I}TLB_DATA_IN
+ * 	retry
+ *
+ *
+ * Each 16-byte slot of the TSB is the 8-byte tag and then the 8-byte
+ * PTE.  The TAG is of the same layout as the TLB TAG TARGET mmu
+ * register which is:
+ *
+ * -------------------------------------------------
+ * |  -  |  CONTEXT |  -  |    VADDR bits 63:22    |
+ * -------------------------------------------------
+ *  63 61 60      48 47 42 41                     0
+ *
+ * But actually, since we use per-mm TSBs, we zero out the CONTEXT
+ * field.
+ *
+ * Like the powerpc hashtables we need to use locking in order to
+ * synchronize while we update the entries.  PTE updates need locking
+ * as well.
+ *
+ * We need to carefully choose a lock bit for the TSB entry.  We
+ * choose to use bit 47 in the tag.  Also, since we never map anything
+ * at page zero in context zero, we use zero as an invalid tag entry.
+ * When the lock bit is set, this forces a tag comparison failure.
+ */
+
+#define TSB_TAG_LOCK_BIT	47
+#define TSB_TAG_LOCK_HIGH	(1 << (TSB_TAG_LOCK_BIT - 32))
+
+#define TSB_TAG_INVALID_BIT	46
+#define TSB_TAG_INVALID_HIGH	(1 << (TSB_TAG_INVALID_BIT - 32))
+
+/* Some cpus support physical address quad loads.  We want to use
+ * those if possible so we don't need to hard-lock the TSB mapping
+ * into the TLB.  We encode some instruction patching in order to
+ * support this.
+ *
+ * The kernel TSB is locked into the TLB by virtue of being in the
+ * kernel image, so we don't play these games for swapper_tsb access.
+ */
+#ifndef __ASSEMBLY__
+struct tsb_ldquad_phys_patch_entry {
+	unsigned int	addr;
+	unsigned int	sun4u_insn;
+	unsigned int	sun4v_insn;
+};
+extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch,
+	__tsb_ldquad_phys_patch_end;
+
+struct tsb_phys_patch_entry {
+	unsigned int	addr;
+	unsigned int	insn;
+};
+extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
+#endif
+#define TSB_LOAD_QUAD(TSB, REG)	\
+661:	ldda		[TSB] ASI_NUCLEUS_QUAD_LDD, REG; \
+	.section	.tsb_ldquad_phys_patch, "ax"; \
+	.word		661b; \
+	ldda		[TSB] ASI_QUAD_LDD_PHYS, REG; \
+	ldda		[TSB] ASI_QUAD_LDD_PHYS_4V, REG; \
+	.previous
+
+#define TSB_LOAD_TAG_HIGH(TSB, REG) \
+661:	lduwa		[TSB] ASI_N, REG; \
+	.section	.tsb_phys_patch, "ax"; \
+	.word		661b; \
+	lduwa		[TSB] ASI_PHYS_USE_EC, REG; \
+	.previous
+
+#define TSB_LOAD_TAG(TSB, REG) \
+661:	ldxa		[TSB] ASI_N, REG; \
+	.section	.tsb_phys_patch, "ax"; \
+	.word		661b; \
+	ldxa		[TSB] ASI_PHYS_USE_EC, REG; \
+	.previous
+
+#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \
+661:	casa		[TSB] ASI_N, REG1, REG2; \
+	.section	.tsb_phys_patch, "ax"; \
+	.word		661b; \
+	casa		[TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+	.previous
+
+#define TSB_CAS_TAG(TSB, REG1, REG2) \
+661:	casxa		[TSB] ASI_N, REG1, REG2; \
+	.section	.tsb_phys_patch, "ax"; \
+	.word		661b; \
+	casxa		[TSB] ASI_PHYS_USE_EC, REG1, REG2; \
+	.previous
+
+#define TSB_STORE(ADDR, VAL) \
+661:	stxa		VAL, [ADDR] ASI_N; \
+	.section	.tsb_phys_patch, "ax"; \
+	.word		661b; \
+	stxa		VAL, [ADDR] ASI_PHYS_USE_EC; \
+	.previous
+
+#define TSB_LOCK_TAG(TSB, REG1, REG2)	\
+99:	TSB_LOAD_TAG_HIGH(TSB, REG1);	\
+	sethi	%hi(TSB_TAG_LOCK_HIGH), REG2;\
+	andcc	REG1, REG2, %g0;	\
+	bne,pn	%icc, 99b;		\
+	 nop;				\
+	TSB_CAS_TAG_HIGH(TSB, REG1, REG2);	\
+	cmp	REG1, REG2;		\
+	bne,pn	%icc, 99b;		\
+	 nop;				\
+
+#define TSB_WRITE(TSB, TTE, TAG) \
+	add	TSB, 0x8, TSB;   \
+	TSB_STORE(TSB, TTE);     \
+	sub	TSB, 0x8, TSB;   \
+	TSB_STORE(TSB, TAG);
+
+	/* Do a kernel page table walk.  Leaves valid PTE value in
+	 * REG1.  Jumps to FAIL_LABEL on early page table walk
+	 * termination.  VADDR will not be clobbered, but REG2 will.
+	 *
+	 * There are two masks we must apply to propagate bits from
+	 * the virtual address into the PTE physical address field
+	 * when dealing with huge pages.  This is because the page
+	 * table boundaries do not match the huge page size(s) the
+	 * hardware supports.
+	 *
+	 * In these cases we propagate the bits that are below the
+	 * page table level where we saw the huge page mapping, but
+	 * are still within the relevant physical bits for the huge
+	 * page size in question.  So for PMD mappings (which fall on
+	 * bit 23, for 8MB per PMD) we must propagate bit 22 for a
+	 * 4MB huge page.  For huge PUDs (which fall on bit 33, for
+	 * 8GB per PUD), we have to accommodate 256MB and 2GB huge
+	 * pages.  So for those we propagate bits 32 to 28.
+	 */
+#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)	\
+	sethi		%hi(swapper_pg_dir), REG1; \
+	or		REG1, %lo(swapper_pg_dir), REG1; \
+	sllx		VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldx		[REG1 + REG2], REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	sethi		%uhi(_PAGE_PUD_HUGE), REG2; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		REG2, 32, REG2; \
+	andcc		REG1, REG2, %g0; \
+	sethi		%hi(0xf8000000), REG2; \
+	bne,pt		%xcc, 697f; \
+	 sllx		REG2, 1, REG2; \
+	sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	sethi		%uhi(_PAGE_PMD_HUGE), REG2; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		REG2, 32, REG2; \
+	andcc		REG1, REG2, %g0; \
+	be,pn		%xcc, 698f; \
+	 sethi		%hi(0x400000), REG2; \
+697:	brgez,pn	REG1, FAIL_LABEL; \
+	 andn		REG1, REG2, REG1; \
+	and		VADDR, REG2, REG2; \
+	ba,pt		%xcc, 699f; \
+	 or		REG1, REG2, REG1; \
+698:	sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brgez,pn	REG1, FAIL_LABEL; \
+	 nop; \
+699:
+
+	/* PUD has been loaded into REG1, interpret the value, seeing
+	 * if it is a HUGE PUD or a normal one.  If it is not valid
+	 * then jump to FAIL_LABEL.  If it is a HUGE PUD, and it
+	 * translates to a valid PTE, branch to PTE_LABEL.
+	 *
+	 * We have to propagate bits [32:22] from the virtual address
+	 * to resolve at 4M granularity.
+	 */
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+700:	ba 700f;					\
+	 nop;						\
+	.section	.pud_huge_patch, "ax";		\
+	.word		700b;				\
+	nop;						\
+	.previous;					\
+	brz,pn		REG1, FAIL_LABEL;		\
+	 sethi		%uhi(_PAGE_PUD_HUGE), REG2;	\
+	sllx		REG2, 32, REG2;			\
+	andcc		REG1, REG2, %g0;		\
+	be,pt		%xcc, 700f;			\
+	 sethi		%hi(0xffe00000), REG2;		\
+	sllx		REG2, 1, REG2;			\
+	brgez,pn	REG1, FAIL_LABEL;		\
+	 andn		REG1, REG2, REG1;		\
+	and		VADDR, REG2, REG2;		\
+	brlz,pt		REG1, PTE_LABEL;		\
+	 or		REG1, REG2, REG1;		\
+700:
+#else
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL; \
+	 nop;
+#endif
+
+	/* PMD has been loaded into REG1, interpret the value, seeing
+	 * if it is a HUGE PMD or a normal one.  If it is not valid
+	 * then jump to FAIL_LABEL.  If it is a HUGE PMD, and it
+	 * translates to a valid PTE, branch to PTE_LABEL.
+	 *
+	 * We have to propagate the 4MB bit of the virtual address
+	 * because we are fabricating 8MB pages using 4MB hw pages.
+	 */
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL;		\
+	 sethi		%uhi(_PAGE_PMD_HUGE), REG2;	\
+	sllx		REG2, 32, REG2;			\
+	andcc		REG1, REG2, %g0;		\
+	be,pt		%xcc, 700f;			\
+	 sethi		%hi(4 * 1024 * 1024), REG2;	\
+	brgez,pn	REG1, FAIL_LABEL;		\
+	 andn		REG1, REG2, REG1;		\
+	and		VADDR, REG2, REG2;		\
+	brlz,pt		REG1, PTE_LABEL;		\
+	 or		REG1, REG2, REG1;		\
+700:
+#else
+#define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL; \
+	 nop;
+#endif
+
+	/* Do a user page table walk in MMU globals.  Leaves final,
+	 * valid, PTE value in REG1.  Jumps to FAIL_LABEL on early
+	 * page table walk termination or if the PTE is not valid.
+	 *
+	 * Physical base of page tables is in PHYS_PGD which will not
+	 * be modified.
+	 *
+	 * VADDR will not be clobbered, but REG1 and REG2 will.
+	 */
+#define USER_PGTABLE_WALK_TL1(VADDR, PHYS_PGD, REG1, REG2, FAIL_LABEL)	\
+	sllx		VADDR, 64 - (PGDIR_SHIFT + PGDIR_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
+	sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	add		REG1, REG2, REG1; \
+	ldxa		[REG1] ASI_PHYS_USE_EC, REG1; \
+	brgez,pn	REG1, FAIL_LABEL; \
+	 nop; \
+800:
+
+/* Look up an OBP mapping on VADDR in the prom_trans[] table at TL>0.
+ * If no entry is found, FAIL_LABEL will be branched to.  On success
+ * the resulting PTE value will be left in REG1.  VADDR is preserved
+ * by this routine.
+ */
+#define OBP_TRANS_LOOKUP(VADDR, REG1, REG2, REG3, FAIL_LABEL) \
+	sethi		%hi(prom_trans), REG1; \
+	or		REG1, %lo(prom_trans), REG1; \
+97:	ldx		[REG1 + 0x00], REG2; \
+	brz,pn		REG2, FAIL_LABEL; \
+	 nop; \
+	ldx		[REG1 + 0x08], REG3; \
+	add		REG2, REG3, REG3; \
+	cmp		REG2, VADDR; \
+	bgu,pt		%xcc, 98f; \
+	 cmp		VADDR, REG3; \
+	bgeu,pt		%xcc, 98f; \
+	 ldx		[REG1 + 0x10], REG3; \
+	sub		VADDR, REG2, REG2; \
+	ba,pt		%xcc, 99f; \
+	 add		REG3, REG2, REG1; \
+98:	ba,pt		%xcc, 97b; \
+	 add		REG1, (3 * 8), REG1; \
+99:
+
+	/* We use a 32K TSB for the whole kernel; this allows us to
+	 * handle about 16MB of modules and vmalloc mappings without
+	 * incurring many hash conflicts.
+	 */
+#define KERNEL_TSB_SIZE_BYTES	(32 * 1024)
+#define KERNEL_TSB_NENTRIES	\
+	(KERNEL_TSB_SIZE_BYTES / 16)
+#define KERNEL_TSB4M_NENTRIES	4096
+
+	/* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
+	 * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
+	 * and the found TTE will be left in REG1.  REG3 and REG4 must
+	 * be an even/odd pair of registers.
+	 *
+	 * VADDR and TAG will be preserved and not clobbered by this macro.
+	 */
+#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+661:	sethi		%uhi(swapper_tsb), REG1; \
+	sethi		%hi(swapper_tsb), REG2; \
+	or		REG1, %ulo(swapper_tsb), REG1; \
+	or		REG2, %lo(swapper_tsb), REG2; \
+	.section	.swapper_tsb_phys_patch, "ax"; \
+	.word		661b; \
+	.previous; \
+	sllx		REG1, 32, REG1; \
+	or		REG1, REG2, REG1; \
+	srlx		VADDR, PAGE_SHIFT, REG2; \
+	and		REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
+	sllx		REG2, 4, REG2; \
+	add		REG1, REG2, REG2; \
+	TSB_LOAD_QUAD(REG2, REG3); \
+	cmp		REG3, TAG; \
+	be,a,pt		%xcc, OK_LABEL; \
+	 mov		REG4, REG1;
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	/* This version uses a trick: the TAG is already (VADDR >> 22), so
+	 * we can make use of that for the index computation.
+	 */
+#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
+661:	sethi		%uhi(swapper_4m_tsb), REG1; \
+	sethi		%hi(swapper_4m_tsb), REG2; \
+	or		REG1, %ulo(swapper_4m_tsb), REG1; \
+	or		REG2, %lo(swapper_4m_tsb), REG2; \
+	.section	.swapper_4m_tsb_phys_patch, "ax"; \
+	.word		661b; \
+	.previous; \
+	sllx		REG1, 32, REG1; \
+	or		REG1, REG2, REG1; \
+	and		TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
+	sllx		REG2, 4, REG2; \
+	add		REG1, REG2, REG2; \
+	TSB_LOAD_QUAD(REG2, REG3); \
+	cmp		REG3, TAG; \
+	be,a,pt		%xcc, OK_LABEL; \
+	 mov		REG4, REG1;
+#endif
+
+#endif /* !(_SPARC64_TSB_H) */
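
A C restatement of the index and tag math the 8K kernel-TSB lookup macro above performs: hash on the virtual page number modulo the entry count, then compare a tag of VADDR >> 22 (the CONTEXT field staying zero, as noted earlier). A sketch with a hypothetical address and TTE value; tsb_index(), tsb_tag(), and the constants are illustrative mirrors of the assembly, not kernel code:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT_8K		13
#define KERNEL_TSB_NENTRIES	((32 * 1024) / 16)	/* 16-byte slots */

struct tsb_entry { uint64_t tag; uint64_t pte; };

static struct tsb_entry tsb[KERNEL_TSB_NENTRIES];

static unsigned long tsb_index(unsigned long vaddr)
{
	return (vaddr >> PAGE_SHIFT_8K) & (KERNEL_TSB_NENTRIES - 1);
}

static unsigned long tsb_tag(unsigned long vaddr)
{
	return vaddr >> 22;	/* context field stays zero */
}

int main(void)
{
	unsigned long va  = 0x0000010012344000UL;	/* hypothetical VA */
	uint64_t      tte = 0x8000000012345680UL;	/* hypothetical TTE */
	unsigned long idx = tsb_index(va);

	tsb[idx].tag = tsb_tag(va);
	tsb[idx].pte = tte;

	/* a hit requires both the hash and the tag compare to line up */
	assert(tsb[tsb_index(va)].tag == tsb_tag(va));
	assert(tsb[tsb_index(va)].pte == tte);
	return 0;
}
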
diff --git a/arch/sparc/include/asm/tsunami.h b/arch/sparc/include/asm/tsunami.h
new file mode 100644
index 0000000..acaf014
--- /dev/null
+++ b/arch/sparc/include/asm/tsunami.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tsunami.h:  Module specific definitions for Tsunami V8 Sparcs
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_TSUNAMI_H
+#define _SPARC_TSUNAMI_H
+
+#include <asm/asi.h>
+
+/* The MMU control register on the Tsunami:
+ *
+ * -----------------------------------------------------------------------
+ * | implvers |SW|AV|DV|MV| RSV |PC|ITD|ALC| RSV |PE| RC |IE|DE|RSV|NF|ME|
+ * -----------------------------------------------------------------------
+ *  31      24 23 22 21 20 19-18 17  16 14  13-12 11 10-9  8  7 6-2  1  0
+ *
+ * SW: Enable Software Table Walks  0=off 1=on
+ * AV: Address View bit
+ * DV: Data View bit
+ * MV: Memory View bit
+ * PC: Parity Control
+ * ITD: ITBR disable
+ * ALC: Alternate Cacheable
+ * PE: Parity Enable   0=off 1=on
+ * RC: Refresh Control
+ * IE: Instruction cache Enable  0=off 1=on
+ * DE: Data cache Enable  0=off 1=on
+ * NF: No Fault, same as all other SRMMUs
+ * ME: MMU Enable, same as all other SRMMUs
+ */
+
+#define TSUNAMI_SW        0x00800000
+#define TSUNAMI_AV        0x00400000
+#define TSUNAMI_DV        0x00200000
+#define TSUNAMI_MV        0x00100000
+#define TSUNAMI_PC        0x00020000
+#define TSUNAMI_ITD       0x00010000
+#define TSUNAMI_ALC       0x00008000
+#define TSUNAMI_PE        0x00001000
+#define TSUNAMI_RCMASK    0x00000C00
+#define TSUNAMI_IENAB     0x00000200
+#define TSUNAMI_DENAB     0x00000100
+#define TSUNAMI_NF        0x00000002
+#define TSUNAMI_ME        0x00000001
+
+static inline void tsunami_flush_icache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t"
+			     : /* no outputs */
+			     : "i" (ASI_M_IC_FLCLEAR)
+			     : "memory");
+}
+
+static inline void tsunami_flush_dcache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t"
+			     : /* no outputs */
+			     : "i" (ASI_M_DC_FLCLEAR)
+			     : "memory");
+}
+
+#endif /* !(_SPARC_TSUNAMI_H) */
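
A quick decode of the control-register layout documented above, using the field masks from this header; the sample register value is made up purely for illustration:

#include <stdio.h>
#include <stdint.h>

#define TSUNAMI_SW        0x00800000
#define TSUNAMI_IENAB     0x00000200
#define TSUNAMI_DENAB     0x00000100
#define TSUNAMI_ME        0x00000001

int main(void)
{
	uint32_t mcr = 0x41000301;	/* hypothetical sample value */

	printf("impl/vers: 0x%02x\n", (unsigned)(mcr >> 24));
	printf("sw walks : %s\n", (mcr & TSUNAMI_SW)    ? "on" : "off");
	printf("i-cache  : %s\n", (mcr & TSUNAMI_IENAB) ? "on" : "off");
	printf("d-cache  : %s\n", (mcr & TSUNAMI_DENAB) ? "on" : "off");
	printf("mmu      : %s\n", (mcr & TSUNAMI_ME)    ? "on" : "off");
	return 0;
}
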
diff --git a/arch/sparc/include/asm/ttable.h b/arch/sparc/include/asm/ttable.h
new file mode 100644
index 0000000..8f64694
--- /dev/null
+++ b/arch/sparc/include/asm/ttable.h
@@ -0,0 +1,694 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_TTABLE_H
+#define _SPARC64_TTABLE_H
+
+#include <asm/utrap.h>
+#include <asm/pil.h>
+
+#ifdef __ASSEMBLY__
+#include <asm/thread_info.h>
+#endif
+
+#define BOOT_KERNEL b sparc64_boot; nop; nop; nop; nop; nop; nop; nop;
+
+/* We need a "cleaned" instruction... */
+#define CLEAN_WINDOW							\
+	rdpr	%cleanwin, %l0;		add	%l0, 1, %l0;		\
+	wrpr	%l0, 0x0, %cleanwin;					\
+	clr	%o0;	clr	%o1;	clr	%o2;	clr	%o3;	\
+	clr	%o4;	clr	%o5;	clr	%o6;	clr	%o7;	\
+	clr	%l0;	clr	%l1;	clr	%l2;	clr	%l3;	\
+	clr	%l4;	clr	%l5;	clr	%l6;	clr	%l7;	\
+	retry;								\
+	nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;
+
+#define TRAP(routine)					\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, etrap;				\
+109:	 or	%g7, %lo(109b), %g7;			\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o0;			\
+	ba,pt	%xcc, rtrap;				\
+	 nop;						\
+	nop;
+
+#define TRAP_7INSNS(routine)				\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, etrap;				\
+109:	 or	%g7, %lo(109b), %g7;			\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o0;			\
+	ba,pt	%xcc, rtrap;				\
+	 nop;
+
+#define TRAP_SAVEFPU(routine)				\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, do_fptrap;			\
+109:	 or	%g7, %lo(109b), %g7;			\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o0;			\
+	ba,pt	%xcc, rtrap;				\
+	 nop;						\
+	nop;
+
+#define TRAP_NOSAVE(routine)				\
+	ba,pt	%xcc, routine;				\
+	 nop;						\
+	nop; nop; nop; nop; nop; nop;
+
+#define TRAP_NOSAVE_7INSNS(routine)			\
+	ba,pt	%xcc, routine;				\
+	 nop;						\
+	nop; nop; nop; nop; nop;
+
+#define TRAPTL1(routine)				\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, etraptl1;				\
+109:	 or	%g7, %lo(109b), %g7;			\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o0;			\
+	ba,pt	%xcc, rtrap;				\
+	 nop;						\
+	nop;
+
+#define TRAP_ARG(routine, arg)				\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, etrap;				\
+109:	 or	%g7, %lo(109b), %g7;			\
+	add	%sp, PTREGS_OFF, %o0;			\
+	call	routine;				\
+	 mov	arg, %o1;				\
+	ba,pt	%xcc, rtrap;				\
+	 nop;
+
+#define TRAPTL1_ARG(routine, arg)			\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, etraptl1;				\
+109:	 or	%g7, %lo(109b), %g7;			\
+	add	%sp, PTREGS_OFF, %o0;			\
+	call	routine;				\
+	 mov	arg, %o1;				\
+	ba,pt	%xcc, rtrap;				\
+	 nop;
+
+#define SYSCALL_TRAP(routine, systbl)			\
+	rdpr	%pil, %g2;				\
+	mov	TSTATE_SYSCALL, %g3;			\
+	sethi	%hi(109f), %g7;				\
+	ba,pt	%xcc, etrap_syscall;			\
+109:	 or	%g7, %lo(109b), %g7;			\
+	sethi	%hi(systbl), %l7;			\
+	ba,pt	%xcc, routine;				\
+	 or	%l7, %lo(systbl), %l7;
+
+#define TRAP_UTRAP(handler,lvl)				\
+	mov	handler, %g3;				\
+	ba,pt	%xcc, utrap_trap;			\
+	 mov	lvl, %g4;				\
+	nop;						\
+	nop;						\
+	nop;						\
+	nop;						\
+	nop;
+
+#ifdef CONFIG_COMPAT
+#define	LINUX_32BIT_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sys_call_table32)
+#else
+#define	LINUX_32BIT_SYSCALL_TRAP BTRAP(0x110)
+#endif
+#define LINUX_64BIT_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall, sys_call_table64)
+#define GETCC_TRAP TRAP(getcc)
+#define SETCC_TRAP TRAP(setcc)
+#define BREAKPOINT_TRAP TRAP(breakpoint_trap)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+#define TRAP_IRQ(routine, level)			\
+	rdpr	%pil, %g2;				\
+	wrpr	%g0, PIL_NORMAL_MAX, %pil;		\
+	sethi	%hi(1f-4), %g7;				\
+	ba,pt	%xcc, etrap_irq;			\
+	 or	%g7, %lo(1f-4), %g7;			\
+	nop;						\
+	nop;						\
+	nop;						\
+	.subsection	2;				\
+1:	call	trace_hardirqs_off;			\
+	 nop;						\
+	mov	level, %o0;				\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o1;			\
+	ba,a,pt	%xcc, rtrap_irq;			\
+	.previous;
+
+#else
+
+#define TRAP_IRQ(routine, level)			\
+	rdpr	%pil, %g2;				\
+	wrpr	%g0, PIL_NORMAL_MAX, %pil;		\
+	ba,pt	%xcc, etrap_irq;			\
+	 rd	%pc, %g7;				\
+	mov	level, %o0;				\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o1;			\
+	ba,a,pt	%xcc, rtrap_irq;
+
+#endif
+
+#define TRAP_NMI_IRQ(routine, level)			\
+	rdpr	%pil, %g2;				\
+	wrpr	%g0, PIL_NMI, %pil;			\
+	ba,pt	%xcc, etrap_irq;			\
+	 rd	%pc, %g7;				\
+	mov	level, %o0;				\
+	call	routine;				\
+	 add	%sp, PTREGS_OFF, %o1;			\
+	ba,a,pt	%xcc, rtrap_nmi;
+
+#define TRAP_IVEC TRAP_NOSAVE(do_ivec)
+
+#define BTRAP(lvl) TRAP_ARG(bad_trap, lvl)
+
+#define BTRAPTL1(lvl) TRAPTL1_ARG(bad_trap_tl1, lvl)
+
+#define FLUSH_WINDOW_TRAP						\
+	ba,pt	%xcc, etrap;						\
+	 rd	%pc, %g7;						\
+	flushw;								\
+	ldx	[%sp + PTREGS_OFF + PT_V9_TNPC], %l1;			\
+	add	%l1, 4, %l2;						\
+	stx	%l1, [%sp + PTREGS_OFF + PT_V9_TPC];			\
+	ba,pt	%xcc, rtrap;						\
+	 stx	%l2, [%sp + PTREGS_OFF + PT_V9_TNPC];
+
+#ifdef CONFIG_KPROBES
+#define KPROBES_TRAP(lvl) TRAP_IRQ(kprobe_trap, lvl)
+#else
+#define KPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
+#endif
+
+#ifdef CONFIG_UPROBES
+#define UPROBES_TRAP(lvl) TRAP_ARG(uprobe_trap, lvl)
+#else
+#define UPROBES_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
+#endif
+
+#ifdef CONFIG_KGDB
+#define KGDB_TRAP(lvl) TRAP_IRQ(kgdb_trap, lvl)
+#else
+#define KGDB_TRAP(lvl) TRAP_ARG(bad_trap, lvl)
+#endif
+
+#define SUN4V_ITSB_MISS					\
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2;		\
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4;	\
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5;	\
+	srlx	%g4, 22, %g6;				\
+	ba,pt	%xcc, sun4v_itsb_miss;			\
+	 nop;						\
+	nop;						\
+	nop;
+
+#define SUN4V_DTSB_MISS					\
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2;		\
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4;	\
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5;	\
+	srlx	%g4, 22, %g6;				\
+	ba,pt	%xcc, sun4v_dtsb_miss;			\
+	 nop;						\
+	nop;						\
+	nop;
+
+#define SUN4V_MCD_PRECISE				\
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2;		\
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4;	\
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5;	\
+	ba,pt	%xcc, etrap;				\
+	 rd	%pc, %g7;				\
+	ba,pt	%xcc, sun4v_mcd_detect_precise;		\
+	 nop;						\
+	nop;
+
+/* Before touching these macros, you owe it to yourself to go and
+ * see how arch/sparc64/kernel/winfixup.S works... -DaveM
+ *
+ * For the user cases we used to use the %asi register, but
+ * it turns out that the "wr xxx, %asi" costs ~5 cycles, so
+ * now we use immediate ASI loads and stores instead.  Kudos
+ * to Greg Onufer for pointing out this performance anomaly.
+ *
+ * Further note that we cannot use the g2, g4, g5, and g7 alternate
+ * globals in the spill routines; check out the save instruction in
+ * arch/sparc64/kernel/etrap.S to see what I mean about g2, and
+ * g4/g5 are the globals which are preserved by etrap processing
+ * for the caller of it.  The g7 register is the return pc for
+ * etrap.  Finally, g6 is the current thread register so we cannot
+ * use it in the spill handlers either.  Most of these rules do not
+ * apply to fill processing; there only g6 is not usable.
+ */
+
+/* Normal kernel spill */
+#define SPILL_0_NORMAL					\
+	stx	%l0, [%sp + STACK_BIAS + 0x00];		\
+	stx	%l1, [%sp + STACK_BIAS + 0x08];		\
+	stx	%l2, [%sp + STACK_BIAS + 0x10];		\
+	stx	%l3, [%sp + STACK_BIAS + 0x18];		\
+	stx	%l4, [%sp + STACK_BIAS + 0x20];		\
+	stx	%l5, [%sp + STACK_BIAS + 0x28];		\
+	stx	%l6, [%sp + STACK_BIAS + 0x30];		\
+	stx	%l7, [%sp + STACK_BIAS + 0x38];		\
+	stx	%i0, [%sp + STACK_BIAS + 0x40];		\
+	stx	%i1, [%sp + STACK_BIAS + 0x48];		\
+	stx	%i2, [%sp + STACK_BIAS + 0x50];		\
+	stx	%i3, [%sp + STACK_BIAS + 0x58];		\
+	stx	%i4, [%sp + STACK_BIAS + 0x60];		\
+	stx	%i5, [%sp + STACK_BIAS + 0x68];		\
+	stx	%i6, [%sp + STACK_BIAS + 0x70];		\
+	stx	%i7, [%sp + STACK_BIAS + 0x78];		\
+	saved; retry; nop; nop; nop; nop; nop; nop;	\
+	nop; nop; nop; nop; nop; nop; nop; nop;
+
+#define SPILL_0_NORMAL_ETRAP				\
+etrap_kernel_spill:					\
+	stx	%l0, [%sp + STACK_BIAS + 0x00];		\
+	stx	%l1, [%sp + STACK_BIAS + 0x08];		\
+	stx	%l2, [%sp + STACK_BIAS + 0x10];		\
+	stx	%l3, [%sp + STACK_BIAS + 0x18];		\
+	stx	%l4, [%sp + STACK_BIAS + 0x20];		\
+	stx	%l5, [%sp + STACK_BIAS + 0x28];		\
+	stx	%l6, [%sp + STACK_BIAS + 0x30];		\
+	stx	%l7, [%sp + STACK_BIAS + 0x38];		\
+	stx	%i0, [%sp + STACK_BIAS + 0x40];		\
+	stx	%i1, [%sp + STACK_BIAS + 0x48];		\
+	stx	%i2, [%sp + STACK_BIAS + 0x50];		\
+	stx	%i3, [%sp + STACK_BIAS + 0x58];		\
+	stx	%i4, [%sp + STACK_BIAS + 0x60];		\
+	stx	%i5, [%sp + STACK_BIAS + 0x68];		\
+	stx	%i6, [%sp + STACK_BIAS + 0x70];		\
+	stx	%i7, [%sp + STACK_BIAS + 0x78];		\
+	saved;						\
+	sub	%g1, 2, %g1;				\
+	ba,pt	%xcc, etrap_save;			\
+	wrpr	%g1, %cwp;				\
+	nop; nop; nop; nop; nop; nop; nop; nop;		\
+	nop; nop; nop; nop;
+
+/* Normal 64bit spill */
+#define SPILL_1_GENERIC(ASI)				\
+	add	%sp, STACK_BIAS + 0x00, %g1;		\
+	stxa	%l0, [%g1 + %g0] ASI;			\
+	mov	0x08, %g3;				\
+	stxa	%l1, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%l2, [%g1 + %g0] ASI;			\
+	stxa	%l3, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%l4, [%g1 + %g0] ASI;			\
+	stxa	%l5, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%l6, [%g1 + %g0] ASI;			\
+	stxa	%l7, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%i0, [%g1 + %g0] ASI;			\
+	stxa	%i1, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%i2, [%g1 + %g0] ASI;			\
+	stxa	%i3, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%i4, [%g1 + %g0] ASI;			\
+	stxa	%i5, [%g1 + %g3] ASI;			\
+	add	%g1, 0x10, %g1;				\
+	stxa	%i6, [%g1 + %g0] ASI;			\
+	stxa	%i7, [%g1 + %g3] ASI;			\
+	saved;						\
+	retry; nop; nop;				\
+	b,a,pt	%xcc, spill_fixup_dax;			\
+	b,a,pt	%xcc, spill_fixup_mna;			\
+	b,a,pt	%xcc, spill_fixup;
+
+#define SPILL_1_GENERIC_ETRAP				\
+etrap_user_spill_64bit:					\
+	stxa	%l0, [%sp + STACK_BIAS + 0x00] %asi;	\
+	stxa	%l1, [%sp + STACK_BIAS + 0x08] %asi;	\
+	stxa	%l2, [%sp + STACK_BIAS + 0x10] %asi;	\
+	stxa	%l3, [%sp + STACK_BIAS + 0x18] %asi;	\
+	stxa	%l4, [%sp + STACK_BIAS + 0x20] %asi;	\
+	stxa	%l5, [%sp + STACK_BIAS + 0x28] %asi;	\
+	stxa	%l6, [%sp + STACK_BIAS + 0x30] %asi;	\
+	stxa	%l7, [%sp + STACK_BIAS + 0x38] %asi;	\
+	stxa	%i0, [%sp + STACK_BIAS + 0x40] %asi;	\
+	stxa	%i1, [%sp + STACK_BIAS + 0x48] %asi;	\
+	stxa	%i2, [%sp + STACK_BIAS + 0x50] %asi;	\
+	stxa	%i3, [%sp + STACK_BIAS + 0x58] %asi;	\
+	stxa	%i4, [%sp + STACK_BIAS + 0x60] %asi;	\
+	stxa	%i5, [%sp + STACK_BIAS + 0x68] %asi;	\
+	stxa	%i6, [%sp + STACK_BIAS + 0x70] %asi;	\
+	stxa	%i7, [%sp + STACK_BIAS + 0x78] %asi;	\
+	saved;						\
+	sub	%g1, 2, %g1;				\
+	ba,pt	%xcc, etrap_save;			\
+	 wrpr	%g1, %cwp;				\
+	nop; nop; nop; nop; nop;			\
+	nop; nop; nop; nop;				\
+	ba,a,pt	%xcc, etrap_spill_fixup_64bit;		\
+	ba,a,pt	%xcc, etrap_spill_fixup_64bit;		\
+	ba,a,pt	%xcc, etrap_spill_fixup_64bit;
+
+#define SPILL_1_GENERIC_ETRAP_FIXUP			\
+etrap_spill_fixup_64bit:				\
+	ldub	[%g6 + TI_WSAVED], %g1;			\
+	sll	%g1, 3, %g3;				\
+	add	%g6, %g3, %g3;				\
+	stx	%sp, [%g3 + TI_RWIN_SPTRS];		\
+	sll	%g1, 7, %g3;				\
+	add	%g6, %g3, %g3;				\
+	stx	%l0, [%g3 + TI_REG_WINDOW + 0x00];	\
+	stx	%l1, [%g3 + TI_REG_WINDOW + 0x08];	\
+	stx	%l2, [%g3 + TI_REG_WINDOW + 0x10];	\
+	stx	%l3, [%g3 + TI_REG_WINDOW + 0x18];	\
+	stx	%l4, [%g3 + TI_REG_WINDOW + 0x20];	\
+	stx	%l5, [%g3 + TI_REG_WINDOW + 0x28];	\
+	stx	%l6, [%g3 + TI_REG_WINDOW + 0x30];	\
+	stx	%l7, [%g3 + TI_REG_WINDOW + 0x38];	\
+	stx	%i0, [%g3 + TI_REG_WINDOW + 0x40];	\
+	stx	%i1, [%g3 + TI_REG_WINDOW + 0x48];	\
+	stx	%i2, [%g3 + TI_REG_WINDOW + 0x50];	\
+	stx	%i3, [%g3 + TI_REG_WINDOW + 0x58];	\
+	stx	%i4, [%g3 + TI_REG_WINDOW + 0x60];	\
+	stx	%i5, [%g3 + TI_REG_WINDOW + 0x68];	\
+	stx	%i6, [%g3 + TI_REG_WINDOW + 0x70];	\
+	stx	%i7, [%g3 + TI_REG_WINDOW + 0x78];	\
+	add	%g1, 1, %g1;				\
+	stb	%g1, [%g6 + TI_WSAVED];			\
+	saved;						\
+	rdpr	%cwp, %g1;				\
+	sub	%g1, 2, %g1;				\
+	ba,pt	%xcc, etrap_save;			\
+	 wrpr	%g1, %cwp;				\
+	nop; nop; nop
+
+/* Normal 32bit spill */
+#define SPILL_2_GENERIC(ASI)				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, (. - (128 + 4));			\
+	 srl	%sp, 0, %sp;				\
+	stwa	%l0, [%sp + %g0] ASI;			\
+	mov	0x04, %g3;				\
+	stwa	%l1, [%sp + %g3] ASI;			\
+	add	%sp, 0x08, %g1;				\
+	stwa	%l2, [%g1 + %g0] ASI;			\
+	stwa	%l3, [%g1 + %g3] ASI;			\
+	add	%g1, 0x08, %g1;				\
+	stwa	%l4, [%g1 + %g0] ASI;			\
+	stwa	%l5, [%g1 + %g3] ASI;			\
+	add	%g1, 0x08, %g1;				\
+	stwa	%l6, [%g1 + %g0] ASI;			\
+	stwa	%l7, [%g1 + %g3] ASI;			\
+	add	%g1, 0x08, %g1;				\
+	stwa	%i0, [%g1 + %g0] ASI;			\
+	stwa	%i1, [%g1 + %g3] ASI;			\
+	add	%g1, 0x08, %g1;				\
+	stwa	%i2, [%g1 + %g0] ASI;			\
+	stwa	%i3, [%g1 + %g3] ASI;			\
+	add	%g1, 0x08, %g1;				\
+	stwa	%i4, [%g1 + %g0] ASI;			\
+	stwa	%i5, [%g1 + %g3] ASI;			\
+	add	%g1, 0x08, %g1;				\
+	stwa	%i6, [%g1 + %g0] ASI;			\
+	stwa	%i7, [%g1 + %g3] ASI;			\
+	saved;						\
+	retry;						\
+	b,a,pt	%xcc, spill_fixup_dax;			\
+	b,a,pt	%xcc, spill_fixup_mna;			\
+	b,a,pt	%xcc, spill_fixup;
+
+#define SPILL_2_GENERIC_ETRAP		\
+etrap_user_spill_32bit:			\
+	and	%sp, 1, %g3;		\
+	brnz,pn	%g3, etrap_user_spill_64bit;	\
+	 srl	%sp, 0, %sp;		\
+	stwa	%l0, [%sp + 0x00] %asi;	\
+	stwa	%l1, [%sp + 0x04] %asi;	\
+	stwa	%l2, [%sp + 0x08] %asi;	\
+	stwa	%l3, [%sp + 0x0c] %asi;	\
+	stwa	%l4, [%sp + 0x10] %asi;	\
+	stwa	%l5, [%sp + 0x14] %asi;	\
+	stwa	%l6, [%sp + 0x18] %asi;	\
+	stwa	%l7, [%sp + 0x1c] %asi;	\
+	stwa	%i0, [%sp + 0x20] %asi;	\
+	stwa	%i1, [%sp + 0x24] %asi;	\
+	stwa	%i2, [%sp + 0x28] %asi;	\
+	stwa	%i3, [%sp + 0x2c] %asi;	\
+	stwa	%i4, [%sp + 0x30] %asi;	\
+	stwa	%i5, [%sp + 0x34] %asi;	\
+	stwa	%i6, [%sp + 0x38] %asi;	\
+	stwa	%i7, [%sp + 0x3c] %asi;	\
+	saved;				\
+	sub	%g1, 2, %g1;		\
+	ba,pt	%xcc, etrap_save;	\
+	 wrpr	%g1, %cwp;		\
+	nop; nop; nop; nop;		\
+	nop; nop;			\
+	ba,a,pt	%xcc, etrap_spill_fixup_32bit; \
+	ba,a,pt	%xcc, etrap_spill_fixup_32bit; \
+	ba,a,pt	%xcc, etrap_spill_fixup_32bit;
+
+#define SPILL_2_GENERIC_ETRAP_FIXUP			\
+etrap_spill_fixup_32bit:				\
+	ldub	[%g6 + TI_WSAVED], %g1;			\
+	sll	%g1, 3, %g3;				\
+	add	%g6, %g3, %g3;				\
+	stx	%sp, [%g3 + TI_RWIN_SPTRS];		\
+	sll	%g1, 7, %g3;				\
+	add	%g6, %g3, %g3;				\
+	stw	%l0, [%g3 + TI_REG_WINDOW + 0x00];	\
+	stw	%l1, [%g3 + TI_REG_WINDOW + 0x04];	\
+	stw	%l2, [%g3 + TI_REG_WINDOW + 0x08];	\
+	stw	%l3, [%g3 + TI_REG_WINDOW + 0x0c];	\
+	stw	%l4, [%g3 + TI_REG_WINDOW + 0x10];	\
+	stw	%l5, [%g3 + TI_REG_WINDOW + 0x14];	\
+	stw	%l6, [%g3 + TI_REG_WINDOW + 0x18];	\
+	stw	%l7, [%g3 + TI_REG_WINDOW + 0x1c];	\
+	stw	%i0, [%g3 + TI_REG_WINDOW + 0x20];	\
+	stw	%i1, [%g3 + TI_REG_WINDOW + 0x24];	\
+	stw	%i2, [%g3 + TI_REG_WINDOW + 0x28];	\
+	stw	%i3, [%g3 + TI_REG_WINDOW + 0x2c];	\
+	stw	%i4, [%g3 + TI_REG_WINDOW + 0x30];	\
+	stw	%i5, [%g3 + TI_REG_WINDOW + 0x34];	\
+	stw	%i6, [%g3 + TI_REG_WINDOW + 0x38];	\
+	stw	%i7, [%g3 + TI_REG_WINDOW + 0x3c];	\
+	add	%g1, 1, %g1;				\
+	stb	%g1, [%g6 + TI_WSAVED];			\
+	saved;						\
+	rdpr	%cwp, %g1;				\
+	sub	%g1, 2, %g1;				\
+	ba,pt	%xcc, etrap_save;			\
+	 wrpr	%g1, %cwp;				\
+	nop; nop; nop
+
+#define SPILL_1_NORMAL SPILL_1_GENERIC(ASI_AIUP)
+#define SPILL_2_NORMAL SPILL_2_GENERIC(ASI_AIUP)
+#define SPILL_3_NORMAL SPILL_0_NORMAL
+#define SPILL_4_NORMAL SPILL_0_NORMAL
+#define SPILL_5_NORMAL SPILL_0_NORMAL
+#define SPILL_6_NORMAL SPILL_0_NORMAL
+#define SPILL_7_NORMAL SPILL_0_NORMAL
+
+#define SPILL_0_OTHER SPILL_0_NORMAL
+#define SPILL_1_OTHER SPILL_1_GENERIC(ASI_AIUS)
+#define SPILL_2_OTHER SPILL_2_GENERIC(ASI_AIUS)
+#define SPILL_3_OTHER SPILL_3_NORMAL
+#define SPILL_4_OTHER SPILL_4_NORMAL
+#define SPILL_5_OTHER SPILL_5_NORMAL
+#define SPILL_6_OTHER SPILL_6_NORMAL
+#define SPILL_7_OTHER SPILL_7_NORMAL
+
+/* Normal kernel fill */
+#define FILL_0_NORMAL					\
+	ldx	[%sp + STACK_BIAS + 0x00], %l0;		\
+	ldx	[%sp + STACK_BIAS + 0x08], %l1;		\
+	ldx	[%sp + STACK_BIAS + 0x10], %l2;		\
+	ldx	[%sp + STACK_BIAS + 0x18], %l3;		\
+	ldx	[%sp + STACK_BIAS + 0x20], %l4;		\
+	ldx	[%sp + STACK_BIAS + 0x28], %l5;		\
+	ldx	[%sp + STACK_BIAS + 0x30], %l6;		\
+	ldx	[%sp + STACK_BIAS + 0x38], %l7;		\
+	ldx	[%sp + STACK_BIAS + 0x40], %i0;		\
+	ldx	[%sp + STACK_BIAS + 0x48], %i1;		\
+	ldx	[%sp + STACK_BIAS + 0x50], %i2;		\
+	ldx	[%sp + STACK_BIAS + 0x58], %i3;		\
+	ldx	[%sp + STACK_BIAS + 0x60], %i4;		\
+	ldx	[%sp + STACK_BIAS + 0x68], %i5;		\
+	ldx	[%sp + STACK_BIAS + 0x70], %i6;		\
+	ldx	[%sp + STACK_BIAS + 0x78], %i7;		\
+	restored; retry; nop; nop; nop; nop; nop; nop;	\
+	nop; nop; nop; nop; nop; nop; nop; nop;
+
+#define FILL_0_NORMAL_RTRAP				\
+kern_rtt_fill:						\
+	rdpr	%cwp, %g1;				\
+	sub	%g1, 1, %g1;				\
+	wrpr	%g1, %cwp;				\
+	ldx	[%sp + STACK_BIAS + 0x00], %l0;		\
+	ldx	[%sp + STACK_BIAS + 0x08], %l1;		\
+	ldx	[%sp + STACK_BIAS + 0x10], %l2;		\
+	ldx	[%sp + STACK_BIAS + 0x18], %l3;		\
+	ldx	[%sp + STACK_BIAS + 0x20], %l4;		\
+	ldx	[%sp + STACK_BIAS + 0x28], %l5;		\
+	ldx	[%sp + STACK_BIAS + 0x30], %l6;		\
+	ldx	[%sp + STACK_BIAS + 0x38], %l7;		\
+	ldx	[%sp + STACK_BIAS + 0x40], %i0;		\
+	ldx	[%sp + STACK_BIAS + 0x48], %i1;		\
+	ldx	[%sp + STACK_BIAS + 0x50], %i2;		\
+	ldx	[%sp + STACK_BIAS + 0x58], %i3;		\
+	ldx	[%sp + STACK_BIAS + 0x60], %i4;		\
+	ldx	[%sp + STACK_BIAS + 0x68], %i5;		\
+	ldx	[%sp + STACK_BIAS + 0x70], %i6;		\
+	ldx	[%sp + STACK_BIAS + 0x78], %i7;		\
+	restored;					\
+	add	%g1, 1, %g1;				\
+	ba,pt	%xcc, kern_rtt_restore;			\
+	 wrpr	%g1, %cwp;				\
+	nop; nop; nop; nop; nop;			\
+	nop; nop; nop; nop;
+
+
+/* Normal 64bit fill */
+#define FILL_1_GENERIC(ASI)				\
+	add	%sp, STACK_BIAS + 0x00, %g1;		\
+	ldxa	[%g1 + %g0] ASI, %l0;			\
+	mov	0x08, %g2;				\
+	mov	0x10, %g3;				\
+	ldxa	[%g1 + %g2] ASI, %l1;			\
+	mov	0x18, %g5;				\
+	ldxa	[%g1 + %g3] ASI, %l2;			\
+	ldxa	[%g1 + %g5] ASI, %l3;			\
+	add	%g1, 0x20, %g1;				\
+	ldxa	[%g1 + %g0] ASI, %l4;			\
+	ldxa	[%g1 + %g2] ASI, %l5;			\
+	ldxa	[%g1 + %g3] ASI, %l6;			\
+	ldxa	[%g1 + %g5] ASI, %l7;			\
+	add	%g1, 0x20, %g1;				\
+	ldxa	[%g1 + %g0] ASI, %i0;			\
+	ldxa	[%g1 + %g2] ASI, %i1;			\
+	ldxa	[%g1 + %g3] ASI, %i2;			\
+	ldxa	[%g1 + %g5] ASI, %i3;			\
+	add	%g1, 0x20, %g1;				\
+	ldxa	[%g1 + %g0] ASI, %i4;			\
+	ldxa	[%g1 + %g2] ASI, %i5;			\
+	ldxa	[%g1 + %g3] ASI, %i6;			\
+	ldxa	[%g1 + %g5] ASI, %i7;			\
+	restored;					\
+	retry; nop; nop; nop; nop;			\
+	b,a,pt	%xcc, fill_fixup_dax;			\
+	b,a,pt	%xcc, fill_fixup_mna;			\
+	b,a,pt	%xcc, fill_fixup;
+
+#define FILL_1_GENERIC_RTRAP				\
+user_rtt_fill_64bit:					\
+	ldxa	[%sp + STACK_BIAS + 0x00] %asi, %l0;	\
+	ldxa	[%sp + STACK_BIAS + 0x08] %asi, %l1;	\
+	ldxa	[%sp + STACK_BIAS + 0x10] %asi, %l2;	\
+	ldxa	[%sp + STACK_BIAS + 0x18] %asi, %l3;	\
+	ldxa	[%sp + STACK_BIAS + 0x20] %asi, %l4;	\
+	ldxa	[%sp + STACK_BIAS + 0x28] %asi, %l5;	\
+	ldxa	[%sp + STACK_BIAS + 0x30] %asi, %l6;	\
+	ldxa	[%sp + STACK_BIAS + 0x38] %asi, %l7;	\
+	ldxa	[%sp + STACK_BIAS + 0x40] %asi, %i0;	\
+	ldxa	[%sp + STACK_BIAS + 0x48] %asi, %i1;	\
+	ldxa	[%sp + STACK_BIAS + 0x50] %asi, %i2;	\
+	ldxa	[%sp + STACK_BIAS + 0x58] %asi, %i3;	\
+	ldxa	[%sp + STACK_BIAS + 0x60] %asi, %i4;	\
+	ldxa	[%sp + STACK_BIAS + 0x68] %asi, %i5;	\
+	ldxa	[%sp + STACK_BIAS + 0x70] %asi, %i6;	\
+	ldxa	[%sp + STACK_BIAS + 0x78] %asi, %i7;	\
+	ba,pt	%xcc, user_rtt_pre_restore;		\
+	 restored;					\
+	nop; nop; nop; nop; nop; nop;			\
+	nop; nop; nop; nop; nop;			\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_dax;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_mna;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup;
+
+
+/* Normal 32bit fill */
+#define FILL_2_GENERIC(ASI)				\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, (. - (128 + 4));			\
+	 srl	%sp, 0, %sp;				\
+	lduwa	[%sp + %g0] ASI, %l0;			\
+	mov	0x04, %g2;				\
+	mov	0x08, %g3;				\
+	lduwa	[%sp + %g2] ASI, %l1;			\
+	mov	0x0c, %g5;				\
+	lduwa	[%sp + %g3] ASI, %l2;			\
+	lduwa	[%sp + %g5] ASI, %l3;			\
+	add	%sp, 0x10, %g1;				\
+	lduwa	[%g1 + %g0] ASI, %l4;			\
+	lduwa	[%g1 + %g2] ASI, %l5;			\
+	lduwa	[%g1 + %g3] ASI, %l6;			\
+	lduwa	[%g1 + %g5] ASI, %l7;			\
+	add	%g1, 0x10, %g1;				\
+	lduwa	[%g1 + %g0] ASI, %i0;			\
+	lduwa	[%g1 + %g2] ASI, %i1;			\
+	lduwa	[%g1 + %g3] ASI, %i2;			\
+	lduwa	[%g1 + %g5] ASI, %i3;			\
+	add	%g1, 0x10, %g1;				\
+	lduwa	[%g1 + %g0] ASI, %i4;			\
+	lduwa	[%g1 + %g2] ASI, %i5;			\
+	lduwa	[%g1 + %g3] ASI, %i6;			\
+	lduwa	[%g1 + %g5] ASI, %i7;			\
+	restored;					\
+	retry; nop; nop;				\
+	b,a,pt	%xcc, fill_fixup_dax;			\
+	b,a,pt	%xcc, fill_fixup_mna;			\
+	b,a,pt	%xcc, fill_fixup;
+
+#define FILL_2_GENERIC_RTRAP				\
+user_rtt_fill_32bit:					\
+	and	%sp, 1, %g3;				\
+	brnz,pn	%g3, user_rtt_fill_64bit;		\
+	 srl	%sp, 0, %sp;				\
+	lduwa	[%sp + 0x00] %asi, %l0;			\
+	lduwa	[%sp + 0x04] %asi, %l1;			\
+	lduwa	[%sp + 0x08] %asi, %l2;			\
+	lduwa	[%sp + 0x0c] %asi, %l3;			\
+	lduwa	[%sp + 0x10] %asi, %l4;			\
+	lduwa	[%sp + 0x14] %asi, %l5;			\
+	lduwa	[%sp + 0x18] %asi, %l6;			\
+	lduwa	[%sp + 0x1c] %asi, %l7;			\
+	lduwa	[%sp + 0x20] %asi, %i0;			\
+	lduwa	[%sp + 0x24] %asi, %i1;			\
+	lduwa	[%sp + 0x28] %asi, %i2;			\
+	lduwa	[%sp + 0x2c] %asi, %i3;			\
+	lduwa	[%sp + 0x30] %asi, %i4;			\
+	lduwa	[%sp + 0x34] %asi, %i5;			\
+	lduwa	[%sp + 0x38] %asi, %i6;			\
+	lduwa	[%sp + 0x3c] %asi, %i7;			\
+	ba,pt	%xcc, user_rtt_pre_restore;		\
+	 restored;					\
+	nop; nop; nop; nop; nop;			\
+	nop; nop; nop;					\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_dax;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup_mna;		\
+	ba,a,pt	%xcc, user_rtt_fill_fixup;
+
+
+#define FILL_1_NORMAL FILL_1_GENERIC(ASI_AIUP)
+#define FILL_2_NORMAL FILL_2_GENERIC(ASI_AIUP)
+#define FILL_3_NORMAL FILL_0_NORMAL
+#define FILL_4_NORMAL FILL_0_NORMAL
+#define FILL_5_NORMAL FILL_0_NORMAL
+#define FILL_6_NORMAL FILL_0_NORMAL
+#define FILL_7_NORMAL FILL_0_NORMAL
+
+#define FILL_0_OTHER FILL_0_NORMAL
+#define FILL_1_OTHER FILL_1_GENERIC(ASI_AIUS)
+#define FILL_2_OTHER FILL_2_GENERIC(ASI_AIUS)
+#define FILL_3_OTHER FILL_3_NORMAL
+#define FILL_4_OTHER FILL_4_NORMAL
+#define FILL_5_OTHER FILL_5_NORMAL
+#define FILL_6_OTHER FILL_6_NORMAL
+#define FILL_7_OTHER FILL_7_NORMAL
+
+#endif /* !(_SPARC64_TTABLE_H) */
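
The ETRAP spill-fixup macros above implement a small bookkeeping scheme: when a user window cannot be written to its stack, it is parked in thread_info under a saved-window counter and replayed later. A rough standalone C model of that bookkeeping follows; the names loosely mirror TI_WSAVED, TI_RWIN_SPTRS and TI_REG_WINDOW, and the whole thing is an illustrative sketch, not kernel code.

/* Illustrative model of the SPILL_*_GENERIC_ETRAP_FIXUP bookkeeping. */
#include <stdio.h>

#define NSWINS 7	/* max parked windows, as on sparc64 */

struct fake_thread_info {
	unsigned char wsaved;			/* TI_WSAVED counter	*/
	unsigned long rwin_sptrs[NSWINS];	/* TI_RWIN_SPTRS	*/
	unsigned long reg_window[NSWINS][16];	/* TI_REG_WINDOW	*/
};

static void park_window(struct fake_thread_info *ti, unsigned long sp,
			const unsigned long regs[16])
{
	unsigned char n = ti->wsaved;	/* ldub [%g6 + TI_WSAVED], %g1	*/
	int i;

	ti->rwin_sptrs[n] = sp;		/* stx %sp, [... + TI_RWIN_SPTRS] */
	for (i = 0; i < 16; i++)	/* the 16 stx/stw stores	*/
		ti->reg_window[n][i] = regs[i];
	ti->wsaved = n + 1;		/* stb %g1, [%g6 + TI_WSAVED]	*/
}

int main(void)
{
	struct fake_thread_info ti = { 0 };
	unsigned long regs[16] = { 0xdeadbeef };

	park_window(&ti, 0xf0010000UL, regs);
	printf("wsaved=%u sp=%#lx l0=%#lx\n",
	       ti.wsaved, ti.rwin_sptrs[0], ti.reg_window[0][0]);
	return 0;
}
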
diff --git a/arch/sparc/include/asm/turbosparc.h b/arch/sparc/include/asm/turbosparc.h
new file mode 100644
index 0000000..23df777
--- /dev/null
+++ b/arch/sparc/include/asm/turbosparc.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * turbosparc.h:  Defines specific to the TurboSparc module.
+ *            This is SRMMU stuff.
+ *
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+#ifndef _SPARC_TURBOSPARC_H
+#define _SPARC_TURBOSPARC_H
+
+#include <asm/asi.h>
+#include <asm/pgtsrmmu.h>
+
+/* Bits in the SRMMU control register for TurboSparc modules.
+ *
+ * -------------------------------------------------------------------
+ * |impl-vers| RSV| PMC |PE|PC| RSV |BM| RFR |IC|DC|PSO|RSV|ICS|NF|ME|
+ * -------------------------------------------------------------------
+ *  31    24 23-21 20-19 18 17 16-15 14 13-10  9  8  7  6-3   2  1  0
+ *
+ * BM: Boot Mode -- 0 = not in boot mode, 1 = in boot mode
+ *
+ * This indicates whether the TurboSparc is in boot-mode or not.
+ *
+ * IC: Instruction Cache -- 0 = off, 1 = on
+ * DC: Data Cache -- 0 = off, 1 = on
+ *
+ * These bits enable the on-cpu TurboSparc split I/D caches.
+ *
+ * ICS: ICache Snooping -- 0 = disable, 1 = enable snooping of icache
+ * NF: No Fault -- 0 = faults generate traps, 1 = faults don't trap
+ * ME: MMU enable -- 0 = mmu not translating, 1 = mmu translating
+ *
+ */
+
+#define TURBOSPARC_MMUENABLE    0x00000001
+#define TURBOSPARC_NOFAULT      0x00000002
+#define TURBOSPARC_ICSNOOP	0x00000004
+#define TURBOSPARC_PSO          0x00000080
+#define TURBOSPARC_DCENABLE     0x00000100   /* Enable data cache */
+#define TURBOSPARC_ICENABLE     0x00000200   /* Enable instruction cache */
+#define TURBOSPARC_BMODE        0x00004000   /* Boot mode */
+#define TURBOSPARC_PARITYODD	0x00020000   /* Parity odd, if enabled */
+#define TURBOSPARC_PCENABLE	0x00040000   /* Enable parity checking */
+
+/* Bits in the CPU configuration register for TurboSparc modules.
+ *
+ * -------------------------------------------------------
+ * |IOClk|SNP|AXClk| RAH |  WS |  RSV  |SBC|WT|uS2|SE|SCC|
+ * -------------------------------------------------------
+ *    31   30 29-28 27-26 25-23   22-8  7-6  5  4   3 2-0
+ *
+ */
+
+#define TURBOSPARC_SCENABLE 0x00000008	 /* Secondary cache enable */
+#define TURBOSPARC_uS2	    0x00000010   /* Swift compatibility mode */
+#define TURBOSPARC_WTENABLE 0x00000020	 /* Write thru for dcache */
+#define TURBOSPARC_SNENABLE 0x40000000	 /* DVMA snoop enable */
+
+#ifndef __ASSEMBLY__
+
+/* Bits [13:5] select one of 512 instruction cache tags */
+static inline void turbosparc_inv_insn_tag(unsigned long addr)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (addr), "i" (ASI_M_TXTC_TAG)
+			     : "memory");
+}
+
+/* Bits [13:5] select one of 512 data cache tags */
+static inline void turbosparc_inv_data_tag(unsigned long addr)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (addr), "i" (ASI_M_DATAC_TAG)
+			     : "memory");
+}
+
+static inline void turbosparc_flush_icache(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < 0x4000; addr += 0x20)
+		turbosparc_inv_insn_tag(addr);
+}
+
+static inline void turbosparc_flush_dcache(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < 0x4000; addr += 0x20)
+		turbosparc_inv_data_tag(addr);
+}
+
+static inline void turbosparc_idflash_clear(void)
+{
+	unsigned long addr;
+
+	for (addr = 0; addr < 0x4000; addr += 0x20) {
+		turbosparc_inv_insn_tag(addr);
+		turbosparc_inv_data_tag(addr);
+	}
+}
+
+static inline void turbosparc_set_ccreg(unsigned long regval)
+{
+	__asm__ __volatile__("sta %0, [%1] %2\n\t"
+			     : /* no outputs */
+			     : "r" (regval), "r" (0x600), "i" (ASI_M_MMUREGS)
+			     : "memory");
+}
+
+static inline unsigned long turbosparc_get_ccreg(void)
+{
+	unsigned long regval;
+
+	__asm__ __volatile__("lda [%1] %2, %0\n\t"
+			     : "=r" (regval)
+			     : "r" (0x600), "i" (ASI_M_MMUREGS));
+	return regval;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !(_SPARC_TURBOSPARC_H) */
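
A hypothetical caller of the two configuration-register helpers above, enabling the secondary cache; the function name is invented and this is only a usage sketch under the header's own definitions.

/* Usage sketch (hypothetical): turn on the TurboSparc secondary cache. */
static void turbosparc_enable_l2_sketch(void)
{
	unsigned long cfg = turbosparc_get_ccreg();	/* lda [0x600] ASI_M_MMUREGS */

	cfg |= TURBOSPARC_SCENABLE;			/* set the SCC enable bit */
	turbosparc_set_ccreg(cfg);			/* write it back */
}
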
diff --git a/arch/sparc/include/asm/uaccess.h b/arch/sparc/include/asm/uaccess.h
new file mode 100644
index 0000000..dd85bc2
--- /dev/null
+++ b/arch/sparc/include/asm/uaccess.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_UACCESS_H
+#define ___ASM_SPARC_UACCESS_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/uaccess_64.h>
+#else
+#include <asm/uaccess_32.h>
+#endif
+
+#define user_addr_max() \
+	(uaccess_kernel() ? ~0UL : TASK_SIZE)
+
+long strncpy_from_user(char *dest, const char __user *src, long count);
+
+#endif
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
new file mode 100644
index 0000000..de71c65
--- /dev/null
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * uaccess.h: User space memory access functions.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+#ifndef _ASM_UACCESS_H
+#define _ASM_UACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+#include <asm/processor.h>
+
+#define ARCH_HAS_SORT_EXTABLE
+#define ARCH_HAS_SEARCH_EXTABLE
+
+/* Sparc is not segmented; however, we need to be able to fool access_ok()
+ * when doing system calls from kernel mode legitimately.
+ *
+ * "For historical reasons, these macros are grossly misnamed." -Linus
+ */
+
+#define KERNEL_DS   ((mm_segment_t) { 0 })
+#define USER_DS     ((mm_segment_t) { -1 })
+
+#define get_ds()	(KERNEL_DS)
+#define get_fs()	(current->thread.current_ds)
+#define set_fs(val)	((current->thread.current_ds) = (val))
+
+#define segment_eq(a, b) ((a).seg == (b).seg)
+
+/* There is a conveniently unmapped page at PAGE_OFFSET - PAGE_SIZE, so this
+ * test can be fairly lightweight.
+ * No one can read or write kernel space from userland by passing a large
+ * size and an address just below PAGE_OFFSET - a fault will break the attempt.
+ */
+#define __user_ok(addr, size) ({ (void)(size); (addr) < STACK_TOP; })
+#define __kernel_ok (uaccess_kernel())
+#define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size)))
+#define access_ok(type, addr, size) \
+	({ (void)(type); __access_ok((unsigned long)(addr), size); })
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ *
+ * There is also a special way to cover a range of potentially faulting
+ * insns (like twenty ldd/std's with no intervening other instructions):
+ * specify the address of the first insn with a fixup of 0, and in the
+ * next exception_table_entry specify the last potentially faulting
+ * insn + 1 with the fixup set to the routine that should handle the fault.
+ * That fixup code will get
+ * (faulting_insn_address - first_insn_in_the_range_address)/4
+ * in %g2 (i.e. the index of the faulting instruction in the range).
+ * An illustrative encoding of such a range follows the struct below.
+ */
+
+struct exception_table_entry
+{
+        unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+unsigned long search_extables_range(unsigned long addr, unsigned long *g2);
+
+/* Uh, these should become the main single-value transfer routines..
+ * They automatically use the right size if we just have the right
+ * pointer type..
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ */
+#define put_user(x, ptr) ({ \
+	unsigned long __pu_addr = (unsigned long)(ptr); \
+	__chk_user_ptr(ptr); \
+	__put_user_check((__typeof__(*(ptr)))(x), __pu_addr, sizeof(*(ptr))); \
+})
+
+#define get_user(x, ptr) ({ \
+	unsigned long __gu_addr = (unsigned long)(ptr); \
+	__chk_user_ptr(ptr); \
+	__get_user_check((x), __gu_addr, sizeof(*(ptr)), __typeof__(*(ptr))); \
+})
+
+/*
+ * The "__xxx" versions do not do address space checking, useful when
+ * doing multiple accesses to the same area (the user has to do the
+ * checks by hand with "access_ok()")
+ */
+#define __put_user(x, ptr) \
+	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr) \
+    __get_user_nocheck((x), (ptr), sizeof(*(ptr)), __typeof__(*(ptr)))
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) ((struct __large_struct __user *)(x))
+
+#define __put_user_check(x, addr, size) ({ \
+	register int __pu_ret; \
+	if (__access_ok(addr, size)) { \
+		switch (size) { \
+		case 1: \
+			__put_user_asm(x, b, addr, __pu_ret); \
+			break; \
+		case 2: \
+			__put_user_asm(x, h, addr, __pu_ret); \
+			break; \
+		case 4: \
+			__put_user_asm(x, , addr, __pu_ret); \
+			break; \
+		case 8: \
+			__put_user_asm(x, d, addr, __pu_ret); \
+			break; \
+		default: \
+			__pu_ret = __put_user_bad(); \
+			break; \
+		} \
+	} else { \
+		__pu_ret = -EFAULT; \
+	} \
+	__pu_ret; \
+})
+
+#define __put_user_nocheck(x, addr, size) ({			\
+	register int __pu_ret;					\
+	switch (size) {						\
+	case 1: __put_user_asm(x, b, addr, __pu_ret); break;	\
+	case 2: __put_user_asm(x, h, addr, __pu_ret); break;	\
+	case 4: __put_user_asm(x, , addr, __pu_ret); break;	\
+	case 8: __put_user_asm(x, d, addr, __pu_ret); break;	\
+	default: __pu_ret = __put_user_bad(); break;		\
+	} \
+	__pu_ret; \
+})
+
+#define __put_user_asm(x, size, addr, ret)				\
+__asm__ __volatile__(							\
+		"/* Put user asm, inline. */\n"				\
+	"1:\t"	"st"#size " %1, %2\n\t"					\
+		"clr	%0\n"						\
+	"2:\n\n\t"							\
+		".section .fixup,#alloc,#execinstr\n\t"			\
+		".align	4\n"						\
+	"3:\n\t"							\
+		"b	2b\n\t"						\
+		" mov	%3, %0\n\t"					\
+		".previous\n\n\t"					\
+		".section __ex_table,#alloc\n\t"			\
+		".align	4\n\t"						\
+		".word	1b, 3b\n\t"					\
+		".previous\n\n\t"					\
+	       : "=&r" (ret) : "r" (x), "m" (*__m(addr)),		\
+		 "i" (-EFAULT))
+
+int __put_user_bad(void);
+
+#define __get_user_check(x, addr, size, type) ({ \
+	register int __gu_ret; \
+	register unsigned long __gu_val; \
+	if (__access_ok(addr, size)) { \
+		switch (size) { \
+		case 1: \
+			__get_user_asm(__gu_val, ub, addr, __gu_ret); \
+			break; \
+		case 2: \
+			__get_user_asm(__gu_val, uh, addr, __gu_ret); \
+			break; \
+		case 4: \
+			__get_user_asm(__gu_val, , addr, __gu_ret); \
+			break; \
+		case 8: \
+			__get_user_asm(__gu_val, d, addr, __gu_ret); \
+			break; \
+		default: \
+			__gu_val = 0; \
+			__gu_ret = __get_user_bad(); \
+			break; \
+		} \
+	} else { \
+		__gu_val = 0; \
+		__gu_ret = -EFAULT; \
+	} \
+	x = (__force type) __gu_val; \
+	__gu_ret; \
+})
+
+#define __get_user_nocheck(x, addr, size, type) ({			\
+	register int __gu_ret;						\
+	register unsigned long __gu_val;				\
+	switch (size) {							\
+	case 1: __get_user_asm(__gu_val, ub, addr, __gu_ret); break;	\
+	case 2: __get_user_asm(__gu_val, uh, addr, __gu_ret); break;	\
+	case 4: __get_user_asm(__gu_val, , addr, __gu_ret); break;	\
+	case 8: __get_user_asm(__gu_val, d, addr, __gu_ret); break;	\
+	default:							\
+		__gu_val = 0;						\
+		__gu_ret = __get_user_bad();				\
+		break;							\
+	}								\
+	x = (__force type) __gu_val;					\
+	__gu_ret;							\
+})
+
+#define __get_user_asm(x, size, addr, ret)				\
+__asm__ __volatile__(							\
+		"/* Get user asm, inline. */\n"				\
+	"1:\t"	"ld"#size " %2, %1\n\t"					\
+		"clr	%0\n"						\
+	"2:\n\n\t"							\
+		".section .fixup,#alloc,#execinstr\n\t"			\
+		".align	4\n"						\
+	"3:\n\t"							\
+		"clr	%1\n\t"						\
+		"b	2b\n\t"						\
+		" mov	%3, %0\n\n\t"					\
+		".previous\n\t"						\
+		".section __ex_table,#alloc\n\t"			\
+		".align	4\n\t"						\
+		".word	1b, 3b\n\n\t"					\
+		".previous\n\t"						\
+	       : "=&r" (ret), "=&r" (x) : "m" (*__m(addr)),		\
+		 "i" (-EFAULT))
+
+int __get_user_bad(void);
+
+unsigned long __copy_user(void __user *to, const void __user *from, unsigned long size);
+
+static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	return __copy_user(to, (__force void __user *) from, n);
+}
+
+static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	return __copy_user((__force void __user *) to, from, n);
+}
+
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__ (
+		".section __ex_table,#alloc\n\t"
+		".align 4\n\t"
+		".word 1f,3\n\t"
+		".previous\n\t"
+		"mov %2, %%o1\n"
+		"1:\n\t"
+		"call __bzero\n\t"
+		" mov %1, %%o0\n\t"
+		"mov %%o0, %0\n"
+		: "=r" (ret) : "r" (addr), "r" (size) :
+		"o0", "o1", "o2", "o3", "o4", "o5", "o7",
+		"g1", "g2", "g3", "g4", "g5", "g7", "cc");
+
+	return ret;
+}
+
+static inline unsigned long clear_user(void __user *addr, unsigned long n)
+{
+	if (n && __access_ok((unsigned long) addr, n))
+		return __clear_user(addr, n);
+	else
+		return n;
+}
+
+__must_check long strnlen_user(const char __user *str, long n);
+
+#endif /* _ASM_UACCESS_H */
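
A usage sketch of the transfer macros above; the function is hypothetical, but the convention it relies on (0 on success, -EFAULT on fault) is the one the macros implement.

/* Usage sketch (hypothetical): read, double and write back a user int. */
static int double_user_int_sketch(int __user *uptr)
{
	int val;

	if (get_user(val, uptr))	/* expands to __get_user_check() */
		return -EFAULT;
	return put_user(val * 2, uptr);	/* expands to __put_user_check() */
}
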
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
new file mode 100644
index 0000000..cbb308c
--- /dev/null
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UACCESS_H
+#define _ASM_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <asm/asi.h>
+#include <asm/spitfire.h>
+#include <asm/extable_64.h>
+
+#include <asm/processor.h>
+
+/*
+ * Sparc64 is segmented, though more like the M68K than the I386.
+ * We use the secondary ASI to address user memory, which references a
+ * completely different VM map, thus there is zero chance of the user
+ * doing something queer and tricking us into poking kernel memory.
+ *
+ * What is left here is basically what is needed for the other parts of
+ * the kernel that expect to be able to manipulate, erm, "segments".
+ * Or perhaps more properly, permissions.
+ *
+ * "For historical reasons, these macros are grossly misnamed." -Linus
+ */
+
+#define KERNEL_DS   ((mm_segment_t) { ASI_P })
+#define USER_DS     ((mm_segment_t) { ASI_AIUS })	/* har har har */
+
+#define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)})
+#define get_ds() (KERNEL_DS)
+
+#define segment_eq(a, b)  ((a).seg == (b).seg)
+
+#define set_fs(val)								\
+do {										\
+	current_thread_info()->current_ds = (val).seg;				\
+	__asm__ __volatile__ ("wr %%g0, %0, %%asi" : : "r" ((val).seg));	\
+} while(0)
+
+/*
+ * Test whether a block of memory is a valid user space address.
+ * Returns 0 if the range is valid, nonzero otherwise.
+ */
+static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit)
+{
+	if (__builtin_constant_p(size))
+		return addr > limit - size;
+
+	addr += size;
+	if (addr < size)
+		return true;
+
+	return addr > limit;
+}
+
+#define __range_not_ok(addr, size, limit)                               \
+({                                                                      \
+	__chk_user_ptr(addr);                                           \
+	__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
+})
+
+static inline int __access_ok(const void __user * addr, unsigned long size)
+{
+	return 1;
+}
+
+static inline int access_ok(int type, const void __user * addr, unsigned long size)
+{
+	return 1;
+}
+
+void __retl_efault(void);
+
+/* Uh, these should become the main single-value transfer routines..
+ * They automatically use the right size if we just have the right
+ * pointer type..
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ */
+#define put_user(x, ptr) ({ \
+	unsigned long __pu_addr = (unsigned long)(ptr); \
+	__chk_user_ptr(ptr); \
+	__put_user_nocheck((__typeof__(*(ptr)))(x), __pu_addr, sizeof(*(ptr)));\
+})
+
+#define get_user(x, ptr) ({ \
+	unsigned long __gu_addr = (unsigned long)(ptr); \
+	__chk_user_ptr(ptr); \
+	__get_user_nocheck((x), __gu_addr, sizeof(*(ptr)), __typeof__(*(ptr)));\
+})
+
+#define __put_user(x, ptr) put_user(x, ptr)
+#define __get_user(x, ptr) get_user(x, ptr)
+
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) ((struct __large_struct *)(x))
+
+#define __put_user_nocheck(data, addr, size) ({			\
+	register int __pu_ret;					\
+	switch (size) {						\
+	case 1: __put_user_asm(data, b, addr, __pu_ret); break;	\
+	case 2: __put_user_asm(data, h, addr, __pu_ret); break;	\
+	case 4: __put_user_asm(data, w, addr, __pu_ret); break;	\
+	case 8: __put_user_asm(data, x, addr, __pu_ret); break;	\
+	default: __pu_ret = __put_user_bad(); break;		\
+	}							\
+	__pu_ret;						\
+})
+
+#define __put_user_asm(x, size, addr, ret)				\
+__asm__ __volatile__(							\
+		"/* Put user asm, inline. */\n"				\
+	"1:\t"	"st"#size "a %1, [%2] %%asi\n\t"			\
+		"clr	%0\n"						\
+	"2:\n\n\t"							\
+		".section .fixup,#alloc,#execinstr\n\t"			\
+		".align	4\n"						\
+	"3:\n\t"							\
+		"sethi	%%hi(2b), %0\n\t"				\
+		"jmpl	%0 + %%lo(2b), %%g0\n\t"			\
+		" mov	%3, %0\n\n\t"					\
+		".previous\n\t"						\
+		".section __ex_table,\"a\"\n\t"				\
+		".align	4\n\t"						\
+		".word	1b, 3b\n\t"					\
+		".previous\n\n\t"					\
+	       : "=r" (ret) : "r" (x), "r" (__m(addr)),			\
+		 "i" (-EFAULT))
+
+int __put_user_bad(void);
+
+#define __get_user_nocheck(data, addr, size, type) ({			     \
+	register int __gu_ret;						     \
+	register unsigned long __gu_val;				     \
+	switch (size) {							     \
+		case 1: __get_user_asm(__gu_val, ub, addr, __gu_ret); break; \
+		case 2: __get_user_asm(__gu_val, uh, addr, __gu_ret); break; \
+		case 4: __get_user_asm(__gu_val, uw, addr, __gu_ret); break; \
+		case 8: __get_user_asm(__gu_val, x, addr, __gu_ret); break;  \
+		default:						     \
+			__gu_val = 0;					     \
+			__gu_ret = __get_user_bad();			     \
+			break;						     \
+	}								     \
+	data = (__force type) __gu_val;					     \
+	__gu_ret;							     \
+})
+
+#define __get_user_asm(x, size, addr, ret)				\
+__asm__ __volatile__(							\
+		"/* Get user asm, inline. */\n"				\
+	"1:\t"	"ld"#size "a [%2] %%asi, %1\n\t"			\
+		"clr	%0\n"						\
+	"2:\n\n\t"							\
+		".section .fixup,#alloc,#execinstr\n\t"			\
+		".align	4\n"						\
+	"3:\n\t"							\
+		"sethi	%%hi(2b), %0\n\t"				\
+		"clr	%1\n\t"						\
+		"jmpl	%0 + %%lo(2b), %%g0\n\t"			\
+		" mov	%3, %0\n\n\t"					\
+		".previous\n\t"						\
+		".section __ex_table,\"a\"\n\t"				\
+		".align	4\n\t"						\
+		".word	1b, 3b\n\n\t"					\
+		".previous\n\t"						\
+	       : "=r" (ret), "=r" (x) : "r" (__m(addr)),		\
+		 "i" (-EFAULT))
+
+int __get_user_bad(void);
+
+unsigned long __must_check raw_copy_from_user(void *to,
+					     const void __user *from,
+					     unsigned long size);
+
+unsigned long __must_check raw_copy_to_user(void __user *to,
+					   const void *from,
+					   unsigned long size);
+#define INLINE_COPY_FROM_USER
+#define INLINE_COPY_TO_USER
+
+unsigned long __must_check raw_copy_in_user(void __user *to,
+					   const void __user *from,
+					   unsigned long size);
+
+unsigned long __must_check __clear_user(void __user *, unsigned long);
+
+#define clear_user __clear_user
+
+__must_check long strnlen_user(const char __user *str, long n);
+
+struct pt_regs;
+unsigned long compute_effective_address(struct pt_regs *,
+					unsigned int insn,
+					unsigned int rd);
+
+#endif /* _ASM_UACCESS_H */
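
The wraparound check in __chk_range_not_ok above is worth a worked example: for a variable size, addr += size can wrap past zero, and the result then compares less than size. A standalone sketch of just that logic (the limit value is made up):

#include <stdbool.h>
#include <stdio.h>

static bool range_not_ok(unsigned long addr, unsigned long size,
			 unsigned long limit)
{
	addr += size;
	if (addr < size)	/* wrapped past the top of the address space */
		return true;
	return addr > limit;
}

int main(void)
{
	unsigned long limit = 0x0000080000000000UL;	/* TASK_SIZE-like bound */

	printf("%d\n", range_not_ok(0x1000UL, 64, limit));	/* 0: in range */
	printf("%d\n", range_not_ok(~0UL - 1, 8, limit));	/* 1: wraps    */
	return 0;
}
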
diff --git a/arch/sparc/include/asm/unaligned.h b/arch/sparc/include/asm/unaligned.h
new file mode 100644
index 0000000..7971d89
--- /dev/null
+++ b/arch/sparc/include/asm/unaligned.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SPARC_UNALIGNED_H
+#define _ASM_SPARC_UNALIGNED_H
+
+#include <linux/unaligned/be_struct.h>
+#include <linux/unaligned/le_byteshift.h>
+#include <linux/unaligned/generic.h>
+#define get_unaligned	__get_unaligned_be
+#define put_unaligned	__put_unaligned_be
+
+#endif /* _ASM_SPARC_UNALIGNED_H */
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
new file mode 100644
index 0000000..b2a6a95
--- /dev/null
+++ b/arch/sparc/include/asm/unistd.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * System calls under the Sparc.
+ *
+ * Don't be scared by the ugly clobbers, it is the only way I can
+ * think of right now to force the arguments into fixed registers
+ * before the trap into the system call with gcc 'asm' statements.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ *
+ * SunOS compatibility based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+#ifndef _SPARC_UNISTD_H
+#define _SPARC_UNISTD_H
+
+#include <uapi/asm/unistd.h>
+
+#ifdef __32bit_syscall_numbers__
+#else
+#define __NR_time		231 /* Linux sparc32                               */
+#endif
+#define __ARCH_WANT_OLD_READDIR
+#define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_GETHOSTNAME
+#define __ARCH_WANT_SYS_PAUSE
+#define __ARCH_WANT_SYS_SIGNAL
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_WAITPID
+#define __ARCH_WANT_SYS_SOCKETCALL
+#define __ARCH_WANT_SYS_FADVISE64
+#define __ARCH_WANT_SYS_GETPGRP
+#define __ARCH_WANT_SYS_LLSEEK
+#define __ARCH_WANT_SYS_NICE
+#define __ARCH_WANT_SYS_OLDUMOUNT
+#define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_SIGPROCMASK
+#ifdef __32bit_syscall_numbers__
+#define __ARCH_WANT_SYS_IPC
+#else
+#define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#endif
+
+#endif /* _SPARC_UNISTD_H */
diff --git a/arch/sparc/include/asm/upa.h b/arch/sparc/include/asm/upa.h
new file mode 100644
index 0000000..782691b
--- /dev/null
+++ b/arch/sparc/include/asm/upa.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_UPA_H
+#define _SPARC64_UPA_H
+
+#include <asm/asi.h>
+
+/* UPA level registers and defines. */
+
+/* UPA Config Register */
+#define UPA_CONFIG_RESV		0xffffffffc0000000 /* Reserved.                    */
+#define UPA_CONFIG_PCON		0x000000003fc00000 /* Depth of various sys queues. */
+#define UPA_CONFIG_MID		0x00000000003e0000 /* Module ID.                   */
+#define UPA_CONFIG_PCAP		0x000000000001ffff /* Port Capabilities.           */
+
+/* UPA Port ID Register */
+#define UPA_PORTID_FNP		0xff00000000000000 /* Hardcoded to 0xfc on ultra.  */
+#define UPA_PORTID_RESV		0x00fffff800000000 /* Reserved.                    */
+#define UPA_PORTID_ECCVALID     0x0000000400000000 /* Zero if mod can generate ECC */
+#define UPA_PORTID_ONEREAD      0x0000000200000000 /* Set if mod generates P_RASB  */
+#define UPA_PORTID_PINTRDQ      0x0000000180000000 /* # outstanding P_INT_REQ's    */
+#define UPA_PORTID_PREQDQ       0x000000007e000000 /* slave-wr's to mod supported  */
+#define UPA_PORTID_PREQRD       0x0000000001e00000 /* # incoming P_REQ's supported */
+#define UPA_PORTID_UPACAP       0x00000000001f0000 /* UPA capabilities of mod      */
+#define UPA_PORTID_ID           0x000000000000ffff /* Module Identification bits  */
+
+/* UPA I/O space accessors */
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+static inline unsigned char _upa_readb(unsigned long addr)
+{
+	unsigned char ret;
+
+	__asm__ __volatile__("lduba\t[%1] %2, %0\t/* upa_readb */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+static inline unsigned short _upa_readw(unsigned long addr)
+{
+	unsigned short ret;
+
+	__asm__ __volatile__("lduha\t[%1] %2, %0\t/* upa_readw */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+static inline unsigned int _upa_readl(unsigned long addr)
+{
+	unsigned int ret;
+
+	__asm__ __volatile__("lduwa\t[%1] %2, %0\t/* upa_readl */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+static inline unsigned long _upa_readq(unsigned long addr)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__("ldxa\t[%1] %2, %0\t/* upa_readq */"
+			     : "=r" (ret)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+
+	return ret;
+}
+
+static inline void _upa_writeb(unsigned char b, unsigned long addr)
+{
+	__asm__ __volatile__("stba\t%0, [%1] %2\t/* upa_writeb */"
+			     : /* no outputs */
+			     : "r" (b), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+static inline void _upa_writew(unsigned short w, unsigned long addr)
+{
+	__asm__ __volatile__("stha\t%0, [%1] %2\t/* upa_writew */"
+			     : /* no outputs */
+			     : "r" (w), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+static inline void _upa_writel(unsigned int l, unsigned long addr)
+{
+	__asm__ __volatile__("stwa\t%0, [%1] %2\t/* upa_writel */"
+			     : /* no outputs */
+			     : "r" (l), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+static inline void _upa_writeq(unsigned long q, unsigned long addr)
+{
+	__asm__ __volatile__("stxa\t%0, [%1] %2\t/* upa_writeq */"
+			     : /* no outputs */
+			     : "r" (q), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+#define upa_readb(__addr)		(_upa_readb((unsigned long)(__addr)))
+#define upa_readw(__addr)		(_upa_readw((unsigned long)(__addr)))
+#define upa_readl(__addr)		(_upa_readl((unsigned long)(__addr)))
+#define upa_readq(__addr)		(_upa_readq((unsigned long)(__addr)))
+#define upa_writeb(__b, __addr)		(_upa_writeb((__b), (unsigned long)(__addr)))
+#define upa_writew(__w, __addr)		(_upa_writew((__w), (unsigned long)(__addr)))
+#define upa_writel(__l, __addr)		(_upa_writel((__l), (unsigned long)(__addr)))
+#define upa_writeq(__q, __addr)		(_upa_writeq((__q), (unsigned long)(__addr)))
+#endif /* __KERNEL__ && !__ASSEMBLY__ */
+
+#endif /* !(_SPARC64_UPA_H) */
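
A hypothetical usage sketch of the accessors above: UPA registers are addressed physically, and the loads/stores bypass the MMU via ASI_PHYS_BYPASS_EC_E. The register offset is invented for illustration.

/* Usage sketch (hypothetical register at offset 0x10). */
static void upa_set_low_bit_sketch(unsigned long regs_phys_base)
{
	unsigned long v = upa_readq(regs_phys_base + 0x10);

	upa_writeq(v | 1UL, regs_phys_base + 0x10);
}
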
diff --git a/arch/sparc/include/asm/uprobes.h b/arch/sparc/include/asm/uprobes.h
new file mode 100644
index 0000000..36196c1
--- /dev/null
+++ b/arch/sparc/include/asm/uprobes.h
@@ -0,0 +1,59 @@
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * User-space Probes (UProbes) for sparc
+ *
+ * Copyright (C) 2013 Oracle, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *     Jose E. Marchesi <jose.marchesi@oracle.com>
+ *     Eric Saint Etienne <eric.saint.etienne@oracle.com>
+ */
+
+typedef u32 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES		4
+#define UPROBE_XOL_SLOT_BYTES	(MAX_UINSN_BYTES * 2)
+
+#define UPROBE_SWBP_INSN_SIZE	4
+#define UPROBE_SWBP_INSN	0x91d02073 /* ta 0x73 */
+#define UPROBE_STP_INSN		0x91d02074 /* ta 0x74 */
+
+#define ANNUL_BIT (1 << 29)
+
+struct arch_uprobe {
+	union {
+		u8  insn[MAX_UINSN_BYTES];
+		u32 ixol;
+	};
+};
+
+struct arch_uprobe_task {
+	u64 saved_tpc;
+	u64 saved_tnpc;
+};
+
+struct task_struct;
+struct notifier_block;
+
+extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+extern int  arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern int  arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data);
+extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+
+#endif	/* _ASM_UPROBES_H */
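
ANNUL_BIT above is bit 29 of a SPARC branch instruction word; when set, the delay-slot instruction is annulled for an untaken conditional branch. A standalone sketch decoding it from an example opcode (0x32800005 encodes a bne,a; the value is chosen purely for illustration):

#include <stdio.h>

#define ANNUL_BIT (1 << 29)

int main(void)
{
	unsigned int insn = 0x32800005;	/* bne,a +20: annul bit set */

	printf("annulled: %s\n", (insn & ANNUL_BIT) ? "yes" : "no");
	return 0;
}
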
diff --git a/arch/sparc/include/asm/user.h b/arch/sparc/include/asm/user.h
new file mode 100644
index 0000000..3400ea8
--- /dev/null
+++ b/arch/sparc/include/asm/user.h
@@ -0,0 +1,6 @@
+#ifndef _SPARC_USER_H
+#define _SPARC_USER_H
+
+/* Nothing to define.  */
+
+#endif /* !(_SPARC_USER_H) */
diff --git a/arch/sparc/include/asm/vaddrs.h b/arch/sparc/include/asm/vaddrs.h
new file mode 100644
index 0000000..84d054b
--- /dev/null
+++ b/arch/sparc/include/asm/vaddrs.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC_VADDRS_H
+#define _SPARC_VADDRS_H
+
+#include <asm/head.h>
+
+/*
+ * asm/vaddrs.h:  Here we define the virtual addresses at
+ *                      which important things will be mapped.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000 Anton Blanchard (anton@samba.org)
+ */
+
+#define SRMMU_MAXMEM		0x0c000000
+
+#define SRMMU_NOCACHE_VADDR	(KERNBASE + SRMMU_MAXMEM)
+				/* = 0xfc000000 */
+/* XXX Empiricals - this needs to go away - KMW */
+#define SRMMU_MIN_NOCACHE_PAGES (550)
+#define SRMMU_MAX_NOCACHE_PAGES	(1280)
+
+/* The following constant is used in mm/srmmu.c::srmmu_nocache_calcsize()
+ * to determine the amount of memory that will be reserved as nocache:
+ *
+ * 256 pages will be taken as nocache per each
+ * SRMMU_NOCACHE_ALCRATIO MB of system memory.
+ *
+ * limits enforced:	nocache minimum = 256 pages
+ *			nocache maximum = 1280 pages
+ */
+#define SRMMU_NOCACHE_ALCRATIO	64	/* 256 pages per 64MB of system RAM */
+
+#ifndef __ASSEMBLY__
+#include <asm/kmap_types.h>
+
+enum fixed_addresses {
+	FIX_HOLE,
+#ifdef CONFIG_HIGHMEM
+	FIX_KMAP_BEGIN,
+	FIX_KMAP_END = (KM_TYPE_NR * NR_CPUS),
+#endif
+	__end_of_fixed_addresses
+};
+#endif
+
+/* Leave one empty page between IO pages at 0xfd000000 and
+ * the top of the fixmap.
+ */
+#define FIXADDR_TOP		(0xfcfff000UL)
+#define FIXADDR_SIZE		((FIX_KMAP_END + 1) << PAGE_SHIFT)
+#define FIXADDR_START		(FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x)        (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+#define SUN4M_IOBASE_VADDR	0xfd000000 /* Base for mapping pages */
+#define IOBASE_VADDR		0xfe000000
+#define IOBASE_END		0xfe600000
+
+#define KADB_DEBUGGER_BEGVM	0xffc00000 /* Where kern debugger is in virt-mem */
+#define KADB_DEBUGGER_ENDVM	0xffd00000
+#define DEBUG_FIRSTVADDR	KADB_DEBUGGER_BEGVM
+#define DEBUG_LASTVADDR		KADB_DEBUGGER_ENDVM
+
+#define LINUX_OPPROM_BEGVM	0xffd00000
+#define LINUX_OPPROM_ENDVM	0xfff00000
+
+#define DVMA_VADDR		0xfff00000 /* Base area of the DVMA on suns */
+#define DVMA_END		0xfffc0000
+
+#endif /* !(_SPARC_VADDRS_H) */
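
The fixmap arithmetic above is easiest to see with numbers: slots grow downward from FIXADDR_TOP in page-sized steps. A standalone sketch (PAGE_SHIFT taken as 12, the sparc32 value):

#include <stdio.h>

#define PAGE_SHIFT	12
#define FIXADDR_TOP	0xfcfff000UL
#define fix_to_virt(x)	(FIXADDR_TOP - ((unsigned long)(x) << PAGE_SHIFT))

int main(void)
{
	printf("slot 0 -> %#lx\n", fix_to_virt(0));	/* 0xfcfff000 */
	printf("slot 1 -> %#lx\n", fix_to_virt(1));	/* 0xfcffe000 */
	return 0;
}
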
diff --git a/arch/sparc/include/asm/vdso.h b/arch/sparc/include/asm/vdso.h
new file mode 100644
index 0000000..93b6287
--- /dev/null
+++ b/arch/sparc/include/asm/vdso.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ASM_SPARC_VDSO_H
+#define _ASM_SPARC_VDSO_H
+
+struct vdso_image {
+	void *data;
+	unsigned long size;   /* Always a multiple of PAGE_SIZE */
+	long sym_vvar_start;  /* Negative offset to the vvar area */
+	long sym_vread_tick; /* Start of vread_tick section */
+	long sym_vread_tick_patch_start; /* Start of tick read */
+	long sym_vread_tick_patch_end;   /* End of tick read */
+};
+
+#ifdef CONFIG_SPARC64
+extern const struct vdso_image vdso_image_64_builtin;
+#endif
+#ifdef CONFIG_COMPAT
+extern const struct vdso_image vdso_image_32_builtin;
+#endif
+
+#endif /* _ASM_SPARC_VDSO_H */
diff --git a/arch/sparc/include/asm/vga.h b/arch/sparc/include/asm/vga.h
new file mode 100644
index 0000000..2952d66
--- /dev/null
+++ b/arch/sparc/include/asm/vga.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *	Access to VGA videoram
+ *
+ *	(c) 1998 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+#include <linux/bug.h>
+#include <linux/string.h>
+#include <asm/types.h>
+
+#define VT_BUF_HAVE_RW
+#define VT_BUF_HAVE_MEMSETW
+#define VT_BUF_HAVE_MEMCPYW
+#define VT_BUF_HAVE_MEMMOVEW
+
+#undef scr_writew
+#undef scr_readw
+
+static inline void scr_writew(u16 val, u16 *addr)
+{
+	BUG_ON((long) addr >= 0);
+
+	*addr = val;
+}
+
+static inline u16 scr_readw(const u16 *addr)
+{
+	BUG_ON((long) addr >= 0);
+
+	return *addr;
+}
+
+static inline void scr_memsetw(u16 *p, u16 v, unsigned int n)
+{
+	BUG_ON((long) p >= 0);
+
+	memset16(p, cpu_to_le16(v), n / 2);
+}
+
+static inline void scr_memcpyw(u16 *d, u16 *s, unsigned int n)
+{
+	BUG_ON((long) d >= 0);
+
+	memcpy(d, s, n);
+}
+
+static inline void scr_memmovew(u16 *d, u16 *s, unsigned int n)
+{
+	BUG_ON((long) d >= 0);
+
+	memmove(d, s, n);
+}
+
+#define VGA_MAP_MEM(x,s) (x)
+
+#endif
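
The BUG_ON((long) addr >= 0) checks above rely on kernel/IO addresses living in the top half of the 32-bit address space, where they look negative as a signed long. A standalone sketch of the sign trick (fixed-width types so it behaves the same on any host):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t hi = 0xfd000000u;	/* IO-space style sparc32 address */
	uint32_t lo = 0x00010000u;	/* low, user-space style address  */

	printf("hi looks negative: %d\n", (int32_t)hi < 0);	/* 1 */
	printf("lo looks negative: %d\n", (int32_t)lo < 0);	/* 0 */
	return 0;
}
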
diff --git a/arch/sparc/include/asm/viking.h b/arch/sparc/include/asm/viking.h
new file mode 100644
index 0000000..0bbefd1
--- /dev/null
+++ b/arch/sparc/include/asm/viking.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * viking.h:  Defines specific to the GNU/Viking MBUS module.
+ *            This is SRMMU stuff.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+#ifndef _SPARC_VIKING_H
+#define _SPARC_VIKING_H
+
+#include <asm/asi.h>
+#include <asm/mxcc.h>
+#include <asm/pgtsrmmu.h>
+
+/* Bits in the SRMMU control register for GNU/Viking modules.
+ *
+ * -----------------------------------------------------------
+ * |impl-vers| RSV |TC|AC|SP|BM|PC|MBM|SB|IC|DC|PSO|RSV|NF|ME|
+ * -----------------------------------------------------------
+ *  31     24 23-17 16 15 14 13 12 11  10  9  8  7  6-2  1  0
+ *
+ * TC: Tablewalk Cacheable -- 0 = Twalks are not cacheable in E-cache
+ *                            1 = Twalks are cacheable in E-cache
+ *
+ * GNU/Viking will only cache tablewalks in the E-cache (mxcc) if present
+ * and never caches them internally (or so states the docs).  Therefore
+ * for machines lacking an E-cache (ie. in MBUS mode) this bit must
+ * remain cleared.
+ *
+ * AC: Alternate Cacheable -- 0 = Passthru physical accesses not cacheable
+ *                            1 = Passthru physical accesses cacheable
+ *
+ * This indicates whether accesses are cacheable when no cacheable bit
+ * is present in the pte when the processor is in boot-mode or the
+ * access does not need pte's for translation (ie. pass-thru ASI's).
+ * "Cacheable" is only referring to E-cache (if present) and not the
+ * on chip split I/D caches of the GNU/Viking.
+ *
+ * SP: SnooP Enable -- 0 = bus snooping off, 1 = bus snooping on
+ *
+ * This enables snooping on the GNU/Viking bus.  This must be on
+ * for the hardware cache consistency mechanisms of the GNU/Viking
+ * to work at all.  On non-mxcc GNU/Viking modules the split I/D
+ * caches will snoop regardless of whether they are enabled, this
+ * takes care of the case where the I or D or both caches are turned
+ * off yet still contain valid data.  Note also that this bit does
+ * not affect GNU/Viking store-buffer snoops, those happen if the
+ * store-buffer is enabled no matter what.
+ *
+ * BM: Boot Mode -- 0 = not in boot mode, 1 = in boot mode
+ *
+ * This indicates whether the GNU/Viking is in boot-mode or not,
+ * if it is then all instruction fetch physical addresses are
+ * computed as 0xff0000000 + low 28 bits of requested address.
+ * GNU/Viking boot-mode does not affect data accesses.  Also,
+ * in boot mode instruction accesses bypass the split on chip I/D
+ * caches, they may be cached by the GNU/MXCC if present and enabled.
+ *
+ * MBM: MBus Mode -- 0 = not in MBus mode, 1 = in MBus mode
+ *
+ * This indicates the GNU/Viking configuration present.  If in
+ * MBUS mode, the GNU/Viking lacks a GNU/MXCC E-cache.  If it is
+ * not then the GNU/Viking is on a module VBUS connected directly
+ * to a GNU/MXCC cache controller.  The GNU/MXCC can be thus connected
+ * to either an GNU/MBUS (sun4m) or the packet-switched GNU/XBus (sun4d).
+ *
+ * SB: StoreBuffer enable -- 0 = store buffer off, 1 = store buffer on
+ *
+ * The GNU/Viking store buffer allows the chip to continue execution
+ * after a store even if the data cannot be placed in one of the
+ * caches during that cycle.  If disabled, all store operations
+ * occur synchronously.
+ *
+ * IC: Instruction Cache -- 0 = off, 1 = on
+ * DC: Data Cache -- 0 = off, 1 = on
+ *
+ * These bits enable the on-cpu GNU/Viking split I/D caches.  Note,
+ * as mentioned above, these caches will snoop the bus in GNU/MBUS
+ * configurations even when disabled to avoid data corruption.
+ *
+ * NF: No Fault -- 0 = faults generate traps, 1 = faults don't trap
+ * ME: MMU enable -- 0 = mmu not translating, 1 = mmu translating
+ *
+ */
+
+#define VIKING_MMUENABLE    0x00000001
+#define VIKING_NOFAULT      0x00000002
+#define VIKING_PSO          0x00000080
+#define VIKING_DCENABLE     0x00000100   /* Enable data cache */
+#define VIKING_ICENABLE     0x00000200   /* Enable instruction cache */
+#define VIKING_SBENABLE     0x00000400   /* Enable store buffer */
+#define VIKING_MMODE        0x00000800   /* MBUS mode */
+#define VIKING_PCENABLE     0x00001000   /* Enable parity checking */
+#define VIKING_BMODE        0x00002000   /* Boot mode */
+#define VIKING_SPENABLE     0x00004000   /* Enable bus cache snooping */
+#define VIKING_ACENABLE     0x00008000   /* Enable alternate caching */
+#define VIKING_TCENABLE     0x00010000   /* Enable table-walks to be cached */
+#define VIKING_DPENABLE     0x00040000   /* Enable the data prefetcher */
+
+/*
+ * GNU/Viking Breakpoint Action Register fields.
+ */
+#define VIKING_ACTION_MIX   0x00001000   /* Enable multiple instructions */
+
+/*
+ * GNU/Viking Cache Tags.
+ */
+#define VIKING_PTAG_VALID   0x01000000   /* Cache block is valid */
+#define VIKING_PTAG_DIRTY   0x00010000   /* Block has been modified */
+#define VIKING_PTAG_SHARED  0x00000100   /* Shared with some other cache */
+
+#ifndef __ASSEMBLY__
+
+static inline void viking_flush_icache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t"
+			     : /* no outputs */
+			     : "i" (ASI_M_IC_FLCLEAR)
+			     : "memory");
+}
+
+static inline void viking_flush_dcache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t"
+			     : /* no outputs */
+			     : "i" (ASI_M_DC_FLCLEAR)
+			     : "memory");
+}
+
+static inline void viking_unlock_icache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (0x80000000), "i" (ASI_M_IC_FLCLEAR)
+			     : "memory");
+}
+
+static inline void viking_unlock_dcache(void)
+{
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (0x80000000), "i" (ASI_M_DC_FLCLEAR)
+			     : "memory");
+}
+
+static inline void viking_set_bpreg(unsigned long regval)
+{
+	__asm__ __volatile__("sta %0, [%%g0] %1\n\t"
+			     : /* no outputs */
+			     : "r" (regval), "i" (ASI_M_ACTION)
+			     : "memory");
+}
+
+static inline unsigned long viking_get_bpreg(void)
+{
+	unsigned long regval;
+
+	__asm__ __volatile__("lda [%%g0] %1, %0\n\t"
+			     : "=r" (regval)
+			     : "i" (ASI_M_ACTION));
+	return regval;
+}
+
+static inline void viking_get_dcache_ptag(int set, int block,
+					      unsigned long *data)
+{
+	unsigned long ptag = ((set & 0x7f) << 5) | ((block & 0x3) << 26) |
+			     0x80000000;
+	unsigned long info, page;
+
+	__asm__ __volatile__ ("ldda [%2] %3, %%g2\n\t"
+			      "or %%g0, %%g2, %0\n\t"
+			      "or %%g0, %%g3, %1\n\t"
+			      : "=r" (info), "=r" (page)
+			      : "r" (ptag), "i" (ASI_M_DATAC_TAG)
+			      : "g2", "g3");
+	data[0] = info;
+	data[1] = page;
+}
+
+static inline void viking_mxcc_turn_off_parity(unsigned long *mregp,
+						   unsigned long *mxcc_cregp)
+{
+	unsigned long mreg = *mregp;
+	unsigned long mxcc_creg = *mxcc_cregp;
+
+	mreg &= ~(VIKING_PCENABLE);
+	mxcc_creg &= ~(MXCC_CTL_PARE);
+
+	__asm__ __volatile__ ("set 1f, %%g2\n\t"
+			      "andcc %%g2, 4, %%g0\n\t"
+			      "bne 2f\n\t"
+			      " nop\n"
+			      "1:\n\t"
+			      "sta %0, [%%g0] %3\n\t"
+			      "sta %1, [%2] %4\n\t"
+			      "b 1f\n\t"
+			      " nop\n\t"
+			      "nop\n"
+			      "2:\n\t"
+			      "sta %0, [%%g0] %3\n\t"
+			      "sta %1, [%2] %4\n"
+			      "1:\n\t"
+			      : /* no output */
+			      : "r" (mreg), "r" (mxcc_creg),
+			        "r" (MXCC_CREG), "i" (ASI_M_MMUREGS),
+			        "i" (ASI_M_MXCC)
+			      : "g2", "memory", "cc");
+	*mregp = mreg;
+	*mxcc_cregp = mxcc_creg;
+}
+
+static inline unsigned long viking_hwprobe(unsigned long vaddr)
+{
+	unsigned long val;
+
+	vaddr &= PAGE_MASK;
+	/* Probe all MMU entries. */
+	__asm__ __volatile__("lda [%1] %2, %0\n\t"
+			     : "=r" (val)
+			     : "r" (vaddr | 0x400), "i" (ASI_M_FLUSH_PROBE));
+	if (!val)
+		return 0;
+
+	/* Probe region. */
+	__asm__ __volatile__("lda [%1] %2, %0\n\t"
+			     : "=r" (val)
+			     : "r" (vaddr | 0x200), "i" (ASI_M_FLUSH_PROBE));
+	if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
+		vaddr &= ~SRMMU_PGDIR_MASK;
+		vaddr >>= PAGE_SHIFT;
+		return val | (vaddr << 8);
+	}
+
+	/* Probe segment. */
+	__asm__ __volatile__("lda [%1] %2, %0\n\t"
+			     : "=r" (val)
+			     : "r" (vaddr | 0x100), "i" (ASI_M_FLUSH_PROBE));
+	if ((val & SRMMU_ET_MASK) == SRMMU_ET_PTE) {
+		vaddr &= ~SRMMU_REAL_PMD_MASK;
+		vaddr >>= PAGE_SHIFT;
+		return val | (vaddr << 8);
+	}
+
+	/* Probe page. */
+	__asm__ __volatile__("lda [%1] %2, %0\n\t"
+			     : "=r" (val)
+			     : "r" (vaddr), "i" (ASI_M_FLUSH_PROBE));
+	return val;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !(_SPARC_VIKING_H) */
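
A hypothetical walk over the on-chip data cache tags using viking_get_dcache_ptag(); the 128-set by 4-block geometry matches the masks in the helper, and the function itself is only a usage sketch.

/* Usage sketch (hypothetical): count valid lines in the D-cache. */
static unsigned int viking_count_valid_dlines_sketch(void)
{
	unsigned long data[2];
	unsigned int set, block, valid = 0;

	for (set = 0; set < 128; set++) {
		for (block = 0; block < 4; block++) {
			viking_get_dcache_ptag(set, block, data);
			if (data[0] & VIKING_PTAG_VALID)
				valid++;
		}
	}
	return valid;
}
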
diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h
new file mode 100644
index 0000000..059f0eb
--- /dev/null
+++ b/arch/sparc/include/asm/vio.h
@@ -0,0 +1,515 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_VIO_H
+#define _SPARC64_VIO_H
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+
+#include <asm/ldc.h>
+#include <asm/mdesc.h>
+
+struct vio_msg_tag {
+	u8			type;
+#define VIO_TYPE_CTRL		0x01
+#define VIO_TYPE_DATA		0x02
+#define VIO_TYPE_ERR		0x04
+
+	u8			stype;
+#define VIO_SUBTYPE_INFO	0x01
+#define VIO_SUBTYPE_ACK		0x02
+#define VIO_SUBTYPE_NACK	0x04
+
+	u16			stype_env;
+#define VIO_VER_INFO		0x0001
+#define VIO_ATTR_INFO		0x0002
+#define VIO_DRING_REG		0x0003
+#define VIO_DRING_UNREG		0x0004
+#define VIO_RDX			0x0005
+#define VIO_PKT_DATA		0x0040
+#define VIO_DESC_DATA		0x0041
+#define VIO_DRING_DATA		0x0042
+#define VNET_MCAST_INFO		0x0101
+
+	u32		sid;
+};
+
+struct vio_rdx {
+	struct vio_msg_tag	tag;
+	u64			resv[6];
+};
+
+struct vio_ver_info {
+	struct vio_msg_tag	tag;
+	u16			major;
+	u16			minor;
+	u8			dev_class;
+#define VDEV_NETWORK		0x01
+#define VDEV_NETWORK_SWITCH	0x02
+#define VDEV_DISK		0x03
+#define VDEV_DISK_SERVER	0x04
+#define VDEV_CONSOLE_CON	0x05
+
+	u8			resv1[3];
+	u64			resv2[5];
+};
+
+struct vio_dring_register {
+	struct vio_msg_tag	tag;
+	u64			dring_ident;
+	u32			num_descr;
+	u32			descr_size;
+	u16			options;
+#define VIO_TX_DRING		0x0001
+#define VIO_RX_DRING		0x0002
+#define VIO_RX_DRING_DATA	0x0004
+	u16			resv;
+	u32			num_cookies;
+	struct ldc_trans_cookie	cookies[0];
+};
+
+struct vio_dring_unregister {
+	struct vio_msg_tag	tag;
+	u64			dring_ident;
+	u64			resv[5];
+};
+
+/* Data transfer modes */
+#define VIO_PKT_MODE		0x01 /* Packet based transfer	*/
+#define VIO_DESC_MODE		0x02 /* In-band descriptors	*/
+#define VIO_DRING_MODE		0x03 /* Descriptor rings	*/
+/* in vers >= 1.2, VIO_DRING_MODE is 0x04 and transfer mode is a bitmask */
+#define VIO_NEW_DRING_MODE	0x04
+
+struct vio_dring_data {
+	struct vio_msg_tag	tag;
+	u64			seq;
+	u64			dring_ident;
+	u32			start_idx;
+	u32			end_idx;
+	u8			state;
+#define VIO_DRING_ACTIVE	0x01
+#define VIO_DRING_STOPPED	0x02
+
+	u8			__pad1;
+	u16			__pad2;
+	u32			__pad3;
+	u64			__pad4[2];
+};
+
+struct vio_dring_hdr {
+	u8			state;
+#define VIO_DESC_FREE		0x01
+#define VIO_DESC_READY		0x02
+#define VIO_DESC_ACCEPTED	0x03
+#define VIO_DESC_DONE		0x04
+	u8			ack;
+#define VIO_ACK_ENABLE		0x01
+#define VIO_ACK_DISABLE		0x00
+
+	u16			__pad1;
+	u32			__pad2;
+};
+
+/* VIO disk specific structures and defines */
+struct vio_disk_attr_info {
+	struct vio_msg_tag	tag;
+	u8			xfer_mode;
+	u8			vdisk_type;
+#define VD_DISK_TYPE_SLICE	0x01 /* Slice in block device	*/
+#define VD_DISK_TYPE_DISK	0x02 /* Entire block device	*/
+	u8			vdisk_mtype;		/* v1.1 */
+#define VD_MEDIA_TYPE_FIXED	0x01 /* Fixed device */
+#define VD_MEDIA_TYPE_CD	0x02 /* CD Device    */
+#define VD_MEDIA_TYPE_DVD	0x03 /* DVD Device   */
+	u8			resv1;
+	u32			vdisk_block_size;
+	u64			operations;
+	u64			vdisk_size;		/* v1.1 */
+	u64			max_xfer_size;
+	u32			phys_block_size;	/* v1.2 */
+	u32			resv2;
+	u64			resv3[1];
+};
+
+struct vio_disk_desc {
+	struct vio_dring_hdr	hdr;
+	u64			req_id;
+	u8			operation;
+#define VD_OP_BREAD		0x01 /* Block read			*/
+#define VD_OP_BWRITE		0x02 /* Block write			*/
+#define VD_OP_FLUSH		0x03 /* Flush disk contents		*/
+#define VD_OP_GET_WCE		0x04 /* Get write-cache status		*/
+#define VD_OP_SET_WCE		0x05 /* Enable/disable write-cache	*/
+#define VD_OP_GET_VTOC		0x06 /* Get VTOC			*/
+#define VD_OP_SET_VTOC		0x07 /* Set VTOC			*/
+#define VD_OP_GET_DISKGEOM	0x08 /* Get disk geometry		*/
+#define VD_OP_SET_DISKGEOM	0x09 /* Set disk geometry		*/
+#define VD_OP_SCSICMD		0x0a /* SCSI control command		*/
+#define VD_OP_GET_DEVID		0x0b /* Get device ID			*/
+#define VD_OP_GET_EFI		0x0c /* Get EFI				*/
+#define VD_OP_SET_EFI		0x0d /* Set EFI				*/
+	u8			slice;
+	u16			resv1;
+	u32			status;
+	u64			offset;
+	u64			size;
+	u32			ncookies;
+	u32			resv2;
+	struct ldc_trans_cookie	cookies[0];
+};
+
+#define VIO_DISK_VNAME_LEN	8
+#define VIO_DISK_ALABEL_LEN	128
+#define VIO_DISK_NUM_PART	8
+
+struct vio_disk_vtoc {
+	u8			volume_name[VIO_DISK_VNAME_LEN];
+	u16			sector_size;
+	u16			num_partitions;
+	u8			ascii_label[VIO_DISK_ALABEL_LEN];
+	struct {
+		u16		id;
+		u16		perm_flags;
+		u32		resv;
+		u64		start_block;
+		u64		num_blocks;
+	} partitions[VIO_DISK_NUM_PART];
+};
+
+struct vio_disk_geom {
+	u16			num_cyl; /* Num data cylinders		*/
+	u16			alt_cyl; /* Num alternate cylinders	*/
+	u16			beg_cyl; /* Cyl off of fixed head area	*/
+	u16			num_hd;  /* Num heads			*/
+	u16			num_sec; /* Num sectors			*/
+	u16			ifact;   /* Interleave factor		*/
+	u16			apc;     /* Alts per cylinder (SCSI)	*/
+	u16			rpm;	 /* Revolutions per minute	*/
+	u16			phy_cyl; /* Num physical cylinders	*/
+	u16			wr_skip; /* Num sects to skip, writes	*/
+	u16			rd_skip; /* Num sects to skip, reads	*/
+};
+
+struct vio_disk_devid {
+	u16			resv;
+	u16			type;
+	u32			len;
+	char			id[0];
+};
+
+struct vio_disk_efi {
+	u64			lba;
+	u64			len;
+	char			data[0];
+};
+
+/* VIO net specific structures and defines */
+struct vio_net_attr_info {
+	struct vio_msg_tag	tag;
+	u8			xfer_mode;
+	u8			addr_type;
+#define VNET_ADDR_ETHERMAC	0x01
+	u16			ack_freq;
+	u8			plnk_updt;
+#define PHYSLINK_UPDATE_NONE		0x00
+#define PHYSLINK_UPDATE_STATE		0x01
+#define PHYSLINK_UPDATE_STATE_ACK	0x02
+#define PHYSLINK_UPDATE_STATE_NACK	0x03
+	u8			options;
+	u16			resv1;
+	u64			addr;
+	u64			mtu;
+	u16			cflags;
+#define VNET_LSO_IPV4_CAPAB		0x0001
+	u16			ipv4_lso_maxlen;
+	u32			resv2;
+	u64			resv3[2];
+};
+
+#define VNET_NUM_MCAST		7
+
+struct vio_net_mcast_info {
+	struct vio_msg_tag	tag;
+	u8			set;
+	u8			count;
+	u8			mcast_addr[VNET_NUM_MCAST * 6];
+	u32			resv;
+};
+
+struct vio_net_desc {
+	struct vio_dring_hdr	hdr;
+	u32			size;
+	u32			ncookies;
+	struct ldc_trans_cookie	cookies[0];
+};
+
+struct vio_net_dext {
+	u8		flags;
+#define VNET_PKT_HASH			0x01
+#define	VNET_PKT_HCK_IPV4_HDRCKSUM	0x02
+#define	VNET_PKT_HCK_FULLCKSUM		0x04
+#define	VNET_PKT_IPV4_LSO		0x08
+#define	VNET_PKT_HCK_IPV4_HDRCKSUM_OK	0x10
+#define	VNET_PKT_HCK_FULLCKSUM_OK	0x20
+
+	u8		vnet_hashval;
+	u16		ipv4_lso_mss;
+	u32		resv3;
+};
+
+static inline struct vio_net_dext *vio_net_ext(struct vio_net_desc *desc)
+{
+	return (struct vio_net_dext *)&desc->cookies[2];
+}
+
+#define VIO_MAX_RING_COOKIES	24
+
+struct vio_dring_state {
+	u64			ident;
+	void			*base;
+	u64			snd_nxt;
+	u64			rcv_nxt;
+	u32			entry_size;
+	u32			num_entries;
+	u32			prod;
+	u32			cons;
+	u32			pending;
+	int			ncookies;
+	struct ldc_trans_cookie	cookies[VIO_MAX_RING_COOKIES];
+};
+
+#define VIO_TAG_SIZE		((int)sizeof(struct vio_msg_tag))
+#define VIO_VCC_MTU_SIZE	(LDC_PACKET_SIZE - VIO_TAG_SIZE)
+
+struct vio_vcc {
+	struct vio_msg_tag	tag;
+	char			data[VIO_VCC_MTU_SIZE];
+};
+
+static inline void *vio_dring_cur(struct vio_dring_state *dr)
+{
+	return dr->base + (dr->entry_size * dr->prod);
+}
+
+static inline void *vio_dring_entry(struct vio_dring_state *dr,
+				    unsigned int index)
+{
+	return dr->base + (dr->entry_size * index);
+}
+
+static inline u32 vio_dring_avail(struct vio_dring_state *dr,
+				  unsigned int ring_size)
+{
+	return (dr->pending -
+		((dr->prod - dr->cons) & (ring_size - 1)) - 1);
+}
+
+static inline u32 vio_dring_next(struct vio_dring_state *dr, u32 index)
+{
+	if (++index == dr->num_entries)
+		index = 0;
+	return index;
+}
+
+static inline u32 vio_dring_prev(struct vio_dring_state *dr, u32 index)
+{
+	if (index == 0)
+		return dr->num_entries - 1;
+	else
+		return index - 1;
+}
+
+#define VIO_MAX_TYPE_LEN	32
+#define VIO_MAX_NAME_LEN	32
+#define VIO_MAX_COMPAT_LEN	64
+
+struct vio_dev {
+	u64			mp;
+	struct device_node	*dp;
+
+	char			node_name[VIO_MAX_NAME_LEN];
+	char			type[VIO_MAX_TYPE_LEN];
+	char			compat[VIO_MAX_COMPAT_LEN];
+	int			compat_len;
+
+	u64			dev_no;
+
+	unsigned long		port_id;
+	unsigned long		channel_id;
+
+	unsigned int		tx_irq;
+	unsigned int		rx_irq;
+	u64			rx_ino;
+	u64			tx_ino;
+
+	/* Handle to the root of "channel-devices" sub-tree in MDESC */
+	u64			cdev_handle;
+
+	/* MD specific data used to identify the vdev in MD */
+	union md_node_info	md_node_info;
+
+	struct device		dev;
+};
+
+struct vio_driver {
+	const char			*name;
+	struct list_head		node;
+	const struct vio_device_id	*id_table;
+	int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
+	int (*remove)(struct vio_dev *dev);
+	void (*shutdown)(struct vio_dev *dev);
+	unsigned long			driver_data;
+	struct device_driver		driver;
+	bool				no_irq;
+};
+
+struct vio_version {
+	u16		major;
+	u16		minor;
+};
+
+struct vio_driver_state;
+struct vio_driver_ops {
+	int	(*send_attr)(struct vio_driver_state *vio);
+	int	(*handle_attr)(struct vio_driver_state *vio, void *pkt);
+	void	(*handshake_complete)(struct vio_driver_state *vio);
+};
+
+struct vio_completion {
+	struct completion	com;
+	int			err;
+	int			waiting_for;
+};
+
+struct vio_driver_state {
+	/* Protects VIO handshake and, optionally, driver private state.  */
+	spinlock_t		lock;
+
+	struct ldc_channel	*lp;
+
+	u32			_peer_sid;
+	u32			_local_sid;
+	struct vio_dring_state	drings[2];
+#define VIO_DRIVER_TX_RING	0
+#define VIO_DRIVER_RX_RING	1
+
+	u8			hs_state;
+#define VIO_HS_INVALID		0x00
+#define VIO_HS_GOTVERS		0x01
+#define VIO_HS_GOT_ATTR		0x04
+#define VIO_HS_SENT_DREG	0x08
+#define VIO_HS_SENT_RDX		0x10
+#define VIO_HS_GOT_RDX_ACK	0x20
+#define VIO_HS_GOT_RDX		0x40
+#define VIO_HS_SENT_RDX_ACK	0x80
+#define VIO_HS_COMPLETE		(VIO_HS_GOT_RDX_ACK | VIO_HS_SENT_RDX_ACK)
+
+	u8			dev_class;
+
+	u8			dr_state;
+#define VIO_DR_STATE_TXREG	0x01
+#define VIO_DR_STATE_RXREG	0x02
+#define VIO_DR_STATE_TXREQ	0x10
+#define VIO_DR_STATE_RXREQ	0x20
+
+	u8			debug;
+#define VIO_DEBUG_HS		0x01
+#define VIO_DEBUG_DATA		0x02
+
+	void			*desc_buf;
+	unsigned int		desc_buf_len;
+
+	struct vio_completion	*cmp;
+
+	struct vio_dev		*vdev;
+
+	struct timer_list	timer;
+
+	struct vio_version	ver;
+
+	struct vio_version	*ver_table;
+	int			ver_table_entries;
+
+	char			*name;
+
+	struct vio_driver_ops	*ops;
+};
+
+static inline bool vio_version_before(struct vio_driver_state *vio,
+				      u16 major, u16 minor)
+{
+	u32 have = (u32)vio->ver.major << 16 | vio->ver.minor;
+	u32 want = (u32)major << 16 | minor;
+
+	return have < want;
+}
+
+static inline bool vio_version_after(struct vio_driver_state *vio,
+				      u16 major, u16 minor)
+{
+	u32 have = (u32)vio->ver.major << 16 | vio->ver.minor;
+	u32 want = (u32)major << 16 | minor;
+
+	return have > want;
+}
+
+static inline bool vio_version_after_eq(struct vio_driver_state *vio,
+					u16 major, u16 minor)
+{
+	u32 have = (u32)vio->ver.major << 16 | vio->ver.minor;
+	u32 want = (u32)major << 16 | minor;
+
+	return have >= want;
+}
+
+#define viodbg(TYPE, f, a...) \
+do {	if (vio->debug & VIO_DEBUG_##TYPE) \
+		printk(KERN_INFO "vio: ID[%lu] " f, \
+		       vio->vdev->channel_id, ## a); \
+} while (0)
+
+int __vio_register_driver(struct vio_driver *drv, struct module *owner,
+				 const char *mod_name);
+/*
+ * vio_register_driver must be a macro so that KBUILD_MODNAME can be expanded
+ */
+#define vio_register_driver(driver)		\
+	__vio_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
+void vio_unregister_driver(struct vio_driver *drv);
+
+static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
+{
+	return container_of(drv, struct vio_driver, driver);
+}
+
+static inline struct vio_dev *to_vio_dev(struct device *dev)
+{
+	return container_of(dev, struct vio_dev, dev);
+}
+
+int vio_ldc_send(struct vio_driver_state *vio, void *data, int len);
+void vio_link_state_change(struct vio_driver_state *vio, int event);
+void vio_conn_reset(struct vio_driver_state *vio);
+int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt);
+int vio_validate_sid(struct vio_driver_state *vio,
+		     struct vio_msg_tag *tp);
+u32 vio_send_sid(struct vio_driver_state *vio);
+int vio_ldc_alloc(struct vio_driver_state *vio,
+		  struct ldc_channel_config *base_cfg, void *event_arg);
+void vio_ldc_free(struct vio_driver_state *vio);
+int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
+		    u8 dev_class, struct vio_version *ver_table,
+		    int ver_table_size, struct vio_driver_ops *ops,
+		    char *name);
+
+void vio_port_up(struct vio_driver_state *vio);
+int vio_set_intr(unsigned long dev_ino, int state);
+u64 vio_vdev_node(struct mdesc_handle *hp, struct vio_dev *vdev);
+
+#endif /* _SPARC64_VIO_H */
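
The version helpers above work because packing (major, minor) into one u32,
major in the high half, lets an ordinary integer compare order versions, and
vio_dring_next/prev wrap ring indices with an explicit compare rather than a
mask. A self-contained sketch of both idioms (plain C, not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t pack_ver(uint16_t major, uint16_t minor)
	{
		return (uint32_t)major << 16 | minor;
	}

	static uint32_t ring_next(uint32_t index, uint32_t num_entries)
	{
		if (++index == num_entries)
			index = 0;
		return index;
	}

	int main(void)
	{
		printf("%d\n", pack_ver(1, 2) < pack_ver(1, 10)); /* 1: 1.2 < 1.10 */
		printf("%u\n", ring_next(4, 5));                  /* 0: wraps   */
		return 0;
	}
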
diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h
new file mode 100644
index 0000000..7903e84
--- /dev/null
+++ b/arch/sparc/include/asm/visasm.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_VISASM_H
+#define _SPARC64_VISASM_H
+
+/* visasm.h:  FPU saving macros for VIS routines
+ *
+ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+
+/* Clobbers %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
+
+#define VISEntry					\
+	rd		%fprs, %o5;			\
+	andcc		%o5, (FPRS_FEF|FPRS_DU), %g0;	\
+	be,pt		%icc, 297f;			\
+	 sethi		%hi(297f), %g7;			\
+	sethi		%hi(VISenter), %g1;		\
+	jmpl		%g1 + %lo(VISenter), %g0;	\
+	 or		%g7, %lo(297f), %g7;		\
+297:	wr		%g0, FPRS_FEF, %fprs;
+
+#define VISExit						\
+	wr		%g0, 0, %fprs;
+
+/* Clobbers %o5, %g1, %g2, %g3, %g7, %icc, %xcc.
+ * Must preserve %o5 between VISEntryHalf and VISExitHalf */
+
+#define VISEntryHalf					\
+	VISEntry
+
+#define VISExitHalf					\
+	VISExit
+
+#define VISEntryHalfFast(fail_label)			\
+	rd		%fprs, %o5;			\
+	andcc		%o5, FPRS_FEF, %g0;		\
+	be,pt		%icc, 297f;			\
+	 nop;						\
+	ba,a,pt		%xcc, fail_label;		\
+297:	wr		%o5, FPRS_FEF, %fprs;
+
+#define VISExitHalfFast					\
+	wr		%o5, 0, %fprs;
+
+#ifndef __ASSEMBLY__
+static inline void save_and_clear_fpu(void) {
+	__asm__ __volatile__ (
+"		rd %%fprs, %%o5\n"
+"		andcc %%o5, %0, %%g0\n"
+"		be,pt %%icc, 299f\n"
+"		 sethi %%hi(298f), %%g7\n"
+"		sethi %%hi(VISenter), %%g1\n"
+"		jmpl %%g1 + %%lo(VISenter), %%g0\n"
+"		 or %%g7, %%lo(298f), %%g7\n"
+"	298:	wr %%g0, 0, %%fprs\n"
+"	299:\n"
+"		" : : "i" (FPRS_FEF|FPRS_DU) :
+		"o5", "g1", "g2", "g3", "g7", "cc");
+}
+
+int vis_emul(struct pt_regs *, unsigned int);
+#endif
+
+#endif /* _SPARC64_VISASM_H */
diff --git a/arch/sparc/include/asm/vvar.h b/arch/sparc/include/asm/vvar.h
new file mode 100644
index 0000000..0289503
--- /dev/null
+++ b/arch/sparc/include/asm/vvar.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#ifndef _ASM_SPARC_VVAR_DATA_H
+#define _ASM_SPARC_VVAR_DATA_H
+
+#include <asm/clocksource.h>
+#include <linux/seqlock.h>
+#include <linux/time.h>
+#include <linux/types.h>
+
+struct vvar_data {
+	unsigned int seq;
+
+	int vclock_mode;
+	struct { /* extract of a clocksource struct */
+		u64	cycle_last;
+		u64	mask;
+		int	mult;
+		int	shift;
+	} clock;
+	/* open coded 'struct timespec' */
+	u64		wall_time_sec;
+	u64		wall_time_snsec;
+	u64		monotonic_time_snsec;
+	u64		monotonic_time_sec;
+	u64		monotonic_time_coarse_sec;
+	u64		monotonic_time_coarse_nsec;
+	u64		wall_time_coarse_sec;
+	u64		wall_time_coarse_nsec;
+
+	int		tz_minuteswest;
+	int		tz_dsttime;
+};
+
+extern struct vvar_data *vvar_data;
+extern int vdso_fix_stick;
+
+static inline unsigned int vvar_read_begin(const struct vvar_data *s)
+{
+	unsigned int ret;
+
+repeat:
+	ret = READ_ONCE(s->seq);
+	if (unlikely(ret & 1)) {
+		cpu_relax();
+		goto repeat;
+	}
+	smp_rmb(); /* Finish all reads before we return seq */
+	return ret;
+}
+
+static inline int vvar_read_retry(const struct vvar_data *s,
+					unsigned int start)
+{
+	smp_rmb(); /* Finish all reads before checking the value of seq */
+	return unlikely(s->seq != start);
+}
+
+static inline void vvar_write_begin(struct vvar_data *s)
+{
+	++s->seq;
+	smp_wmb(); /* Make the odd seq visible before any data writes */
+}
+
+static inline void vvar_write_end(struct vvar_data *s)
+{
+	smp_wmb(); /* Finish all data writes before the closing increment */
+	++s->seq;
+}
+
+
+#endif /* _ASM_SPARC_VVAR_DATA_H */
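
The seq accessors above implement the usual seqcount discipline: the writer
makes seq odd for the duration of an update, and a reader retries until it
observes the same even value before and after its copy. A minimal
userspace-flavored sketch of the reader side, with illustrative names and
GCC's __sync_synchronize() standing in for smp_rmb():

	#include <stdint.h>

	struct sample {
		volatile uint32_t	seq;
		uint64_t		sec;
		uint64_t		nsec;
	};

	/* Retry until an even, unchanged seq brackets the whole copy. */
	static void read_sample(const struct sample *s,
				uint64_t *sec, uint64_t *nsec)
	{
		uint32_t start;

		do {
			do {
				start = s->seq;
			} while (start & 1);	/* writer in progress */
			__sync_synchronize();	/* rmb: copy after seq read */
			*sec = s->sec;
			*nsec = s->nsec;
			__sync_synchronize();	/* rmb: copy before re-check */
		} while (s->seq != start);
	}
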
diff --git a/arch/sparc/include/asm/winmacro.h b/arch/sparc/include/asm/winmacro.h
new file mode 100644
index 0000000..b6e911f
--- /dev/null
+++ b/arch/sparc/include/asm/winmacro.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * winmacro.h: Window loading-unloading macros.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_WINMACRO_H
+#define _SPARC_WINMACRO_H
+
+#include <asm/ptrace.h>
+
+/* Store the register window onto the 8-byte aligned area starting
+ * at %reg.  It might be %sp, it might not, we don't care.
+ */
+#define STORE_WINDOW(reg) \
+	std	%l0, [%reg + RW_L0]; \
+	std	%l2, [%reg + RW_L2]; \
+	std	%l4, [%reg + RW_L4]; \
+	std	%l6, [%reg + RW_L6]; \
+	std	%i0, [%reg + RW_I0]; \
+	std	%i2, [%reg + RW_I2]; \
+	std	%i4, [%reg + RW_I4]; \
+	std	%i6, [%reg + RW_I6];
+
+/* Load a register window from the area beginning at %reg. */
+#define LOAD_WINDOW(reg) \
+	ldd	[%reg + RW_L0], %l0; \
+	ldd	[%reg + RW_L2], %l2; \
+	ldd	[%reg + RW_L4], %l4; \
+	ldd	[%reg + RW_L6], %l6; \
+	ldd	[%reg + RW_I0], %i0; \
+	ldd	[%reg + RW_I2], %i2; \
+	ldd	[%reg + RW_I4], %i4; \
+	ldd	[%reg + RW_I6], %i6;
+
+/* Loading and storing struct pt_regs trap frames. */
+#define LOAD_PT_INS(base_reg) \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_I0], %i0; \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_I2], %i2; \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_I4], %i4; \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_I6], %i6;
+
+#define LOAD_PT_GLOBALS(base_reg) \
+        ld      [%base_reg + STACKFRAME_SZ + PT_G1], %g1; \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_G2], %g2; \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_G4], %g4; \
+        ldd     [%base_reg + STACKFRAME_SZ + PT_G6], %g6;
+
+#define LOAD_PT_YREG(base_reg, scratch) \
+        ld      [%base_reg + STACKFRAME_SZ + PT_Y], %scratch; \
+        wr      %scratch, 0x0, %y;
+
+#define LOAD_PT_PRIV(base_reg, pt_psr, pt_pc, pt_npc) \
+        ld      [%base_reg + STACKFRAME_SZ + PT_PSR], %pt_psr; \
+        ld      [%base_reg + STACKFRAME_SZ + PT_PC], %pt_pc; \
+        ld      [%base_reg + STACKFRAME_SZ + PT_NPC], %pt_npc;
+
+#define LOAD_PT_ALL(base_reg, pt_psr, pt_pc, pt_npc, scratch) \
+        LOAD_PT_YREG(base_reg, scratch) \
+        LOAD_PT_INS(base_reg) \
+        LOAD_PT_GLOBALS(base_reg) \
+        LOAD_PT_PRIV(base_reg, pt_psr, pt_pc, pt_npc)
+
+#define STORE_PT_INS(base_reg) \
+        std     %i0, [%base_reg + STACKFRAME_SZ + PT_I0]; \
+        std     %i2, [%base_reg + STACKFRAME_SZ + PT_I2]; \
+        std     %i4, [%base_reg + STACKFRAME_SZ + PT_I4]; \
+        std     %i6, [%base_reg + STACKFRAME_SZ + PT_I6];
+
+#define STORE_PT_GLOBALS(base_reg) \
+        st      %g1, [%base_reg + STACKFRAME_SZ + PT_G1]; \
+        std     %g2, [%base_reg + STACKFRAME_SZ + PT_G2]; \
+        std     %g4, [%base_reg + STACKFRAME_SZ + PT_G4]; \
+        std     %g6, [%base_reg + STACKFRAME_SZ + PT_G6];
+
+#define STORE_PT_YREG(base_reg, scratch) \
+        rd      %y, %scratch; \
+        st      %scratch, [%base_reg + STACKFRAME_SZ + PT_Y];
+
+#define STORE_PT_PRIV(base_reg, pt_psr, pt_pc, pt_npc) \
+        st      %pt_psr, [%base_reg + STACKFRAME_SZ + PT_PSR]; \
+        st      %pt_pc,  [%base_reg + STACKFRAME_SZ + PT_PC]; \
+        st      %pt_npc, [%base_reg + STACKFRAME_SZ + PT_NPC];
+
+#define STORE_PT_ALL(base_reg, reg_psr, reg_pc, reg_npc, g_scratch) \
+        STORE_PT_PRIV(base_reg, reg_psr, reg_pc, reg_npc) \
+        STORE_PT_GLOBALS(base_reg) \
+        STORE_PT_YREG(base_reg, g_scratch) \
+        STORE_PT_INS(base_reg)
+
+#define SAVE_BOLIXED_USER_STACK(cur_reg, scratch) \
+        ld       [%cur_reg + TI_W_SAVED], %scratch; \
+        sll      %scratch, 2, %scratch; \
+        add      %scratch, %cur_reg, %scratch; \
+        st       %sp, [%scratch + TI_RWIN_SPTRS]; \
+        sub      %scratch, %cur_reg, %scratch; \
+        sll      %scratch, 4, %scratch; \
+        add      %scratch, %cur_reg, %scratch; \
+        STORE_WINDOW(scratch + TI_REG_WINDOW); \
+        sub      %scratch, %cur_reg, %scratch; \
+        srl      %scratch, 6, %scratch; \
+        add      %scratch, 1, %scratch; \
+        st       %scratch, [%cur_reg + TI_W_SAVED];
+
+#ifdef CONFIG_SMP
+#define LOAD_CURRENT(dest_reg, idreg) 			\
+661:	rd	%tbr, %idreg;				\
+	srl	%idreg, 10, %idreg;			\
+	and	%idreg, 0xc, %idreg;			\
+	.section	.cpuid_patch, "ax";		\
+	/* Instruction location. */			\
+	.word		661b;				\
+	/* SUN4D implementation. */			\
+	lda	 [%g0] ASI_M_VIKING_TMP1, %idreg;	\
+	sll	 %idreg, 2, %idreg;			\
+	nop;						\
+	/* LEON implementation. */			\
+	rd 	%asr17, %idreg;				\
+	srl	%idreg, 0x1c, %idreg;			\
+	sll	%idreg, 0x02, %idreg;			\
+	.previous;					\
+	sethi    %hi(current_set), %dest_reg; 		\
+	or       %dest_reg, %lo(current_set), %dest_reg;\
+	ld       [%idreg + %dest_reg], %dest_reg;
+#else
+#define LOAD_CURRENT(dest_reg, idreg) \
+        sethi    %hi(current_set), %idreg; \
+        ld       [%idreg + %lo(current_set)], %dest_reg;
+#endif
+
+#endif /* !(_SPARC_WINMACRO_H) */
diff --git a/arch/sparc/include/asm/xor.h b/arch/sparc/include/asm/xor.h
new file mode 100644
index 0000000..f4c651e
--- /dev/null
+++ b/arch/sparc/include/asm/xor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___ASM_SPARC_XOR_H
+#define ___ASM_SPARC_XOR_H
+#if defined(__sparc__) && defined(__arch64__)
+#include <asm/xor_64.h>
+#else
+#include <asm/xor_32.h>
+#endif
+#endif
diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h
new file mode 100644
index 0000000..44bfa07
--- /dev/null
+++ b/arch/sparc/include/asm/xor_32.h
@@ -0,0 +1,269 @@
+/*
+ * arch/sparc/include/asm/xor_32.h
+ *
+ * Optimized RAID-5 checksumming functions for 32-bit Sparc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * High speed xor_block operation for RAID4/5 utilizing the
+ * ldd/std SPARC instructions.
+ *
+ * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+static void
+sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	int lines = bytes / (sizeof (long)) / 8;
+
+	do {
+		__asm__ __volatile__(
+		  "ldd [%0 + 0x00], %%g2\n\t"
+		  "ldd [%0 + 0x08], %%g4\n\t"
+		  "ldd [%0 + 0x10], %%o0\n\t"
+		  "ldd [%0 + 0x18], %%o2\n\t"
+		  "ldd [%1 + 0x00], %%o4\n\t"
+		  "ldd [%1 + 0x08], %%l0\n\t"
+		  "ldd [%1 + 0x10], %%l2\n\t"
+		  "ldd [%1 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "std %%g2, [%0 + 0x00]\n\t"
+		  "std %%g4, [%0 + 0x08]\n\t"
+		  "std %%o0, [%0 + 0x10]\n\t"
+		  "std %%o2, [%0 + 0x18]\n"
+		:
+		: "r" (p1), "r" (p2)
+		: "g2", "g3", "g4", "g5",
+		  "o0", "o1", "o2", "o3", "o4", "o5",
+		  "l0", "l1", "l2", "l3", "l4", "l5");
+		p1 += 8;
+		p2 += 8;
+	} while (--lines > 0);
+}
+
+static void
+sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	unsigned long *p3)
+{
+	int lines = bytes / (sizeof (long)) / 8;
+
+	do {
+		__asm__ __volatile__(
+		  "ldd [%0 + 0x00], %%g2\n\t"
+		  "ldd [%0 + 0x08], %%g4\n\t"
+		  "ldd [%0 + 0x10], %%o0\n\t"
+		  "ldd [%0 + 0x18], %%o2\n\t"
+		  "ldd [%1 + 0x00], %%o4\n\t"
+		  "ldd [%1 + 0x08], %%l0\n\t"
+		  "ldd [%1 + 0x10], %%l2\n\t"
+		  "ldd [%1 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "ldd [%2 + 0x00], %%o4\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "ldd [%2 + 0x08], %%l0\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "ldd [%2 + 0x10], %%l2\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "ldd [%2 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "std %%g2, [%0 + 0x00]\n\t"
+		  "std %%g4, [%0 + 0x08]\n\t"
+		  "std %%o0, [%0 + 0x10]\n\t"
+		  "std %%o2, [%0 + 0x18]\n"
+		:
+		: "r" (p1), "r" (p2), "r" (p3)
+		: "g2", "g3", "g4", "g5",
+		  "o0", "o1", "o2", "o3", "o4", "o5",
+		  "l0", "l1", "l2", "l3", "l4", "l5");
+		p1 += 8;
+		p2 += 8;
+		p3 += 8;
+	} while (--lines > 0);
+}
+
+static void
+sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	unsigned long *p3, unsigned long *p4)
+{
+	int lines = bytes / (sizeof (long)) / 8;
+
+	do {
+		__asm__ __volatile__(
+		  "ldd [%0 + 0x00], %%g2\n\t"
+		  "ldd [%0 + 0x08], %%g4\n\t"
+		  "ldd [%0 + 0x10], %%o0\n\t"
+		  "ldd [%0 + 0x18], %%o2\n\t"
+		  "ldd [%1 + 0x00], %%o4\n\t"
+		  "ldd [%1 + 0x08], %%l0\n\t"
+		  "ldd [%1 + 0x10], %%l2\n\t"
+		  "ldd [%1 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "ldd [%2 + 0x00], %%o4\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "ldd [%2 + 0x08], %%l0\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "ldd [%2 + 0x10], %%l2\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "ldd [%2 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "ldd [%3 + 0x00], %%o4\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "ldd [%3 + 0x08], %%l0\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "ldd [%3 + 0x10], %%l2\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "ldd [%3 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "std %%g2, [%0 + 0x00]\n\t"
+		  "std %%g4, [%0 + 0x08]\n\t"
+		  "std %%o0, [%0 + 0x10]\n\t"
+		  "std %%o2, [%0 + 0x18]\n"
+		:
+		: "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+		: "g2", "g3", "g4", "g5",
+		  "o0", "o1", "o2", "o3", "o4", "o5",
+		  "l0", "l1", "l2", "l3", "l4", "l5");
+		p1 += 8;
+		p2 += 8;
+		p3 += 8;
+		p4 += 8;
+	} while (--lines > 0);
+}
+
+static void
+sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+	unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	int lines = bytes / (sizeof (long)) / 8;
+
+	do {
+		__asm__ __volatile__(
+		  "ldd [%0 + 0x00], %%g2\n\t"
+		  "ldd [%0 + 0x08], %%g4\n\t"
+		  "ldd [%0 + 0x10], %%o0\n\t"
+		  "ldd [%0 + 0x18], %%o2\n\t"
+		  "ldd [%1 + 0x00], %%o4\n\t"
+		  "ldd [%1 + 0x08], %%l0\n\t"
+		  "ldd [%1 + 0x10], %%l2\n\t"
+		  "ldd [%1 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "ldd [%2 + 0x00], %%o4\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "ldd [%2 + 0x08], %%l0\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "ldd [%2 + 0x10], %%l2\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "ldd [%2 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "ldd [%3 + 0x00], %%o4\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "ldd [%3 + 0x08], %%l0\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "ldd [%3 + 0x10], %%l2\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "ldd [%3 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "ldd [%4 + 0x00], %%o4\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "ldd [%4 + 0x08], %%l0\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "ldd [%4 + 0x10], %%l2\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "ldd [%4 + 0x18], %%l4\n\t"
+		  "xor %%g2, %%o4, %%g2\n\t"
+		  "xor %%g3, %%o5, %%g3\n\t"
+		  "xor %%g4, %%l0, %%g4\n\t"
+		  "xor %%g5, %%l1, %%g5\n\t"
+		  "xor %%o0, %%l2, %%o0\n\t"
+		  "xor %%o1, %%l3, %%o1\n\t"
+		  "xor %%o2, %%l4, %%o2\n\t"
+		  "xor %%o3, %%l5, %%o3\n\t"
+		  "std %%g2, [%0 + 0x00]\n\t"
+		  "std %%g4, [%0 + 0x08]\n\t"
+		  "std %%o0, [%0 + 0x10]\n\t"
+		  "std %%o2, [%0 + 0x18]\n"
+		:
+		: "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+		: "g2", "g3", "g4", "g5",
+		  "o0", "o1", "o2", "o3", "o4", "o5",
+		  "l0", "l1", "l2", "l3", "l4", "l5");
+		p1 += 8;
+		p2 += 8;
+		p3 += 8;
+		p4 += 8;
+		p5 += 8;
+	} while (--lines > 0);
+}
+
+static struct xor_block_template xor_block_SPARC = {
+	.name	= "SPARC",
+	.do_2	= sparc_2,
+	.do_3	= sparc_3,
+	.do_4	= sparc_4,
+	.do_5	= sparc_5,
+};
+
+/* For grins, also test the generic routines.  */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+	do {						\
+		xor_speed(&xor_block_8regs);		\
+		xor_speed(&xor_block_32regs);		\
+		xor_speed(&xor_block_SPARC);		\
+	} while (0)
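
For reference, the asm blocks above compute a plain in-place XOR, 8 longs
(one 32-byte "line") per iteration; the ldd/std double-word forms are what
buy the speed. A behaviorally equivalent C sketch of the two-source case:

	/* What sparc_2 computes: p1 ^= p2 over `bytes` bytes. */
	static void sparc_2_ref(unsigned long bytes, unsigned long *p1,
				const unsigned long *p2)
	{
		unsigned long i, n = bytes / sizeof(unsigned long);

		for (i = 0; i < n; i++)
			p1[i] ^= p2[i];
	}
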
diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h
new file mode 100644
index 0000000..50c8828
--- /dev/null
+++ b/arch/sparc/include/asm/xor_64.h
@@ -0,0 +1,73 @@
+/*
+ * arch/sparc/include/asm/xor_64.h
+ *
+ * High speed xor_block operation for RAID4/5 utilizing the
+ * UltraSparc Visual Instruction Set and Niagara block-init
+ * twin-load instructions.
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <asm/spitfire.h>
+
+void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
+void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
+	       unsigned long *);
+void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
+	       unsigned long *, unsigned long *);
+void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
+	       unsigned long *, unsigned long *, unsigned long *);
+
+/* XXX Ugh, write cheetah versions... -DaveM */
+
+static struct xor_block_template xor_block_VIS = {
+        .name	= "VIS",
+        .do_2	= xor_vis_2,
+        .do_3	= xor_vis_3,
+        .do_4	= xor_vis_4,
+        .do_5	= xor_vis_5,
+};
+
+void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
+void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
+		   unsigned long *);
+void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
+		   unsigned long *, unsigned long *);
+void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
+		   unsigned long *, unsigned long *, unsigned long *);
+
+static struct xor_block_template xor_block_niagara = {
+        .name	= "Niagara",
+        .do_2	= xor_niagara_2,
+        .do_3	= xor_niagara_3,
+        .do_4	= xor_niagara_4,
+        .do_5	= xor_niagara_5,
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES				\
+	do {						\
+		xor_speed(&xor_block_VIS);		\
+		xor_speed(&xor_block_niagara);		\
+	} while (0)
+
+/* Use VIS for everything except Niagara.  */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+	((tlb_type == hypervisor && \
+	  (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
+	   sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \
+	   sun4v_chip_type == SUN4V_CHIP_NIAGARA3 || \
+	   sun4v_chip_type == SUN4V_CHIP_NIAGARA4 || \
+	   sun4v_chip_type == SUN4V_CHIP_NIAGARA5)) ? \
+	 &xor_block_niagara : \
+	 &xor_block_VIS)
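
XOR_SELECT_TEMPLATE makes the VIS/Niagara choice once, based on the runtime
chip probe. A stand-alone sketch of that dispatch shape; every name below is
illustrative, not kernel API:

	#include <stdio.h>

	struct xor_tmpl {
		const char *name;
		void (*do_2)(unsigned long bytes, unsigned long *p1,
			     unsigned long *p2);
	};

	static void generic_2(unsigned long bytes, unsigned long *p1,
			      unsigned long *p2)
	{
		unsigned long i;

		for (i = 0; i < bytes / sizeof(long); i++)
			p1[i] ^= p2[i];
	}

	static const struct xor_tmpl tmpl_vis = { "VIS", generic_2 };
	static const struct xor_tmpl tmpl_niagara = { "Niagara", generic_2 };

	static int cpu_is_niagara(void) { return 0; } /* stand-in probe */

	int main(void)
	{
		const struct xor_tmpl *t =
			cpu_is_niagara() ? &tmpl_niagara : &tmpl_vis;
		unsigned long a[4] = { 1, 2, 3, 4 }, b[4] = { 1, 2, 3, 4 };

		t->do_2(sizeof(a), a, b);
		printf("%s: a[0] = %lu\n", t->name, a[0]); /* prints 0 */
		return 0;
	}
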
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
new file mode 100644
index 0000000..4680ba2
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -0,0 +1,5 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+generic-y += bpf_perf_event.h
+generic-y += types.h
diff --git a/arch/sparc/include/uapi/asm/apc.h b/arch/sparc/include/uapi/asm/apc.h
new file mode 100644
index 0000000..aeb369b
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/apc.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* apc - Driver definitions for power management functions
+ * of Aurora Personality Chip (APC) on SPARCstation-4/5 and 
+ * derivatives
+ *
+ * Copyright (c) 2001 Eric Brower (ebrower@usa.net)
+ *
+ */
+
+#ifndef _SPARC_APC_H
+#define _SPARC_APC_H
+
+#include <linux/ioctl.h>
+
+#define APC_IOC	'A'
+
+#define APCIOCGFANCTL _IOR(APC_IOC, 0x00, int)	/* Get fan speed	*/
+#define APCIOCSFANCTL _IOW(APC_IOC, 0x01, int)	/* Set fan speed	*/
+
+#define APCIOCGCPWR   _IOR(APC_IOC, 0x02, int)	/* Get CPOWER state	*/
+#define APCIOCSCPWR   _IOW(APC_IOC, 0x03, int)	/* Set CPOWER state	*/
+
+#define APCIOCGBPORT   _IOR(APC_IOC, 0x04, int)	/* Get BPORT state 	*/
+#define APCIOCSBPORT   _IOW(APC_IOC, 0x05, int)	/* Set BPORT state	*/
+
+/*
+ * Register offsets
+ */
+#define APC_IDLE_REG	0x00
+#define APC_FANCTL_REG	0x20
+#define APC_CPOWER_REG	0x24
+#define APC_BPORT_REG	0x30
+
+#define APC_REGMASK		0x01
+#define APC_BPMASK		0x03
+
+/*
+ * IDLE - CPU standby values (set to initiate standby)
+ */
+#define APC_IDLE_ON		0x01
+
+/*
+ * FANCTL - Fan speed control state values
+ */
+#define APC_FANCTL_HI	0x00	/* Fan speed high	*/
+#define APC_FANCTL_LO	0x01	/* Fan speed low	*/
+
+/*
+ * CPWR - Convenience power outlet state values 
+ */
+#define APC_CPOWER_ON	0x00	/* Conv power on	*/
+#define APC_CPOWER_OFF	0x01	/* Conv power off	*/
+
+/*
+ * BPA/BPB - Read-Write "Bit Ports" state values (reset to 0 at power-on)
+ *
+ * WARNING: Internal usage of bit ports is platform dependent--
+ * don't modify BPORT settings unless you know what you are doing.
+ * 
+ * On SS5 BPA seems to toggle onboard ethernet loopback... -E
+ */
+#define APC_BPORT_A		0x01	/* Bit Port A		*/
+#define APC_BPORT_B		0x02	/* Bit Port B		*/
+
+#endif /* !(_SPARC_APC_H) */
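
A hypothetical userspace consumer of these ioctls might look like the sketch
below; the device node name (/dev/apc) and sun4m availability are
assumptions, not guarantees made by this header:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/apc.h>	/* sparc-only */

	int main(void)
	{
		int fd = open("/dev/apc", O_RDONLY);
		int speed;

		if (fd < 0)
			return 1;
		if (ioctl(fd, APCIOCGFANCTL, &speed) == 0)
			printf("fan speed: %s\n",
			       speed == APC_FANCTL_HI ? "high" : "low");
		close(fd);
		return 0;
	}
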
diff --git a/arch/sparc/include/uapi/asm/asi.h b/arch/sparc/include/uapi/asm/asi.h
new file mode 100644
index 0000000..fbb30a5
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/asi.h
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_ASI_H
+#define _SPARC_ASI_H
+
+/* asi.h:  Address Space Identifier values for the sparc.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Pioneer work for sun4m: Paul Hatchman (paul@sfe.com.au)
+ * Joint edition for sun4c+sun4m: Pete A. Zaitcev <zaitcev@ipmce.su>
+ */
+
+/* The first batch are for the sun4c. */
+
+#define ASI_NULL1           0x00
+#define ASI_NULL2           0x01
+
+/* sun4c and sun4 control registers and mmu/vac ops */
+#define ASI_CONTROL         0x02
+#define ASI_SEGMAP          0x03
+#define ASI_PTE             0x04
+#define ASI_HWFLUSHSEG      0x05
+#define ASI_HWFLUSHPAGE     0x06
+#define ASI_REGMAP          0x06
+#define ASI_HWFLUSHCONTEXT  0x07
+
+#define ASI_USERTXT         0x08
+#define ASI_KERNELTXT       0x09
+#define ASI_USERDATA        0x0a
+#define ASI_KERNELDATA      0x0b
+
+/* VAC Cache flushing on sun4c and sun4 */
+#define ASI_FLUSHSEG        0x0c
+#define ASI_FLUSHPG         0x0d
+#define ASI_FLUSHCTX        0x0e
+
+/* SPARCstation-5: only 6 bits are decoded. */
+/* wo = Write Only, rw = Read Write;        */
+/* ss = Single Size, as = All Sizes;        */
+#define ASI_M_RES00         0x00   /* Don't touch... */
+#define ASI_M_UNA01         0x01   /* Same here... */
+#define ASI_M_MXCC          0x02   /* Access to TI VIKING MXCC registers */
+#define ASI_M_FLUSH_PROBE   0x03   /* Reference MMU Flush/Probe; rw, ss */
+#define ASI_M_MMUREGS       0x04   /* MMU Registers; rw, ss */
+#define ASI_M_TLBDIAG       0x05   /* MMU TLB only Diagnostics */
+#define ASI_M_DIAGS         0x06   /* Reference MMU Diagnostics */
+#define ASI_M_IODIAG        0x07   /* MMU I/O TLB only Diagnostics */
+#define ASI_M_USERTXT       0x08   /* Same as ASI_USERTXT; rw, as */
+#define ASI_M_KERNELTXT     0x09   /* Same as ASI_KERNELTXT; rw, as */
+#define ASI_M_USERDATA      0x0A   /* Same as ASI_USERDATA; rw, as */
+#define ASI_M_KERNELDATA    0x0B   /* Same as ASI_KERNELDATA; rw, as */
+#define ASI_M_TXTC_TAG      0x0C   /* Instruction Cache Tag; rw, ss */
+#define ASI_M_TXTC_DATA     0x0D   /* Instruction Cache Data; rw, ss */
+#define ASI_M_DATAC_TAG     0x0E   /* Data Cache Tag; rw, ss */
+#define ASI_M_DATAC_DATA    0x0F   /* Data Cache Data; rw, ss */
+
+/* The following cache flushing ASIs work only with the 'sta'
+ * instruction. Results are unpredictable for 'swap' and 'ldstuba',
+ * so don't do it.
+ */
+
+/* These ASI flushes affect external caches too. */
+#define ASI_M_FLUSH_PAGE    0x10   /* Flush I&D Cache Line (page); wo, ss */
+#define ASI_M_FLUSH_SEG     0x11   /* Flush I&D Cache Line (seg); wo, ss */
+#define ASI_M_FLUSH_REGION  0x12   /* Flush I&D Cache Line (region); wo, ss */
+#define ASI_M_FLUSH_CTX     0x13   /* Flush I&D Cache Line (context); wo, ss */
+#define ASI_M_FLUSH_USER    0x14   /* Flush I&D Cache Line (user); wo, ss */
+
+/* Block-copy operations are available only on certain V8 cpus. */
+#define ASI_M_BCOPY         0x17   /* Block copy */
+
+/* These affect only the ICACHE and are Ross HyperSparc and TurboSparc specific. */
+#define ASI_M_IFLUSH_PAGE   0x18   /* Flush I Cache Line (page); wo, ss */
+#define ASI_M_IFLUSH_SEG    0x19   /* Flush I Cache Line (seg); wo, ss */
+#define ASI_M_IFLUSH_REGION 0x1A   /* Flush I Cache Line (region); wo, ss */
+#define ASI_M_IFLUSH_CTX    0x1B   /* Flush I Cache Line (context); wo, ss */
+#define ASI_M_IFLUSH_USER   0x1C   /* Flush I Cache Line (user); wo, ss */
+
+/* Block-fill operations are available on certain V8 cpus */
+#define ASI_M_BFILL         0x1F
+
+/* This allows direct access to main memory, actually 0x20 to 0x2f are
+ * the available ASI's for physical ram pass-through, but I don't have
+ * any idea what the other ones do....
+ */
+
+#define ASI_M_BYPASS       0x20   /* Reference MMU bypass; rw, as */
+#define ASI_M_FBMEM        0x29   /* Graphics card frame buffer access */
+#define ASI_M_VMEUS        0x2A   /* VME user 16-bit access */
+#define ASI_M_VMEPS        0x2B   /* VME priv 16-bit access */
+#define ASI_M_VMEUT        0x2C   /* VME user 32-bit access */
+#define ASI_M_VMEPT        0x2D   /* VME priv 32-bit access */
+#define ASI_M_SBUS         0x2E   /* Direct SBus access */
+#define ASI_M_CTL          0x2F   /* Control Space (ECC and MXCC are here) */
+
+
+/* This is ROSS HyperSparc only. */
+#define ASI_M_FLUSH_IWHOLE 0x31   /* Flush entire ICACHE; wo, ss */
+
+/* Tsunami/Viking/TurboSparc i/d cache flash clear. */
+#define ASI_M_IC_FLCLEAR   0x36
+#define ASI_M_DC_FLCLEAR   0x37
+
+#define ASI_M_DCDR         0x39   /* Data Cache Diagnostics Register rw, ss */
+
+#define ASI_M_VIKING_TMP1  0x40	  /* Emulation temporary 1 on Viking */
+/* only available on SuperSparc I */
+/* #define ASI_M_VIKING_TMP2  0x41 */  /* Emulation temporary 2 on Viking */
+
+#define ASI_M_ACTION       0x4c   /* Breakpoint Action Register (GNU/Viking) */
+
+/* LEON ASI */
+#define ASI_LEON_NOCACHE        0x01
+
+#define ASI_LEON_DCACHE_MISS    0x01
+
+#define ASI_LEON_CACHEREGS      0x02
+#define ASI_LEON_IFLUSH         0x10
+#define ASI_LEON_DFLUSH         0x11
+
+#define ASI_LEON_MMUFLUSH       0x18
+#define ASI_LEON_MMUREGS        0x19
+#define ASI_LEON_BYPASS         0x1c
+#define ASI_LEON_FLUSH_PAGE     0x10
+
+/* V9 Architecture mandated ASIs. */
+#define ASI_N			0x04 /* Nucleus				*/
+#define ASI_NL			0x0c /* Nucleus, little endian		*/
+#define ASI_AIUP		0x10 /* Primary, user			*/
+#define ASI_AIUS		0x11 /* Secondary, user			*/
+#define ASI_AIUPL		0x18 /* Primary, user, little endian	*/
+#define ASI_AIUSL		0x19 /* Secondary, user, little endian	*/
+#define ASI_P			0x80 /* Primary, implicit		*/
+#define ASI_S			0x81 /* Secondary, implicit		*/
+#define ASI_PNF			0x82 /* Primary, no fault		*/
+#define ASI_SNF			0x83 /* Secondary, no fault		*/
+#define ASI_PL			0x88 /* Primary, implicit, l-endian	*/
+#define ASI_SL			0x89 /* Secondary, implicit, l-endian	*/
+#define ASI_PNFL		0x8a /* Primary, no fault, l-endian	*/
+#define ASI_SNFL		0x8b /* Secondary, no fault, l-endian	*/
+
+/* SpitFire and later extended ASIs.  The "(III)" marker designates
+ * UltraSparc-III and later specific ASIs.  The "(CMT)" marker designates
+ * Chip Multi Threading specific ASIs.  "(NG)" designates Niagara specific
+ * ASIs, "(4V)" designates SUN4V specific ASIs.  "(NG4)" designates SPARC-T4
+ * and later ASIs.
+ */
+#define ASI_MCD_PRIV_PRIMARY	0x02 /* (NG7) Privileged MCD version VA	*/
+#define ASI_MCD_REAL		0x05 /* (NG7) Privileged MCD version PA	*/
+#define ASI_PHYS_USE_EC		0x14 /* PADDR, E-cachable		*/
+#define ASI_PHYS_BYPASS_EC_E	0x15 /* PADDR, E-bit			*/
+#define ASI_BLK_AIUP_4V		0x16 /* (4V) Prim, user, block ld/st	*/
+#define ASI_BLK_AIUS_4V		0x17 /* (4V) Sec, user, block ld/st	*/
+#define ASI_PHYS_USE_EC_L	0x1c /* PADDR, E-cachable, little endian*/
+#define ASI_PHYS_BYPASS_EC_E_L	0x1d /* PADDR, E-bit, little endian	*/
+#define ASI_BLK_AIUP_L_4V	0x1e /* (4V) Prim, user, block, l-endian*/
+#define ASI_BLK_AIUS_L_4V	0x1f /* (4V) Sec, user, block, l-endian	*/
+#define ASI_SCRATCHPAD		0x20 /* (4V) Scratch Pad Registers	*/
+#define ASI_MMU			0x21 /* (4V) MMU Context Registers	*/
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 /* (NG) init-store, twin load,
+					 * secondary, user
+					 */
+#define ASI_NUCLEUS_QUAD_LDD	0x24 /* Cachable, qword load		*/
+#define ASI_QUEUE		0x25 /* (4V) Interrupt Queue Registers	*/
+#define ASI_QUAD_LDD_PHYS_4V	0x26 /* (4V) Physical, qword load	*/
+#define ASI_NUCLEUS_QUAD_LDD_L	0x2c /* Cachable, qword load, l-endian 	*/
+#define ASI_QUAD_LDD_PHYS_L_4V	0x2e /* (4V) Phys, qword load, l-endian	*/
+#define ASI_PCACHE_DATA_STATUS	0x30 /* (III) PCache data stat RAM diag	*/
+#define ASI_PCACHE_DATA		0x31 /* (III) PCache data RAM diag	*/
+#define ASI_PCACHE_TAG		0x32 /* (III) PCache tag RAM diag	*/
+#define ASI_PCACHE_SNOOP_TAG	0x33 /* (III) PCache snoop tag RAM diag	*/
+#define ASI_QUAD_LDD_PHYS	0x34 /* (III+) PADDR, qword load	*/
+#define ASI_WCACHE_VALID_BITS	0x38 /* (III) WCache Valid Bits diag	*/
+#define ASI_WCACHE_DATA		0x39 /* (III) WCache data RAM diag	*/
+#define ASI_WCACHE_TAG		0x3a /* (III) WCache tag RAM diag	*/
+#define ASI_WCACHE_SNOOP_TAG	0x3b /* (III) WCache snoop tag RAM diag	*/
+#define ASI_QUAD_LDD_PHYS_L	0x3c /* (III+) PADDR, qw-load, l-endian	*/
+#define ASI_SRAM_FAST_INIT	0x40 /* (III+) Fast SRAM init		*/
+#define ASI_CORE_AVAILABLE	0x41 /* (CMT) LP Available		*/
+#define ASI_CORE_ENABLE_STAT	0x41 /* (CMT) LP Enable Status		*/
+#define ASI_CORE_ENABLE		0x41 /* (CMT) LP Enable RW		*/
+#define ASI_XIR_STEERING	0x41 /* (CMT) XIR Steering RW		*/
+#define ASI_CORE_RUNNING_RW	0x41 /* (CMT) LP Running RW		*/
+#define ASI_CORE_RUNNING_W1S	0x41 /* (CMT) LP Running Write-One Set	*/
+#define ASI_CORE_RUNNING_W1C	0x41 /* (CMT) LP Running Write-One Clr	*/
+#define ASI_CORE_RUNNING_STAT	0x41 /* (CMT) LP Running Status		*/
+#define ASI_CMT_ERROR_STEERING	0x41 /* (CMT) Error Steering RW		*/
+#define ASI_DCACHE_INVALIDATE	0x42 /* (III) DCache Invalidate diag	*/
+#define ASI_DCACHE_UTAG		0x43 /* (III) DCache uTag diag		*/
+#define ASI_DCACHE_SNOOP_TAG	0x44 /* (III) DCache snoop tag RAM diag	*/
+#define ASI_LSU_CONTROL		0x45 /* Load-store control unit		*/
+#define ASI_DCU_CONTROL_REG	0x45 /* (III) DCache Unit Control reg	*/
+#define ASI_DCACHE_DATA		0x46 /* DCache data-ram diag access	*/
+#define ASI_DCACHE_TAG		0x47 /* Dcache tag/valid ram diag access*/
+#define ASI_INTR_DISPATCH_STAT	0x48 /* IRQ vector dispatch status	*/
+#define ASI_INTR_RECEIVE	0x49 /* IRQ vector receive status	*/
+#define ASI_UPA_CONFIG		0x4a /* UPA config space		*/
+#define ASI_JBUS_CONFIG		0x4a /* (IIIi) JBUS Config Register	*/
+#define ASI_SAFARI_CONFIG	0x4a /* (III) Safari Config Register	*/
+#define ASI_SAFARI_ADDRESS	0x4a /* (III) Safari Address Register	*/
+#define ASI_ESTATE_ERROR_EN	0x4b /* E-cache error enable space	*/
+#define ASI_AFSR		0x4c /* Async fault status register	*/
+#define ASI_AFAR		0x4d /* Async fault address register	*/
+#define ASI_EC_TAG_DATA		0x4e /* E-cache tag/valid ram diag acc	*/
+#define ASI_IMMU		0x50 /* Insn-MMU main register space	*/
+#define ASI_IMMU_TSB_8KB_PTR	0x51 /* Insn-MMU 8KB TSB pointer reg	*/
+#define ASI_IMMU_TSB_64KB_PTR	0x52 /* Insn-MMU 64KB TSB pointer reg	*/
+#define ASI_ITLB_DATA_IN	0x54 /* Insn-MMU TLB data in reg	*/
+#define ASI_ITLB_DATA_ACCESS	0x55 /* Insn-MMU TLB data access reg	*/
+#define ASI_ITLB_TAG_READ	0x56 /* Insn-MMU TLB tag read reg	*/
+#define ASI_IMMU_DEMAP		0x57 /* Insn-MMU TLB demap		*/
+#define ASI_DMMU		0x58 /* Data-MMU main register space	*/
+#define ASI_DMMU_TSB_8KB_PTR	0x59 /* Data-MMU 8KB TSB pointer reg	*/
+#define ASI_DMMU_TSB_64KB_PTR	0x5a /* Data-MMU 64KB TSB pointer reg	*/
+#define ASI_DMMU_TSB_DIRECT_PTR	0x5b /* Data-MMU TSB direct pointer reg	*/
+#define ASI_DTLB_DATA_IN	0x5c /* Data-MMU TLB data in reg	*/
+#define ASI_DTLB_DATA_ACCESS	0x5d /* Data-MMU TLB data access reg	*/
+#define ASI_DTLB_TAG_READ	0x5e /* Data-MMU TLB tag read reg	*/
+#define ASI_DMMU_DEMAP		0x5f /* Data-MMU TLB demap		*/
+#define ASI_IIU_INST_TRAP	0x60 /* (III) Instruction Breakpoint	*/
+#define ASI_INTR_ID		0x63 /* (CMT) Interrupt ID register	*/
+#define ASI_CORE_ID		0x63 /* (CMT) LP ID register		*/
+#define ASI_CESR_ID		0x63 /* (CMT) CESR ID register		*/
+#define ASI_IC_INSTR		0x66 /* Insn cache instruction ram diag	*/
+#define ASI_IC_TAG		0x67 /* Insn cache tag/valid ram diag 	*/
+#define ASI_IC_STAG		0x68 /* (III) Insn cache snoop tag ram	*/
+#define ASI_IC_PRE_DECODE	0x6e /* Insn cache pre-decode ram diag	*/
+#define ASI_IC_NEXT_FIELD	0x6f /* Insn cache next-field ram diag	*/
+#define ASI_BRPRED_ARRAY	0x6f /* (III) Branch Prediction RAM diag*/
+#define ASI_BLK_AIUP		0x70 /* Primary, user, block load/store	*/
+#define ASI_BLK_AIUS		0x71 /* Secondary, user, block ld/st	*/
+#define ASI_MCU_CTRL_REG	0x72 /* (III) Memory controller regs	*/
+#define ASI_EC_DATA		0x74 /* (III) E-cache data staging reg	*/
+#define ASI_EC_CTRL		0x75 /* (III) E-cache control reg	*/
+#define ASI_EC_W		0x76 /* E-cache diag write access	*/
+#define ASI_UDB_ERROR_W		0x77 /* External UDB error regs W	*/
+#define ASI_UDB_CONTROL_W	0x77 /* External UDB control regs W	*/
+#define ASI_INTR_W		0x77 /* IRQ vector dispatch write	*/
+#define ASI_INTR_DATAN_W	0x77 /* (III) Out irq vector data reg N	*/
+#define ASI_INTR_DISPATCH_W	0x77 /* (III) Interrupt vector dispatch	*/
+#define ASI_BLK_AIUPL		0x78 /* Primary, user, little, blk ld/st*/
+#define ASI_BLK_AIUSL		0x79 /* Secondary, user, little, blk ld/st*/
+#define ASI_EC_R		0x7e /* E-cache diag read access	*/
+#define ASI_UDBH_ERROR_R	0x7f /* External UDB error regs rd hi	*/
+#define ASI_UDBL_ERROR_R	0x7f /* External UDB error regs rd low	*/
+#define ASI_UDBH_CONTROL_R	0x7f /* External UDB control regs rd hi	*/
+#define ASI_UDBL_CONTROL_R	0x7f /* External UDB control regs rd low*/
+#define ASI_INTR_R		0x7f /* IRQ vector dispatch read	*/
+#define ASI_INTR_DATAN_R	0x7f /* (III) In irq vector data reg N	*/
+#define ASI_MCD_PRIMARY		0x90 /* (NG7) MCD version load/store	*/
+#define ASI_MCD_ST_BLKINIT_PRIMARY	\
+				0x92 /* (NG7) MCD store BLKINIT primary	*/
+#define ASI_PIC			0xb0 /* (NG4) PIC registers		*/
+#define ASI_PST8_P		0xc0 /* Primary, 8 8-bit, partial	*/
+#define ASI_PST8_S		0xc1 /* Secondary, 8 8-bit, partial	*/
+#define ASI_PST16_P		0xc2 /* Primary, 4 16-bit, partial	*/
+#define ASI_PST16_S		0xc3 /* Secondary, 4 16-bit, partial	*/
+#define ASI_PST32_P		0xc4 /* Primary, 2 32-bit, partial	*/
+#define ASI_PST32_S		0xc5 /* Secondary, 2 32-bit, partial	*/
+#define ASI_PST8_PL		0xc8 /* Primary, 8 8-bit, partial, L	*/
+#define ASI_PST8_SL		0xc9 /* Secondary, 8 8-bit, partial, L	*/
+#define ASI_PST16_PL		0xca /* Primary, 4 16-bit, partial, L	*/
+#define ASI_PST16_SL		0xcb /* Secondary, 4 16-bit, partial, L	*/
+#define ASI_PST32_PL		0xcc /* Primary, 2 32-bit, partial, L	*/
+#define ASI_PST32_SL		0xcd /* Secondary, 2 32-bit, partial, L	*/
+#define ASI_FL8_P		0xd0 /* Primary, 1 8-bit, fpu ld/st	*/
+#define ASI_FL8_S		0xd1 /* Secondary, 1 8-bit, fpu ld/st	*/
+#define ASI_FL16_P		0xd2 /* Primary, 1 16-bit, fpu ld/st	*/
+#define ASI_FL16_S		0xd3 /* Secondary, 1 16-bit, fpu ld/st	*/
+#define ASI_FL8_PL		0xd8 /* Primary, 1 8-bit, fpu ld/st, L	*/
+#define ASI_FL8_SL		0xd9 /* Secondary, 1 8-bit, fpu ld/st, L*/
+#define ASI_FL16_PL		0xda /* Primary, 1 16-bit, fpu ld/st, L	*/
+#define ASI_FL16_SL		0xdb /* Secondary, 1 16-bit, fpu ld/st,L*/
+#define ASI_BLK_COMMIT_P	0xe0 /* Primary, blk store commit	*/
+#define ASI_BLK_COMMIT_S	0xe1 /* Secondary, blk store commit	*/
+#define ASI_BLK_INIT_QUAD_LDD_P	0xe2 /* (NG) init-store, twin load,
+				      * primary, implicit
+				      */
+#define ASI_BLK_INIT_QUAD_LDD_S	0xe3 /* (NG) init-store, twin load,
+				      * secondary, implicit
+				      */
+#define ASI_BLK_P		0xf0 /* Primary, blk ld/st		*/
+#define ASI_BLK_S		0xf1 /* Secondary, blk ld/st		*/
+#define ASI_ST_BLKINIT_MRU_P	0xf2 /* (NG4) init-store, twin load,
+				      * Most-Recently-Used, primary,
+				      * implicit
+				      */
+#define ASI_ST_BLKINIT_MRU_S	0xf3 /* (NG4) init-store, twin load,
+				      * Most-Recently-Used, secondary,
+				      * implicit
+				      */
+#define ASI_BLK_PL		0xf8 /* Primary, blk ld/st, little	*/
+#define ASI_BLK_SL		0xf9 /* Secondary, blk ld/st, little	*/
+#define ASI_ST_BLKINIT_MRU_PL	0xfa /* (NG4) init-store, twin load,
+				      * Most-Recently-Used, primary,
+				      * implicit, little-endian
+				      */
+#define ASI_ST_BLKINIT_MRU_SL	0xfb /* (NG4) init-store, twin load,
+				      * Most-Recently-Used, secondary,
+				      * implicit, little-endian
+				      */
+
+#endif /* _SPARC_ASI_H */
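
These constants are consumed by alternate-space loads and stores, as in the
Viking probe earlier in this series; the ASI number is baked into the
instruction through an "i" constraint. A minimal sketch for sun4m, reading
the SRMMU control register through ASI_M_MMUREGS (supervisor-only):

	static inline unsigned int srmmu_get_control(void)
	{
		unsigned int val;

		/* lda = load word from alternate space; address %g0
		 * (offset 0), space ASI_M_MMUREGS. */
		__asm__ __volatile__("lda [%%g0] %1, %0"
				     : "=r" (val)
				     : "i" (ASI_M_MMUREGS));
		return val;
	}
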
diff --git a/arch/sparc/include/uapi/asm/auxvec.h b/arch/sparc/include/uapi/asm/auxvec.h
new file mode 100644
index 0000000..ab8780f
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/auxvec.h
@@ -0,0 +1,15 @@
+#ifndef __ASMSPARC_AUXVEC_H
+#define __ASMSPARC_AUXVEC_H
+
+#define AT_SYSINFO_EHDR		33
+
+/* Avoid overlap with other AT_* values, since they are consolidated in
+ * glibc; any collision would cause problems.
+ */
+#define AT_ADI_BLKSZ	48
+#define AT_ADI_NBITS	49
+#define AT_ADI_UEONADI	50
+
+#define AT_VECTOR_SIZE_ARCH	4
+
+#endif /* !(__ASMSPARC_AUXVEC_H) */
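
Userspace reaches these entries through getauxval(3); the sketch below
assumes a libc new enough to expose it, and simply redefines the constant in
case the installed headers predate ADI:

	#include <stdio.h>
	#include <sys/auxv.h>

	#ifndef AT_ADI_BLKSZ
	#define AT_ADI_BLKSZ	48	/* value from this header */
	#endif

	int main(void)
	{
		unsigned long blksz = getauxval(AT_ADI_BLKSZ);

		if (blksz)
			printf("ADI block size: %lu bytes\n", blksz);
		else
			printf("no ADI info in the aux vector\n");
		return 0;
	}
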
diff --git a/arch/sparc/include/uapi/asm/bitsperlong.h b/arch/sparc/include/uapi/asm/bitsperlong.h
new file mode 100644
index 0000000..cd9a432
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_SPARC_BITSPERLONG_H
+#define __ASM_SPARC_BITSPERLONG_H
+
+#if defined(__sparc__) && defined(__arch64__)
+#define __BITS_PER_LONG 64
+#else
+#define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_SPARC_BITSPERLONG_H */
+
diff --git a/arch/sparc/include/uapi/asm/byteorder.h b/arch/sparc/include/uapi/asm/byteorder.h
new file mode 100644
index 0000000..216b8e5
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/byteorder.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_BYTEORDER_H
+#define _SPARC_BYTEORDER_H
+
+#include <linux/byteorder/big_endian.h>
+
+#endif /* _SPARC_BYTEORDER_H */
diff --git a/arch/sparc/include/uapi/asm/display7seg.h b/arch/sparc/include/uapi/asm/display7seg.h
new file mode 100644
index 0000000..7e9fef0
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/display7seg.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * display7seg - Driver interface for the 7-segment display
+ * present on Sun Microsystems CP1400 and CP1500
+ *
+ * Copyright (c) 2000 Eric Brower <ebrower@usa.net>
+ *
+ */
+
+#ifndef __display7seg_h__
+#define __display7seg_h__
+
+#include <linux/ioctl.h>	/* for _IOR/_IOW/_IO */
+
+#define D7S_IOC	'p'
+
+#define D7SIOCRD _IOR(D7S_IOC, 0x45, int)	/* Read device state	*/
+#define D7SIOCWR _IOW(D7S_IOC, 0x46, int)	/* Write device state	*/
+#define D7SIOCTM _IO (D7S_IOC, 0x47)		/* Translate mode (FLIP)*/
+
+/*
+ * ioctl flag definitions
+ *
+ * POINT	- Toggle decimal point	(0=absent 1=present)
+ * ALARM	- Toggle alarm LED 		(0=green  1=red)
+ * FLIP		- Toggle inverted mode 	(0=normal 1=flipped)
+ * bits 0-4	- Character displayed	(see definitions below)
+ *
+ * Display segments are defined as follows,
+ * subject to D7S_FLIP register state:
+ *
+ *    a
+ *   ---
+ * f|   |b
+ *   -g-
+ * e|   |c
+ *   ---
+ *    d
+ */
+
+#define D7S_POINT	(1 << 7)	/* Decimal point */
+#define D7S_ALARM	(1 << 6)	/* Alarm LED 	*/
+#define D7S_FLIP	(1 << 5)	/* Flip display */
+
+#define D7S_0		0x00		/* Numerals 0-9 */
+#define D7S_1		0x01
+#define D7S_2		0x02
+#define D7S_3		0x03
+#define D7S_4		0x04
+#define D7S_5		0x05
+#define D7S_6		0x06
+#define D7S_7		0x07
+#define D7S_8		0x08
+#define D7S_9		0x09
+#define D7S_A		0x0A		/* Letters A-F, H, L, P */
+#define D7S_B		0x0B
+#define D7S_C		0x0C
+#define D7S_D		0x0D
+#define D7S_E		0x0E
+#define D7S_F		0x0F
+#define D7S_H		0x10
+#define D7S_E2		0x11
+#define D7S_L		0x12
+#define D7S_P		0x13
+#define D7S_SEGA	0x14		/* Individual segments */
+#define D7S_SEGB	0x15
+#define D7S_SEGC	0x16
+#define D7S_SEGD	0x17
+#define D7S_SEGE	0x18
+#define D7S_SEGF	0x19
+#define D7S_SEGG	0x1A
+#define D7S_SEGABFG 0x1B		/* Segment groupings */
+#define D7S_SEGCDEG	0x1C
+#define D7S_SEGBCEF 0x1D
+#define D7S_SEGADG	0x1E
+#define D7S_BLANK	0x1F		/* Clear all segments */
+
+#define D7S_MIN_VAL	0x0
+#define D7S_MAX_VAL	0x1F
+
+#endif /* ifndef __display7seg_h__ */
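
A hypothetical userspace writer for the display; /dev/d7s as the node name
is an assumption:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <asm/display7seg.h>	/* sparc-only */

	int main(void)
	{
		int fd = open("/dev/d7s", O_WRONLY);
		int val = D7S_8 | D7S_POINT;	/* show "8." */

		if (fd < 0)
			return 1;
		ioctl(fd, D7SIOCWR, &val);
		close(fd);
		return 0;
	}
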
diff --git a/arch/sparc/include/uapi/asm/envctrl.h b/arch/sparc/include/uapi/asm/envctrl.h
new file mode 100644
index 0000000..cf8aa0a
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/envctrl.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * envctrl.h: Definitions for access to the i2c environment
+ *            monitoring on Ultrasparc systems.
+ *
+ * Copyright (C) 1998  Eddie C. Dost  (ecd@skynet.be)
+ * Copyright (C) 2000  Vinh Truong  (vinh.truong@eng.sun.com)
+ * VT - Add all ioctl commands and environment status definitions
+ * VT - Add application note
+ */
+#ifndef _SPARC64_ENVCTRL_H
+#define _SPARC64_ENVCTRL_H 1
+
+#include <linux/ioctl.h>
+
+/* Application note:
+ *
+ * The driver supports 4 operations: open(), close(), ioctl(), read()
+ * The device name is /dev/envctrl.
+ * Below is sample usage:
+ *
+ *	fd = open("/dev/envtrl", O_RDONLY);
+ *	if (ioctl(fd, ENVCTRL_READ_SHUTDOWN_TEMPERATURE, 0) < 0)
+ *		printf("error\n");
+ *	ret = read(fd, buf, 10);
+ *	close(fd);
+ *
+ * Notice in the case of cpu voltage and temperature, the default is
+ * cpu0.  If we need to know the info of cpu1, cpu2, cpu3, we need to
+ * pass in cpu number in ioctl() last parameter.  For example, to
+ * get the voltage of cpu2:
+ *
+ *	ioctlbuf[0] = 2;
+ *	if (ioctl(fd, ENVCTRL_RD_CPU_VOLTAGE, ioctlbuf) < 0)
+ *		printf("error\n");
+ *	ret = read(fd, buf, 10);
+ *
+ * All the return values are in ascii.  So check read return value
+ * and do appropriate conversions in your application.
+ */
+
+/* IOCTL commands */
+
+/* Note: these commands reflect possible monitor features.
+ * Some boards choose to support some of the features only.
+ */
+#define ENVCTRL_RD_CPU_TEMPERATURE	_IOR('p', 0x40, int)
+#define ENVCTRL_RD_CPU_VOLTAGE		_IOR('p', 0x41, int)
+#define ENVCTRL_RD_FAN_STATUS		_IOR('p', 0x42, int)
+#define ENVCTRL_RD_WARNING_TEMPERATURE	_IOR('p', 0x43, int)
+#define ENVCTRL_RD_SHUTDOWN_TEMPERATURE	_IOR('p', 0x44, int)
+#define ENVCTRL_RD_VOLTAGE_STATUS	_IOR('p', 0x45, int)
+#define ENVCTRL_RD_SCSI_TEMPERATURE	_IOR('p', 0x46, int)
+#define ENVCTRL_RD_ETHERNET_TEMPERATURE	_IOR('p', 0x47, int)
+#define ENVCTRL_RD_MTHRBD_TEMPERATURE	_IOR('p', 0x48, int)
+
+#define ENVCTRL_RD_GLOBALADDRESS	_IOR('p', 0x49, int)
+
+/* Read return values for a voltage status request. */
+#define ENVCTRL_VOLTAGE_POWERSUPPLY_GOOD	0x01
+#define ENVCTRL_VOLTAGE_BAD			0x02
+#define ENVCTRL_POWERSUPPLY_BAD			0x03
+#define ENVCTRL_VOLTAGE_POWERSUPPLY_BAD		0x04
+
+/* Read return values for a fan status request.
+ * A failure match means either the fan fails or
+ * the fan is not connected.  Some boards have optional
+ * connectors to connect extra fans.
+ *
+ * There are maximum 8 monitor fans.  Some are cpu fans
+ * some are system fans.  The mask below only indicates
+ * fan by order number.
+ * Below is a sample application:
+ *
+ *	if (ioctl(fd, ENVCTRL_RD_FAN_STATUS, 0) < 0) {
+ *		printf("ioctl fan failed\n");
+ *	}
+ *	if (read(fd, rslt, 1) <= 0) {
+ *		printf("error or fan not monitored\n");
+ *	} else {
+ *		if (rslt[0] == ENVCTRL_ALL_FANS_GOOD) {
+ *			printf("all fans good\n");
+ *		} else if (rslt[0] == ENVCTRL_ALL_FANS_BAD) {
+ *			printf("all fans bad\n");
+ *		} else {
+ *			if (rslt[0] & ENVCTRL_FAN0_FAILURE_MASK)
+ *				printf("fan 0 failed or not connected\n");
+ *			......
+ *		}
+ *	}
+ */
+
+#define ENVCTRL_ALL_FANS_GOOD			0x00
+#define ENVCTRL_FAN0_FAILURE_MASK		0x01
+#define ENVCTRL_FAN1_FAILURE_MASK		0x02
+#define ENVCTRL_FAN2_FAILURE_MASK		0x04
+#define ENVCTRL_FAN3_FAILURE_MASK		0x08
+#define ENVCTRL_FAN4_FAILURE_MASK		0x10
+#define ENVCTRL_FAN5_FAILURE_MASK		0x20
+#define ENVCTRL_FAN6_FAILURE_MASK		0x40
+#define ENVCTRL_FAN7_FAILURE_MASK		0x80
+#define ENVCTRL_ALL_FANS_BAD 			0xFF
+
+#endif /* !(_SPARC64_ENVCTRL_H) */
diff --git a/arch/sparc/include/uapi/asm/errno.h b/arch/sparc/include/uapi/asm/errno.h
new file mode 100644
index 0000000..81a732b
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/errno.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_ERRNO_H
+#define _SPARC_ERRNO_H
+
+/* These match the SunOS error numbering scheme. */
+
+#include <asm-generic/errno-base.h>
+
+#define	EWOULDBLOCK	EAGAIN	/* Operation would block */
+#define	EINPROGRESS	36	/* Operation now in progress */
+#define	EALREADY	37	/* Operation already in progress */
+#define	ENOTSOCK	38	/* Socket operation on non-socket */
+#define	EDESTADDRREQ	39	/* Destination address required */
+#define	EMSGSIZE	40	/* Message too long */
+#define	EPROTOTYPE	41	/* Protocol wrong type for socket */
+#define	ENOPROTOOPT	42	/* Protocol not available */
+#define	EPROTONOSUPPORT	43	/* Protocol not supported */
+#define	ESOCKTNOSUPPORT	44	/* Socket type not supported */
+#define	EOPNOTSUPP	45	/* Op not supported on transport endpoint */
+#define	EPFNOSUPPORT	46	/* Protocol family not supported */
+#define	EAFNOSUPPORT	47	/* Address family not supported by protocol */
+#define	EADDRINUSE	48	/* Address already in use */
+#define	EADDRNOTAVAIL	49	/* Cannot assign requested address */
+#define	ENETDOWN	50	/* Network is down */
+#define	ENETUNREACH	51	/* Network is unreachable */
+#define	ENETRESET	52	/* Net dropped connection because of reset */
+#define	ECONNABORTED	53	/* Software caused connection abort */
+#define	ECONNRESET	54	/* Connection reset by peer */
+#define	ENOBUFS		55	/* No buffer space available */
+#define	EISCONN		56	/* Transport endpoint is already connected */
+#define	ENOTCONN	57	/* Transport endpoint is not connected */
+#define	ESHUTDOWN	58	/* No send after transport endpoint shutdown */
+#define	ETOOMANYREFS	59	/* Too many references: cannot splice */
+#define	ETIMEDOUT	60	/* Connection timed out */
+#define	ECONNREFUSED	61	/* Connection refused */
+#define	ELOOP		62	/* Too many symbolic links encountered */
+#define	ENAMETOOLONG	63	/* File name too long */
+#define	EHOSTDOWN	64	/* Host is down */
+#define	EHOSTUNREACH	65	/* No route to host */
+#define	ENOTEMPTY	66	/* Directory not empty */
+#define EPROCLIM        67      /* SunOS: Too many processes */
+#define	EUSERS		68	/* Too many users */
+#define	EDQUOT		69	/* Quota exceeded */
+#define	ESTALE		70	/* Stale file handle */
+#define	EREMOTE		71	/* Object is remote */
+#define	ENOSTR		72	/* Device not a stream */
+#define	ETIME		73	/* Timer expired */
+#define	ENOSR		74	/* Out of streams resources */
+#define	ENOMSG		75	/* No message of desired type */
+#define	EBADMSG		76	/* Not a data message */
+#define	EIDRM		77	/* Identifier removed */
+#define	EDEADLK		78	/* Resource deadlock would occur */
+#define	ENOLCK		79	/* No record locks available */
+#define	ENONET		80	/* Machine is not on the network */
+#define ERREMOTE        81      /* SunOS: Too many levels of remote in path */
+#define	ENOLINK		82	/* Link has been severed */
+#define	EADV		83	/* Advertise error */
+#define	ESRMNT		84	/* Srmount error */
+#define	ECOMM		85      /* Communication error on send */
+#define	EPROTO		86	/* Protocol error */
+#define	EMULTIHOP	87	/* Multihop attempted */
+#define	EDOTDOT		88	/* RFS specific error */
+#define	EREMCHG		89	/* Remote address changed */
+#define	ENOSYS		90	/* Function not implemented */
+
+/* The rest have no SunOS equivalent. */
+#define	ESTRPIPE	91	/* Streams pipe error */
+#define	EOVERFLOW	92	/* Value too large for defined data type */
+#define	EBADFD		93	/* File descriptor in bad state */
+#define	ECHRNG		94	/* Channel number out of range */
+#define	EL2NSYNC	95	/* Level 2 not synchronized */
+#define	EL3HLT		96	/* Level 3 halted */
+#define	EL3RST		97	/* Level 3 reset */
+#define	ELNRNG		98	/* Link number out of range */
+#define	EUNATCH		99	/* Protocol driver not attached */
+#define	ENOCSI		100	/* No CSI structure available */
+#define	EL2HLT		101	/* Level 2 halted */
+#define	EBADE		102	/* Invalid exchange */
+#define	EBADR		103	/* Invalid request descriptor */
+#define	EXFULL		104	/* Exchange full */
+#define	ENOANO		105	/* No anode */
+#define	EBADRQC		106	/* Invalid request code */
+#define	EBADSLT		107	/* Invalid slot */
+#define	EDEADLOCK	108	/* File locking deadlock error */
+#define	EBFONT		109	/* Bad font file format */
+#define	ELIBEXEC	110	/* Cannot exec a shared library directly */
+#define	ENODATA		111	/* No data available */
+#define	ELIBBAD		112	/* Accessing a corrupted shared library */
+#define	ENOPKG		113	/* Package not installed */
+#define	ELIBACC		114	/* Can not access a needed shared library */
+#define	ENOTUNIQ	115	/* Name not unique on network */
+#define	ERESTART	116	/* Interrupted syscall should be restarted */
+#define	EUCLEAN		117	/* Structure needs cleaning */
+#define	ENOTNAM		118	/* Not a XENIX named type file */
+#define	ENAVAIL		119	/* No XENIX semaphores available */
+#define	EISNAM		120	/* Is a named type file */
+#define	EREMOTEIO	121	/* Remote I/O error */
+#define	EILSEQ		122	/* Illegal byte sequence */
+#define	ELIBMAX		123	/* Attempt to link in too many shared libs */
+#define	ELIBSCN		124	/* .lib section in a.out corrupted */
+
+#define	ENOMEDIUM	125	/* No medium found */
+#define	EMEDIUMTYPE	126	/* Wrong medium type */
+#define	ECANCELED	127	/* Operation Cancelled */
+#define	ENOKEY		128	/* Required key not available */
+#define	EKEYEXPIRED	129	/* Key has expired */
+#define	EKEYREVOKED	130	/* Key has been revoked */
+#define	EKEYREJECTED	131	/* Key was rejected by service */
+
+/* for robust mutexes */
+#define	EOWNERDEAD	132	/* Owner died */
+#define	ENOTRECOVERABLE	133	/* State not recoverable */
+
+#define	ERFKILL		134	/* Operation not possible due to RF-kill */
+
+#define EHWPOISON	135	/* Memory page has hardware error */
+
+#endif
diff --git a/arch/sparc/include/uapi/asm/fbio.h b/arch/sparc/include/uapi/asm/fbio.h
new file mode 100644
index 0000000..0dafe2c
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/fbio.h
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_FBIO_H
+#define _UAPI__LINUX_FBIO_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/* Constants used for fbio SunOS compatibility */
+/* (C) 1996 Miguel de Icaza */
+
+/* Frame buffer types */
+#define FBTYPE_NOTYPE           -1
+#define FBTYPE_SUN1BW           0   /* mono */
+#define FBTYPE_SUN1COLOR        1 
+#define FBTYPE_SUN2BW           2 
+#define FBTYPE_SUN2COLOR        3 
+#define FBTYPE_SUN2GP           4 
+#define FBTYPE_SUN5COLOR        5 
+#define FBTYPE_SUN3COLOR        6 
+#define FBTYPE_MEMCOLOR         7 
+#define FBTYPE_SUN4COLOR        8 
+ 
+#define FBTYPE_NOTSUN1          9 
+#define FBTYPE_NOTSUN2          10
+#define FBTYPE_NOTSUN3          11
+ 
+#define FBTYPE_SUNFAST_COLOR    12  /* cg6 */
+#define FBTYPE_SUNROP_COLOR     13
+#define FBTYPE_SUNFB_VIDEO      14
+#define FBTYPE_SUNGIFB          15
+#define FBTYPE_SUNGPLAS         16
+#define FBTYPE_SUNGP3           17
+#define FBTYPE_SUNGT            18
+#define FBTYPE_SUNLEO           19      /* zx Leo card */
+#define FBTYPE_MDICOLOR         20      /* cg14 */
+#define FBTYPE_TCXCOLOR		21	/* SUNW,tcx card */
+
+#define FBTYPE_LASTPLUSONE      21	/* This is not last + 1 in fact... */
+
+/* Does not seem to be listed in the Sun file either */
+#define FBTYPE_CREATOR          22
+#define FBTYPE_PCI_IGA1682	23
+#define FBTYPE_P9100COLOR	24
+
+#define FBTYPE_PCI_GENERIC	1000
+#define FBTYPE_PCI_MACH64	1001
+
+/* fbio ioctls */
+/* Returned by FBIOGTYPE */
+struct  fbtype {
+        int     fb_type;        /* fb type, see above */
+        int     fb_height;      /* pixels */
+        int     fb_width;       /* pixels */
+        int     fb_depth;
+        int     fb_cmsize;      /* color map entries */
+        int     fb_size;        /* fb size in bytes */
+};
+#define FBIOGTYPE _IOR('F', 0, struct fbtype)
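+
+/* A minimal usage sketch (illustrative only; assumes fd is an open
+ * frame buffer device, e.g. /dev/fb0):
+ *
+ *	struct fbtype fb;
+ *
+ *	if (ioctl(fd, FBIOGTYPE, &fb) < 0)
+ *		perror("FBIOGTYPE");
+ *	else
+ *		printf("%dx%d, depth %d, cmap %d entries\n",
+ *		       fb.fb_width, fb.fb_height,
+ *		       fb.fb_depth, fb.fb_cmsize);
+ */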
+
+struct  fbcmap {
+        int             index;          /* first element (0 origin) */
+        int             count;
+        unsigned char   __user *red;
+        unsigned char   __user *green;
+        unsigned char   __user *blue;
+};
+
+#ifndef __KERNEL__
+#define FBIOPUTCMAP _IOW('F', 3, struct fbcmap)
+#define FBIOGETCMAP _IOW('F', 4, struct fbcmap)
+#endif
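+
+/* A minimal sketch of loading a two-entry colormap starting at
+ * index 0 (illustrative only; fd is an open frame buffer device):
+ *
+ *	unsigned char r[2] = { 0x00, 0xff };
+ *	unsigned char g[2] = { 0x00, 0xff };
+ *	unsigned char b[2] = { 0x00, 0xff };
+ *	struct fbcmap cmap = { 0, 2, r, g, b };
+ *
+ *	if (ioctl(fd, FBIOPUTCMAP, &cmap) < 0)
+ *		perror("FBIOPUTCMAP");
+ */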
+
+/* # of device specific values */
+#define FB_ATTR_NDEVSPECIFIC    8
+/* # of possible emulations */
+#define FB_ATTR_NEMUTYPES       4
+ 
+struct fbsattr {
+        int     flags;
+        int     emu_type;	/* -1 if none */
+        int     dev_specific[FB_ATTR_NDEVSPECIFIC];
+};
+ 
+struct fbgattr {
+        int     real_type;	/* real frame buffer type */
+        int     owner;		/* unknown */
+        struct fbtype fbtype;	/* real frame buffer fbtype */
+        struct fbsattr sattr;   
+        int     emu_types[FB_ATTR_NEMUTYPES]; /* supported emulations */
+};
+#define FBIOSATTR  _IOW('F', 5, struct fbgattr) /* Unsupported: */
+#define FBIOGATTR  _IOR('F', 6, struct fbgattr)	/* supported */
+
+#define FBIOSVIDEO _IOW('F', 7, int)
+#define FBIOGVIDEO _IOR('F', 8, int)
+
+struct fbcursor {
+        short set;              /* what to set, choose from the list above */
+        short enable;           /* cursor on/off */
+        struct fbcurpos pos;    /* cursor position */
+        struct fbcurpos hot;    /* cursor hot spot */
+        struct fbcmap cmap;     /* color map info */
+        struct fbcurpos size;   /* cursor bit map size */
+        char __user *image;     /* cursor image bits */
+        char __user *mask;      /* cursor mask bits */
+};
+
+/* set/get cursor attributes/shape */
+#define FBIOSCURSOR     _IOW('F', 24, struct fbcursor)
+#define FBIOGCURSOR     _IOWR('F', 25, struct fbcursor)
+ 
+/* set/get cursor position */
+#define FBIOSCURPOS     _IOW('F', 26, struct fbcurpos)
+#define FBIOGCURPOS     _IOW('F', 27, struct fbcurpos)
+ 
+/* get max cursor size */
+#define FBIOGCURMAX     _IOR('F', 28, struct fbcurpos)
+
+/* wid manipulation */
+struct fb_wid_alloc {
+#define FB_WID_SHARED_8		0
+#define FB_WID_SHARED_24	1
+#define FB_WID_DBL_8		2
+#define FB_WID_DBL_24		3
+	__u32	wa_type;
+	__s32	wa_index;	/* Set on return */
+	__u32	wa_count;	
+};
+struct fb_wid_item {
+	__u32	wi_type;
+	__s32	wi_index;
+	__u32	wi_attrs;
+	__u32	wi_values[32];
+};
+struct fb_wid_list {
+	__u32	wl_flags;
+	__u32	wl_count;
+	struct fb_wid_item	*wl_list;
+};
+
+#define FBIO_WID_ALLOC	_IOWR('F', 30, struct fb_wid_alloc)
+#define FBIO_WID_FREE	_IOW('F', 31, struct fb_wid_alloc)
+#define FBIO_WID_PUT	_IOW('F', 32, struct fb_wid_list)
+#define FBIO_WID_GET	_IOWR('F', 33, struct fb_wid_list)
+
+/* Creator ioctls */
+#define FFB_IOCTL	('F'<<8)
+#define FFB_SYS_INFO		(FFB_IOCTL|80)
+#define FFB_CLUTREAD		(FFB_IOCTL|81)
+#define FFB_CLUTPOST		(FFB_IOCTL|82)
+#define FFB_SETDIAGMODE		(FFB_IOCTL|83)
+#define FFB_GETMONITORID	(FFB_IOCTL|84)
+#define FFB_GETVIDEOMODE	(FFB_IOCTL|85)
+#define FFB_SETVIDEOMODE	(FFB_IOCTL|86)
+#define FFB_SETSERVER		(FFB_IOCTL|87)
+#define FFB_SETOVCTL		(FFB_IOCTL|88)
+#define FFB_GETOVCTL		(FFB_IOCTL|89)
+#define FFB_GETSAXNUM		(FFB_IOCTL|90)
+#define FFB_FBDEBUG		(FFB_IOCTL|91)
+
+/* Cg14 ioctls */
+#define MDI_IOCTL          ('M'<<8)
+#define MDI_RESET          (MDI_IOCTL|1)
+#define MDI_GET_CFGINFO    (MDI_IOCTL|2)
+#define MDI_SET_PIXELMODE  (MDI_IOCTL|3)
+#    define MDI_32_PIX     32
+#    define MDI_16_PIX     16
+#    define MDI_8_PIX      8
+
+struct mdi_cfginfo {
+	int     mdi_ncluts;     /* Number of implemented CLUTs in this MDI */
+        int     mdi_type;       /* FBTYPE name */
+        int     mdi_height;     /* height */
+        int     mdi_width;      /* width */
+        int     mdi_size;       /* available ram */
+        int     mdi_mode;       /* 8bpp, 16bpp or 32bpp */
+        int     mdi_pixfreq;    /* pixel clock (from PROM) */
+};
+
+/* SparcLinux specific ioctl for the MDI, which should be replaced
+ * by the SET_XLUT/SET_CLUTn ioctls
+ */
+#define MDI_CLEAR_XLUT       (MDI_IOCTL|9)
+
+/* leo & ffb ioctls */
+struct fb_clut_alloc {
+	__u32	clutid;	/* Set on return */
+ 	__u32	flag;
+ 	__u32	index;
+};
+
+struct fb_clut {
+#define FB_CLUT_WAIT	0x00000001	/* Not yet implemented */
+ 	__u32	flag;
+ 	__u32	clutid;
+ 	__u32	offset;
+ 	__u32	count;
+ 	char *	red;
+ 	char *	green;
+ 	char *	blue;
+};
+
+struct fb_clut32 {
+ 	__u32	flag;
+ 	__u32	clutid;
+ 	__u32	offset;
+ 	__u32	count;
+ 	__u32	red;
+ 	__u32	green;
+ 	__u32	blue;
+};
+
+#define LEO_CLUTALLOC	_IOWR('L', 53, struct fb_clut_alloc)
+#define LEO_CLUTFREE	_IOW('L', 54, struct fb_clut_alloc)
+#define LEO_CLUTREAD	_IOW('L', 55, struct fb_clut)
+#define LEO_CLUTPOST	_IOW('L', 56, struct fb_clut)
+#define LEO_SETGAMMA	_IOW('L', 68, int) /* Not yet implemented */
+#define LEO_GETGAMMA	_IOR('L', 69, int) /* Not yet implemented */
+
+
+/* These are exported to userland for applications to use */
+/* Mappable offsets for the cg14: control registers */
+#define MDI_DIRECT_MAP 0x10000000
+#define MDI_CTLREG_MAP 0x20000000
+#define MDI_CURSOR_MAP 0x30000000
+#define MDI_SHDW_VRT_MAP 0x40000000
+
+/* Mappable offsets for the cg14: frame buffer resolutions */
+/* 32 bits */
+#define MDI_CHUNKY_XBGR_MAP 0x50000000
+#define MDI_CHUNKY_BGR_MAP 0x60000000
+
+/* 16 bits */
+#define MDI_PLANAR_X16_MAP 0x70000000
+#define MDI_PLANAR_C16_MAP 0x80000000
+
+/* 8 bit is done as CG3 MMAP offset */
+/* 32 bits, planar */
+#define MDI_PLANAR_X32_MAP 0x90000000
+#define MDI_PLANAR_B32_MAP 0xa0000000
+#define MDI_PLANAR_G32_MAP 0xb0000000
+#define MDI_PLANAR_R32_MAP 0xc0000000
+
+/* Mappable offsets on leo */
+#define LEO_SS0_MAP            0x00000000
+#define LEO_LC_SS0_USR_MAP     0x00800000
+#define LEO_LD_SS0_MAP         0x00801000
+#define LEO_LX_CURSOR_MAP      0x00802000
+#define LEO_SS1_MAP            0x00803000
+#define LEO_LC_SS1_USR_MAP     0x01003000
+#define LEO_LD_SS1_MAP         0x01004000
+#define LEO_UNK_MAP            0x01005000
+#define LEO_LX_KRN_MAP         0x01006000
+#define LEO_LC_SS0_KRN_MAP     0x01007000
+#define LEO_LC_SS1_KRN_MAP     0x01008000
+#define LEO_LD_GBL_MAP         0x01009000
+#define LEO_UNK2_MAP           0x0100a000
+
+
+#endif /* _UAPI__LINUX_FBIO_H */
diff --git a/arch/sparc/include/uapi/asm/fcntl.h b/arch/sparc/include/uapi/asm/fcntl.h
new file mode 100644
index 0000000..67dae75
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/fcntl.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_FCNTL_H
+#define _SPARC_FCNTL_H
+
+#define O_APPEND	0x0008
+#define FASYNC		0x0040	/* fcntl, for BSD compatibility */
+#define O_CREAT		0x0200	/* not fcntl */
+#define O_TRUNC		0x0400	/* not fcntl */
+#define O_EXCL		0x0800	/* not fcntl */
+#define O_DSYNC		0x2000	/* used to be O_SYNC, see below */
+#define O_NONBLOCK	0x4000
+#if defined(__sparc__) && defined(__arch64__)
+#define O_NDELAY	0x0004
+#else
+#define O_NDELAY	(0x0004 | O_NONBLOCK)
+#endif
+#define O_NOCTTY	0x8000	/* not fcntl */
+#define O_LARGEFILE	0x40000
+#define O_DIRECT        0x100000 /* direct disk access hint */
+#define O_NOATIME	0x200000
+#define O_CLOEXEC	0x400000
+/*
+ * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
+ * the O_SYNC flag.  We continue to use the existing numerical value
+ * for O_DSYNC semantics now, but using the correct symbolic name for it.
+ * This new value is used to request true Posix O_SYNC semantics.  It is
+ * defined in this strange way to make sure applications compiled against
+ * new headers get at least O_DSYNC semantics on older kernels.
+ *
+ * This has the nice side-effect that we can simply test for O_DSYNC
+ * wherever we do not care if O_DSYNC or O_SYNC is used.
+ *
+ * Note: __O_SYNC must never be used directly.
+ */
+#define __O_SYNC	0x800000
+#define O_SYNC		(__O_SYNC|O_DSYNC)
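+
+/* A minimal sketch of the test described above: because O_SYNC
+ * contains O_DSYNC, one check covers files opened with either flag:
+ *
+ *	int flags = fcntl(fd, F_GETFL);
+ *
+ *	if (flags != -1 && (flags & O_DSYNC))
+ *		... caller asked for at least data-integrity sync ...
+ */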
+
+#define O_PATH		0x1000000
+#define __O_TMPFILE	0x2000000
+
+#define F_GETOWN	5	/*  for sockets. */
+#define F_SETOWN	6	/*  for sockets. */
+#define F_GETLK		7
+#define F_SETLK		8
+#define F_SETLKW	9
+
+/* for posix fcntl() and lockf() */
+#define F_RDLCK		1
+#define F_WRLCK		2
+#define F_UNLCK		3
+
+#define __ARCH_FLOCK_PAD	short __unused;
+#define __ARCH_FLOCK64_PAD	short __unused;
+
+#include <asm-generic/fcntl.h>
+
+#endif
diff --git a/arch/sparc/include/uapi/asm/ioctl.h b/arch/sparc/include/uapi/asm/ioctl.h
new file mode 100644
index 0000000..96c598f
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/ioctl.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_IOCTL_H
+#define _SPARC_IOCTL_H
+
+/*
+ * Our DIR and SIZE overlap in order to simultaneously provide
+ * a non-zero _IOC_NONE (for binary compatibility) and
+ * 14 bits of size as on i386. Here's the layout:
+ *
+ *   0xE0000000   DIR
+ *   0x80000000     DIR = WRITE
+ *   0x40000000     DIR = READ
+ *   0x20000000     DIR = NONE
+ *   0x3FFF0000   SIZE (overlaps NONE bit)
+ *   0x0000FF00   TYPE
+ *   0x000000FF   NR (CMD)
+ */
+
+#define _IOC_NRBITS      8
+#define _IOC_TYPEBITS    8
+#define _IOC_SIZEBITS   13	/* Actually 14, see below. */
+#define _IOC_DIRBITS     3
+
+#define _IOC_NRMASK      ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK    ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK    ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_XSIZEMASK   ((1 << (_IOC_SIZEBITS+1))-1)
+#define _IOC_DIRMASK     ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT     0
+#define _IOC_TYPESHIFT   (_IOC_NRSHIFT + _IOC_NRBITS)
+#define _IOC_SIZESHIFT   (_IOC_TYPESHIFT + _IOC_TYPEBITS)
+#define _IOC_DIRSHIFT    (_IOC_SIZESHIFT + _IOC_SIZEBITS)
+
+#define _IOC_NONE        1U
+#define _IOC_READ        2U
+#define _IOC_WRITE       4U
+
+#define _IOC(dir,type,nr,size) \
+        (((dir)  << _IOC_DIRSHIFT) | \
+         ((type) << _IOC_TYPESHIFT) | \
+         ((nr)   << _IOC_NRSHIFT) | \
+         ((size) << _IOC_SIZESHIFT))
+
+#define _IO(type,nr)        _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)  _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)  _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* Used to decode ioctl numbers in drivers despite the leading underscore... */
+#define _IOC_DIR(nr)    \
+ ( (((((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) & (_IOC_WRITE|_IOC_READ)) != 0)?   \
+                            (((nr) >> _IOC_DIRSHIFT) & (_IOC_WRITE|_IOC_READ)):  \
+                            (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) )
+#define _IOC_TYPE(nr)       (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)         (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)   \
+ ((((((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) & (_IOC_WRITE|_IOC_READ)) == 0)?    \
+                         0: (((nr) >> _IOC_SIZESHIFT) & _IOC_XSIZEMASK))
+
+/* ...and for the PCMCIA and sound. */
+#define IOC_IN          (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT         (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT       ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK    (_IOC_XSIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT   (_IOC_SIZESHIFT)
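+
+/* Worked example of the encoding above: _IOR('p', 0x43, int)
+ * (ENVCTRL_RD_WARNING_TEMPERATURE) assembles as
+ *
+ *	(2U   << 29)	DIR  = READ        -> 0x40000000
+ *	(4    << 16)	SIZE = sizeof(int) -> 0x00040000
+ *	(0x70 <<  8)	TYPE = 'p'         -> 0x00007000
+ *	(0x43 <<  0)	NR                 -> 0x00000043
+ *					      = 0x40047043
+ */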
+
+#endif /* !(_SPARC_IOCTL_H) */
diff --git a/arch/sparc/include/uapi/asm/ioctls.h b/arch/sparc/include/uapi/asm/ioctls.h
new file mode 100644
index 0000000..2df5271
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/ioctls.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_SPARC_IOCTLS_H
+#define _UAPI_ASM_SPARC_IOCTLS_H
+
+#include <asm/ioctl.h>
+
+/* Big T */
+#define TCGETA		_IOR('T', 1, struct termio)
+#define TCSETA		_IOW('T', 2, struct termio)
+#define TCSETAW		_IOW('T', 3, struct termio)
+#define TCSETAF		_IOW('T', 4, struct termio)
+#define TCSBRK		_IO('T', 5)
+#define TCXONC		_IO('T', 6)
+#define TCFLSH		_IO('T', 7)
+#define TCGETS		_IOR('T', 8, struct termios)
+#define TCSETS		_IOW('T', 9, struct termios)
+#define TCSETSW		_IOW('T', 10, struct termios)
+#define TCSETSF		_IOW('T', 11, struct termios)
+#define TCGETS2		_IOR('T', 12, struct termios2)
+#define TCSETS2		_IOW('T', 13, struct termios2)
+#define TCSETSW2	_IOW('T', 14, struct termios2)
+#define TCSETSF2	_IOW('T', 15, struct termios2)
+#define TIOCGDEV	_IOR('T',0x32, unsigned int) /* Get primary device node of /dev/console */
+#define TIOCVHANGUP	_IO('T', 0x37)
+#define TIOCGPKT	_IOR('T', 0x38, int) /* Get packet mode state */
+#define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
+#define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGRS485	_IOR('T', 0x41, struct serial_rs485)
+#define TIOCSRS485	_IOWR('T', 0x42, struct serial_rs485)
+
+/* Note that all the ioctls that are not available in Linux have a
+ * double underscore on the front, to keep programs (autoconfiguration
+ * stuff, in particular) from assuming we support those ioctls under
+ * Linux.
+ */
+/* Little t */
+#define TIOCGETD	_IOR('t', 0, int)
+#define TIOCSETD	_IOW('t', 1, int)
+#define __TIOCHPCL        _IO('t', 2) /* SunOS Specific */
+#define __TIOCMODG        _IOR('t', 3, int) /* SunOS Specific */
+#define __TIOCMODS        _IOW('t', 4, int) /* SunOS Specific */
+#define __TIOCGETP        _IOR('t', 8, struct sgttyb) /* SunOS Specific */
+#define __TIOCSETP        _IOW('t', 9, struct sgttyb) /* SunOS Specific */
+#define __TIOCSETN        _IOW('t', 10, struct sgttyb) /* SunOS Specific */
+#define TIOCEXCL	_IO('t', 13)
+#define TIOCNXCL	_IO('t', 14)
+#define __TIOCFLUSH       _IOW('t', 16, int) /* SunOS Specific */
+#define __TIOCSETC        _IOW('t', 17, struct tchars) /* SunOS Specific */
+#define __TIOCGETC        _IOR('t', 18, struct tchars) /* SunOS Specific */
+#define __TIOCTCNTL       _IOW('t', 32, int) /* SunOS Specific */
+#define __TIOCSIGNAL      _IOW('t', 33, int) /* SunOS Specific */
+#define __TIOCSETX        _IOW('t', 34, int) /* SunOS Specific */
+#define __TIOCGETX        _IOR('t', 35, int) /* SunOS Specific */
+#define TIOCCONS	_IO('t', 36)
+#define TIOCGSOFTCAR	_IOR('t', 100, int)
+#define TIOCSSOFTCAR	_IOW('t', 101, int)
+#define __TIOCUCNTL       _IOW('t', 102, int) /* SunOS Specific */
+#define TIOCSWINSZ	_IOW('t', 103, struct winsize)
+#define TIOCGWINSZ	_IOR('t', 104, struct winsize)
+#define __TIOCREMOTE      _IOW('t', 105, int) /* SunOS Specific */
+#define TIOCMGET	_IOR('t', 106, int)
+#define TIOCMBIC	_IOW('t', 107, int)
+#define TIOCMBIS	_IOW('t', 108, int)
+#define TIOCMSET	_IOW('t', 109, int)
+#define TIOCSTART       _IO('t', 110)
+#define TIOCSTOP        _IO('t', 111)
+#define TIOCPKT		_IOW('t', 112, int)
+#define TIOCNOTTY	_IO('t', 113)
+#define TIOCSTI		_IOW('t', 114, char)
+#define TIOCOUTQ	_IOR('t', 115, int)
+#define __TIOCGLTC        _IOR('t', 116, struct ltchars) /* SunOS Specific */
+#define __TIOCSLTC        _IOW('t', 117, struct ltchars) /* SunOS Specific */
+/* 118 is the non-posix setpgrp tty ioctl */
+/* 119 is the non-posix getpgrp tty ioctl */
+#define __TIOCCDTR        _IO('t', 120) /* SunOS Specific */
+#define __TIOCSDTR        _IO('t', 121) /* SunOS Specific */
+#define TIOCCBRK        _IO('t', 122)
+#define TIOCSBRK        _IO('t', 123)
+#define __TIOCLGET        _IOW('t', 124, int) /* SunOS Specific */
+#define __TIOCLSET        _IOW('t', 125, int) /* SunOS Specific */
+#define __TIOCLBIC        _IOW('t', 126, int) /* SunOS Specific */
+#define __TIOCLBIS        _IOW('t', 127, int) /* SunOS Specific */
+#define __TIOCISPACE      _IOR('t', 128, int) /* SunOS Specific */
+#define __TIOCISIZE       _IOR('t', 129, int) /* SunOS Specific */
+#define TIOCSPGRP	_IOW('t', 130, int)
+#define TIOCGPGRP	_IOR('t', 131, int)
+#define TIOCSCTTY	_IO('t', 132)
+#define TIOCGSID	_IOR('t', 133, int)
+/* Get minor device of a pty master's FD -- Solaris equiv is ISPTM */
+#define TIOCGPTN	_IOR('t', 134, unsigned int) /* Get Pty Number */
+#define TIOCSPTLCK	_IOW('t', 135, int) /* Lock/unlock PTY */
+#define TIOCSIG		_IOW('t', 136, int) /* Generate signal on Pty slave */
+#define TIOCGPTPEER	_IO('t', 137) /* Safely open the slave */
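+
+/* A minimal usage sketch for one of the ioctls above:
+ *
+ *	struct winsize ws;
+ *
+ *	if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &ws) == 0)
+ *		printf("%u rows x %u cols\n", ws.ws_row, ws.ws_col);
+ */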
+
+/* Little f */
+#define FIOCLEX		_IO('f', 1)
+#define FIONCLEX	_IO('f', 2)
+#define FIOASYNC	_IOW('f', 125, int)
+#define FIONBIO		_IOW('f', 126, int)
+#define FIONREAD	_IOR('f', 127, int)
+#define TIOCINQ		FIONREAD
+#define FIOQSIZE	_IOR('f', 128, loff_t)
+
+/* SCARY Rutgers local SunOS kernel hackery, perhaps I will support it
+ * someday.  This is completely bogus, I know...
+ */
+#define __TCGETSTAT       _IO('T', 200) /* Rutgers specific */
+#define __TCSETSTAT       _IO('T', 201) /* Rutgers specific */
+
+/* Linux specific, no SunOS equivalent. */
+#define TIOCLINUX	0x541C
+#define TIOCGSERIAL	0x541E
+#define TIOCSSERIAL	0x541F
+#define TCSBRKP		0x5425
+#define TIOCSERCONFIG	0x5453
+#define TIOCSERGWILD	0x5454
+#define TIOCSERSWILD	0x5455
+#define TIOCGLCKTRMIOS	0x5456
+#define TIOCSLCKTRMIOS	0x5457
+#define TIOCSERGSTRUCT	0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+#define TIOCMIWAIT	0x545C /* Wait for change on serial input line(s) */
+#define TIOCGICOUNT	0x545D /* Read serial port inline interrupt counts */
+
+/* Kernel definitions */
+
+/* Used for packet mode */
+#define TIOCPKT_DATA		 0
+#define TIOCPKT_FLUSHREAD	 1
+#define TIOCPKT_FLUSHWRITE	 2
+#define TIOCPKT_STOP		 4
+#define TIOCPKT_START		 8
+#define TIOCPKT_NOSTOP		16
+#define TIOCPKT_DOSTOP		32
+#define TIOCPKT_IOCTL		64
+
+#endif /* _UAPI_ASM_SPARC_IOCTLS_H */
diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h
new file mode 100644
index 0000000..9d0d125
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/ipcbuf.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __SPARC_IPCBUF_H
+#define __SPARC_IPCBUF_H
+
+/*
+ * The ipc64_perm structure for sparc/sparc64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - a 32-bit seq
+ * - a 32-bit mode on sparc32 (mode is already 32-bit on sparc64)
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct ipc64_perm
+{
+	__kernel_key_t	key;
+	__kernel_uid_t	uid;
+	__kernel_gid_t	gid;
+	__kernel_uid_t	cuid;
+	__kernel_gid_t	cgid;
+#ifndef __arch64__
+	unsigned short	__pad0;
+#endif
+	__kernel_mode_t	mode;
+	unsigned short	__pad1;
+	unsigned short	seq;
+	unsigned long long __unused1;
+	unsigned long long __unused2;
+};
+
+#endif /* __SPARC_IPCBUF_H */
diff --git a/arch/sparc/include/uapi/asm/kvm_para.h b/arch/sparc/include/uapi/asm/kvm_para.h
new file mode 100644
index 0000000..baacc49
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/kvm_para.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#include <asm-generic/kvm_para.h>
diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..f6f99ec
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/mman.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__SPARC_MMAN_H__
+#define _UAPI__SPARC_MMAN_H__
+
+#include <asm-generic/mman-common.h>
+
+/* SunOS'ified... */
+
+#define PROT_ADI	0x10		/* ADI enabled */
+
+#define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
+#define MAP_NORESERVE   0x40            /* don't reserve swap pages */
+#define MAP_INHERIT     0x80            /* SunOS doesn't do this, but... */
+#define MAP_LOCKED      0x100           /* lock the mapping */
+#define _MAP_NEW        0x80000000      /* Binary compatibility is fun... */
+
+#define MAP_GROWSDOWN	0x0200		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+
+#define MCL_CURRENT     0x2000          /* lock all currently mapped pages */
+#define MCL_FUTURE      0x4000          /* lock all additions to address space */
+#define MCL_ONFAULT	0x8000		/* lock all pages that are faulted in */
+
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
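+
+/* A minimal sketch of MAP_NORESERVE (illustrative only): ask for a
+ * large anonymous mapping without reserving swap for it up front:
+ *
+ *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ *		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
+ *		       -1, 0);
+ *
+ *	if (p == MAP_FAILED)
+ *		perror("mmap");
+ */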
+
+
+#endif /* _UAPI__SPARC_MMAN_H__ */
diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h
new file mode 100644
index 0000000..ffc46c2
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/msgbuf.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_MSGBUF_H
+#define _SPARC_MSGBUF_H
+
+/*
+ * The msqid64_ds structure for sparc64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 32-bit values
+ */
+struct msqid64_ds {
+	struct ipc64_perm msg_perm;
+#if defined(__sparc__) && defined(__arch64__)
+	__kernel_time_t msg_stime;	/* last msgsnd time */
+	__kernel_time_t msg_rtime;	/* last msgrcv time */
+	__kernel_time_t msg_ctime;	/* last change time */
+#else
+	unsigned long msg_stime_high;
+	unsigned long msg_stime;	/* last msgsnd time */
+	unsigned long msg_rtime_high;
+	unsigned long msg_rtime;	/* last msgrcv time */
+	unsigned long msg_ctime_high;
+	unsigned long msg_ctime;	/* last change time */
+#endif
+	unsigned long  msg_cbytes;	/* current number of bytes on queue */
+	unsigned long  msg_qnum;	/* number of messages in queue */
+	unsigned long  msg_qbytes;	/* max number of bytes on queue */
+	__kernel_pid_t msg_lspid;	/* pid of last msgsnd */
+	__kernel_pid_t msg_lrpid;	/* last receive pid */
+	unsigned long  __unused1;
+	unsigned long  __unused2;
+};
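+
+/* On 32-bit sparc the 64-bit timestamps are carried in split
+ * high/low words; a sketch of reassembling one in userspace:
+ *
+ *	unsigned long long stime =
+ *		((unsigned long long)ds.msg_stime_high << 32) |
+ *		ds.msg_stime;
+ */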
+#endif /* _SPARC_MSGBUF_H */
diff --git a/arch/sparc/include/uapi/asm/openpromio.h b/arch/sparc/include/uapi/asm/openpromio.h
new file mode 100644
index 0000000..8817f7d
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/openpromio.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef	_SPARC_OPENPROMIO_H
+#define	_SPARC_OPENPROMIO_H
+
+#include <linux/compiler.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * SunOS and Solaris /dev/openprom definitions. The ioctl values
+ * were chosen to be exactly equal to the SunOS equivalents.
+ */
+
+struct openpromio
+{
+	u_int	oprom_size;		/* Actual size of the oprom_array. */
+	char	oprom_array[1];		/* Holds property names and values. */
+};
+
+#define	OPROMMAXPARAM	4096		/* Maximum size of oprom_array. */
+
+#define	OPROMGETOPT		0x20004F01
+#define	OPROMSETOPT		0x20004F02
+#define	OPROMNXTOPT		0x20004F03
+#define	OPROMSETOPT2		0x20004F04
+#define	OPROMNEXT		0x20004F05
+#define	OPROMCHILD		0x20004F06
+#define	OPROMGETPROP		0x20004F07
+#define	OPROMNXTPROP		0x20004F08
+#define	OPROMU2P		0x20004F09
+#define	OPROMGETCONS		0x20004F0A
+#define	OPROMGETFBNAME		0x20004F0B
+#define	OPROMGETBOOTARGS	0x20004F0C
+/* Linux extensions */				/* Arguments in oprom_array: */
+#define OPROMSETCUR		0x20004FF0	/* int node - Sets current node */
+#define OPROMPCI2NODE		0x20004FF1	/* int pci_bus, pci_devfn - Sets current node to PCI device's node */
+#define OPROMPATH2NODE		0x20004FF2	/* char path[] - Set current node from fully qualified PROM path */
+
+/*
+ * Return values from OPROMGETCONS:
+ */
+
+#define OPROMCONS_NOT_WSCONS    0
+#define OPROMCONS_STDIN_IS_KBD  0x1     /* stdin device is kbd */
+#define OPROMCONS_STDOUT_IS_FB  0x2     /* stdout is a framebuffer */
+#define OPROMCONS_OPENPROM      0x4     /* supports openboot */
+
+
+/*
+ *  NetBSD/OpenBSD /dev/openprom definitions.
+ */
+
+struct opiocdesc
+{
+	int	op_nodeid;		/* PROM Node ID (value-result) */
+	int	op_namelen;		/* Length of op_name. */
+	char	__user *op_name;	/* Pointer to the property name. */
+	int	op_buflen;		/* Length of op_buf (value-result) */
+	char	__user *op_buf;		/* Pointer to buffer. */
+};
+
+#define	OPIOCGET	_IOWR('O', 1, struct opiocdesc)
+#define	OPIOCSET	_IOW('O', 2, struct opiocdesc)
+#define	OPIOCNEXTPROP	_IOWR('O', 3, struct opiocdesc)
+#define	OPIOCGETOPTNODE	_IOR('O', 4, int)
+#define	OPIOCGETNEXT	_IOWR('O', 5, int)
+#define	OPIOCGETCHILD	_IOWR('O', 6, int)
+
+#endif /* _SPARC_OPENPROMIO_H */
+
diff --git a/arch/sparc/include/uapi/asm/oradax.h b/arch/sparc/include/uapi/asm/oradax.h
new file mode 100644
index 0000000..4f6676f
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/oradax.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Oracle DAX driver API definitions
+ */
+
+#ifndef _ORADAX_H
+#define	_ORADAX_H
+
+#include <linux/types.h>
+
+#define	CCB_KILL 0
+#define	CCB_INFO 1
+#define	CCB_DEQUEUE 2
+
+struct dax_command {
+	__u16 command;		/* CCB_KILL/INFO/DEQUEUE */
+	__u16 ca_offset;	/* offset into mmapped completion area */
+};
+
+struct ccb_kill_result {
+	__u16 action;		/* action taken to kill ccb */
+};
+
+struct ccb_info_result {
+	__u16 state;		/* state of enqueued ccb */
+	__u16 inst_num;		/* dax instance number of enqueued ccb */
+	__u16 q_num;		/* queue number of enqueued ccb */
+	__u16 q_pos;		/* ccb position in queue */
+};
+
+struct ccb_exec_result {
+	__u64	status_data;	/* additional status data (e.g. bad VA) */
+	__u32	status;		/* one of DAX_SUBMIT_* */
+};
+
+union ccb_result {
+	struct ccb_exec_result exec;
+	struct ccb_info_result info;
+	struct ccb_kill_result kill;
+};
+
+#define	DAX_MMAP_LEN		(16 * 1024)
+#define	DAX_MAX_CCBS		15
+#define	DAX_CCB_BUF_MAXLEN	(DAX_MAX_CCBS * 64)
+#define	DAX_NAME		"oradax"
+
+/* CCB_EXEC status */
+#define	DAX_SUBMIT_OK			0
+#define	DAX_SUBMIT_ERR_RETRY		1
+#define	DAX_SUBMIT_ERR_WOULDBLOCK	2
+#define	DAX_SUBMIT_ERR_BUSY		3
+#define	DAX_SUBMIT_ERR_THR_INIT		4
+#define	DAX_SUBMIT_ERR_ARG_INVAL	5
+#define	DAX_SUBMIT_ERR_CCB_INVAL	6
+#define	DAX_SUBMIT_ERR_NO_CA_AVAIL	7
+#define	DAX_SUBMIT_ERR_CCB_ARR_MMU_MISS	8
+#define	DAX_SUBMIT_ERR_NOMAP		9
+#define	DAX_SUBMIT_ERR_NOACCESS		10
+#define	DAX_SUBMIT_ERR_TOOMANY		11
+#define	DAX_SUBMIT_ERR_UNAVAIL		12
+#define	DAX_SUBMIT_ERR_INTERNAL		13
+
+/* CCB_INFO states - must match HV_CCB_STATE_* definitions */
+#define	DAX_CCB_COMPLETED	0
+#define	DAX_CCB_ENQUEUED	1
+#define	DAX_CCB_INPROGRESS	2
+#define	DAX_CCB_NOTFOUND	3
+
+/* CCB_KILL actions - must match HV_CCB_KILL_* definitions */
+#define	DAX_KILL_COMPLETED	0
+#define	DAX_KILL_DEQUEUED	1
+#define	DAX_KILL_KILLED		2
+#define	DAX_KILL_NOTFOUND	3
+
+#endif /* _ORADAX_H */
diff --git a/arch/sparc/include/uapi/asm/param.h b/arch/sparc/include/uapi/asm/param.h
new file mode 100644
index 0000000..057d713
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/param.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASMSPARC_PARAM_H
+#define _ASMSPARC_PARAM_H
+
+#define EXEC_PAGESIZE	8192    /* Thanks to the sun4s we carry baggage... */
+#include <asm-generic/param.h>
+
+#endif /* _ASMSPARC_PARAM_H */
diff --git a/arch/sparc/include/uapi/asm/perfctr.h b/arch/sparc/include/uapi/asm/perfctr.h
new file mode 100644
index 0000000..316b837
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/perfctr.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*----------------------------------------
+  PERFORMANCE INSTRUMENTATION  
+  Guillaume Thouvenin           08/10/98
+  David S. Miller               10/06/98
+  ---------------------------------------*/
+#ifndef PERF_COUNTER_API
+#define PERF_COUNTER_API
+
+/* sys_perfctr() interface.  First arg is operation code
+ * from enumeration below.  The meaning of further arguments
+ * are determined by the operation code.
+ *
+ * NOTE: This system call is no longer provided, use the perf_events
+ *       infrastructure.
+ *
+ * Pointers which are passed by the user are pointers to 64-bit
+ * integers.
+ *
+ * Once enabled, performance counter state is retained until the
+ * process either exits or performs an exec.  That is, performance
+ * counters remain enabled for fork/clone children.
+ */
+enum perfctr_opcode {
+	/* Enable UltraSparc performance counters, ARG0 is pointer
+	 * to 64-bit accumulator for D0 counter in PIC, ARG1 is pointer
+	 * to 64-bit accumulator for D1 counter.  ARG2 is a pointer to
+	 * the initial PCR register value to use.
+	 */
+	PERFCTR_ON,
+
+	/* Disable UltraSparc performance counters.  The PCR is written
+	 * with zero and the user counter accumulator pointers and
+	 * working PCR register value are forgotten.
+	 */
+	PERFCTR_OFF,
+
+	/* Add current D0 and D1 PIC values into user pointers given
+	 * in PERFCTR_ON operation.  The PIC is cleared before returning.
+	 */
+	PERFCTR_READ,
+
+	/* Clear the PIC register. */
+	PERFCTR_CLRPIC,
+
+	/* Begin using a new PCR value, the pointer to which is passed
+	 * in ARG0.  The PIC is also cleared after the new PCR value is
+	 * written.
+	 */
+	PERFCTR_SETPCR,
+
+	/* Store in pointer given in ARG0 the current PCR register value
+	 * being used.
+	 */
+	PERFCTR_GETPCR
+};
+
+#define  PRIV 0x00000001
+#define  SYS  0x00000002
+#define  USR  0x00000004
+
+/* Pic.S0 Selection Bit Field Encoding, Ultra-I/II  */
+#define  CYCLE_CNT            0x00000000
+#define  INSTR_CNT            0x00000010
+#define  DISPATCH0_IC_MISS    0x00000020
+#define  DISPATCH0_STOREBUF   0x00000030
+#define  IC_REF               0x00000080
+#define  DC_RD                0x00000090
+#define  DC_WR                0x000000A0
+#define  LOAD_USE             0x000000B0
+#define  EC_REF               0x000000C0
+#define  EC_WRITE_HIT_RDO     0x000000D0
+#define  EC_SNOOP_INV         0x000000E0
+#define  EC_RD_HIT            0x000000F0
+
+/* Pic.S0 Selection Bit Field Encoding, Ultra-III  */
+#define  US3_CYCLE_CNT	      	0x00000000
+#define  US3_INSTR_CNT	      	0x00000010
+#define  US3_DISPATCH0_IC_MISS	0x00000020
+#define  US3_DISPATCH0_BR_TGT	0x00000030
+#define  US3_DISPATCH0_2ND_BR	0x00000040
+#define  US3_RSTALL_STOREQ	0x00000050
+#define  US3_RSTALL_IU_USE	0x00000060
+#define  US3_IC_REF		0x00000080
+#define  US3_DC_RD		0x00000090
+#define  US3_DC_WR		0x000000a0
+#define  US3_EC_REF		0x000000c0
+#define  US3_EC_WR_HIT_RTO	0x000000d0
+#define  US3_EC_SNOOP_INV	0x000000e0
+#define  US3_EC_RD_MISS		0x000000f0
+#define  US3_PC_PORT0_RD	0x00000100
+#define  US3_SI_SNOOP		0x00000110
+#define  US3_SI_CIQ_FLOW	0x00000120
+#define  US3_SI_OWNED		0x00000130
+#define  US3_SW_COUNT_0		0x00000140
+#define  US3_IU_BR_MISS_TAKEN	0x00000150
+#define  US3_IU_BR_COUNT_TAKEN	0x00000160
+#define  US3_DISP_RS_MISPRED	0x00000170
+#define  US3_FA_PIPE_COMPL	0x00000180
+#define  US3_MC_READS_0		0x00000200
+#define  US3_MC_READS_1		0x00000210
+#define  US3_MC_READS_2		0x00000220
+#define  US3_MC_READS_3		0x00000230
+#define  US3_MC_STALLS_0	0x00000240
+#define  US3_MC_STALLS_2	0x00000250
+
+/* Pic.S1 Selection Bit Field Encoding, Ultra-I/II  */
+#define  CYCLE_CNT_D1         0x00000000
+#define  INSTR_CNT_D1         0x00000800
+#define  DISPATCH0_IC_MISPRED 0x00001000
+#define  DISPATCH0_FP_USE     0x00001800
+#define  IC_HIT               0x00004000
+#define  DC_RD_HIT            0x00004800
+#define  DC_WR_HIT            0x00005000
+#define  LOAD_USE_RAW         0x00005800
+#define  EC_HIT               0x00006000
+#define  EC_WB                0x00006800
+#define  EC_SNOOP_CB          0x00007000
+#define  EC_IT_HIT            0x00007800
+
+/* Pic.S1 Selection Bit Field Encoding, Ultra-III  */
+#define  US3_CYCLE_CNT_D1	0x00000000
+#define  US3_INSTR_CNT_D1	0x00000800
+#define  US3_DISPATCH0_MISPRED	0x00001000
+#define  US3_IC_MISS_CANCELLED	0x00001800
+#define  US3_RE_ENDIAN_MISS	0x00002000
+#define  US3_RE_FPU_BYPASS	0x00002800
+#define  US3_RE_DC_MISS		0x00003000
+#define  US3_RE_EC_MISS		0x00003800
+#define  US3_IC_MISS		0x00004000
+#define  US3_DC_RD_MISS		0x00004800
+#define  US3_DC_WR_MISS		0x00005000
+#define  US3_RSTALL_FP_USE	0x00005800
+#define  US3_EC_MISSES		0x00006000
+#define  US3_EC_WB		0x00006800
+#define  US3_EC_SNOOP_CB	0x00007000
+#define  US3_EC_IC_MISS		0x00007800
+#define  US3_RE_PC_MISS		0x00008000
+#define  US3_ITLB_MISS		0x00008800
+#define  US3_DTLB_MISS		0x00009000
+#define  US3_WC_MISS		0x00009800
+#define  US3_WC_SNOOP_CB	0x0000a000
+#define  US3_WC_SCRUBBED	0x0000a800
+#define  US3_WC_WB_WO_READ	0x0000b000
+#define  US3_PC_SOFT_HIT	0x0000c000
+#define  US3_PC_SNOOP_INV	0x0000c800
+#define  US3_PC_HARD_HIT	0x0000d000
+#define  US3_PC_PORT1_RD	0x0000d800
+#define  US3_SW_COUNT_1		0x0000e000
+#define  US3_IU_STAT_BR_MIS_UNTAKEN	0x0000e800
+#define  US3_IU_STAT_BR_COUNT_UNTAKEN	0x0000f000
+#define  US3_PC_MS_MISSES	0x0000f800
+#define  US3_MC_WRITES_0	0x00010800
+#define  US3_MC_WRITES_1	0x00011000
+#define  US3_MC_WRITES_2	0x00011800
+#define  US3_MC_WRITES_3	0x00012000
+#define  US3_MC_STALLS_1	0x00012800
+#define  US3_MC_STALLS_3	0x00013000
+#define  US3_RE_RAW_MISS	0x00013800
+#define  US3_FM_PIPE_COMPLETION	0x00014000
+
+struct vcounter_struct {
+  unsigned long long vcnt0;
+  unsigned long long vcnt1;
+};
+
+#endif /* !(PERF_COUNTER_API) */
diff --git a/arch/sparc/include/uapi/asm/poll.h b/arch/sparc/include/uapi/asm/poll.h
new file mode 100644
index 0000000..72356c9
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/poll.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __SPARC_POLL_H
+#define __SPARC_POLL_H
+
+#define POLLWRNORM	POLLOUT
+#define POLLWRBAND	256
+#define POLLMSG		512
+#define POLLREMOVE	1024
+#define POLLRDHUP       2048
+
+#include <asm-generic/poll.h>
+
+#endif
diff --git a/arch/sparc/include/uapi/asm/posix_types.h b/arch/sparc/include/uapi/asm/posix_types.h
new file mode 100644
index 0000000..fec499d
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/posix_types.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+#ifndef __SPARC_POSIX_TYPES_H
+#define __SPARC_POSIX_TYPES_H
+
+#if defined(__sparc__) && defined(__arch64__)
+/* sparc 64 bit */
+
+typedef unsigned short 	       __kernel_old_uid_t;
+typedef unsigned short         __kernel_old_gid_t;
+#define __kernel_old_uid_t __kernel_old_uid_t
+
+/* Note this piece of asymmetry from the v9 ABI.  */
+typedef int		       __kernel_suseconds_t;
+#define __kernel_suseconds_t __kernel_suseconds_t
+
+#else
+/* sparc 32 bit */
+
+typedef unsigned int           __kernel_size_t;
+typedef int                    __kernel_ssize_t;
+typedef long int               __kernel_ptrdiff_t;
+#define __kernel_size_t __kernel_size_t
+
+typedef unsigned short         __kernel_ipc_pid_t;
+#define __kernel_ipc_pid_t __kernel_ipc_pid_t
+
+typedef unsigned short         __kernel_uid_t;
+typedef unsigned short         __kernel_gid_t;
+#define __kernel_uid_t __kernel_uid_t
+
+typedef unsigned short         __kernel_mode_t;
+#define __kernel_mode_t __kernel_mode_t
+
+typedef long                   __kernel_daddr_t;
+#define __kernel_daddr_t __kernel_daddr_t
+
+typedef unsigned short	       __kernel_old_dev_t;
+#define __kernel_old_dev_t __kernel_old_dev_t
+
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+#include <asm-generic/posix_types.h>
+
+#endif /* __SPARC_POSIX_TYPES_H */
diff --git a/arch/sparc/include/uapi/asm/psr.h b/arch/sparc/include/uapi/asm/psr.h
new file mode 100644
index 0000000..e41f65f
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/psr.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * psr.h: This file holds the macros for masking off various parts of
+ *        the processor status register on the Sparc. This is valid
+ *        for Version 8. On the V9 this is renamed to the PSTATE
+ *        register and its members are accessed as fields like
+ *        PSTATE.PRIV for the current CPU privilege level.
+ *
+ * Copyright (C) 1994 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _UAPI__LINUX_SPARC_PSR_H
+#define _UAPI__LINUX_SPARC_PSR_H
+
+/* The Sparc PSR fields are laid out as the following:
+ *
+ *  ------------------------------------------------------------------------
+ *  | impl  | vers  | icc   | resv  | EC | EF | PIL  | S | PS | ET |  CWP  |
+ *  | 31-28 | 27-24 | 23-20 | 19-14 | 13 | 12 | 11-8 | 7 | 6  | 5  |  4-0  |
+ *  ------------------------------------------------------------------------
+ */
+#define PSR_CWP     0x0000001f         /* current window pointer     */
+#define PSR_ET      0x00000020         /* enable traps field         */
+#define PSR_PS      0x00000040         /* previous privilege level   */
+#define PSR_S       0x00000080         /* current privilege level    */
+#define PSR_PIL     0x00000f00         /* processor interrupt level  */
+#define PSR_EF      0x00001000         /* enable floating point      */
+#define PSR_EC      0x00002000         /* enable co-processor        */
+#define PSR_SYSCALL 0x00004000         /* inside of a syscall        */
+#define PSR_LE      0x00008000         /* SuperSparcII little-endian */
+#define PSR_ICC     0x00f00000         /* integer condition codes    */
+#define PSR_C       0x00100000         /* carry bit                  */
+#define PSR_V       0x00200000         /* overflow bit               */
+#define PSR_Z       0x00400000         /* zero bit                   */
+#define PSR_N       0x00800000         /* negative bit               */
+#define PSR_VERS    0x0f000000         /* cpu-version field          */
+#define PSR_IMPL    0xf0000000         /* cpu-implementation field   */
+
+#define PSR_VERS_SHIFT		24
+#define PSR_IMPL_SHIFT		28
+#define PSR_VERS_SHIFTED_MASK	0xf
+#define PSR_IMPL_SHIFTED_MASK	0xf
+
+#define PSR_IMPL_TI		0x4
+#define PSR_IMPL_LEON		0xf
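+
+/* A sketch of using the shifted masks, e.g. to spot a LEON core:
+ *
+ *	if (((psr >> PSR_IMPL_SHIFT) & PSR_IMPL_SHIFTED_MASK) ==
+ *	    PSR_IMPL_LEON)
+ *		... LEON implementation ...
+ */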
+
+
+#endif /* _UAPI__LINUX_SPARC_PSR_H */
diff --git a/arch/sparc/include/uapi/asm/psrcompat.h b/arch/sparc/include/uapi/asm/psrcompat.h
new file mode 100644
index 0000000..1eaffbe
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/psrcompat.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC64_PSRCOMPAT_H
+#define _SPARC64_PSRCOMPAT_H
+
+#include <asm/pstate.h>
+
+/* Old 32-bit PSR fields for the compatibility conversion code. */
+#define PSR_CWP     0x0000001f         /* current window pointer     */
+#define PSR_ET      0x00000020         /* enable traps field         */
+#define PSR_PS      0x00000040         /* previous privilege level   */
+#define PSR_S       0x00000080         /* current privilege level    */
+#define PSR_PIL     0x00000f00         /* processor interrupt level  */
+#define PSR_EF      0x00001000         /* enable floating point      */
+#define PSR_EC      0x00002000         /* enable co-processor        */
+#define PSR_SYSCALL 0x00004000         /* inside of a syscall        */
+#define PSR_LE      0x00008000         /* SuperSparcII little-endian */
+#define PSR_ICC     0x00f00000         /* integer condition codes    */
+#define PSR_C       0x00100000         /* carry bit                  */
+#define PSR_V       0x00200000         /* overflow bit               */
+#define PSR_Z       0x00400000         /* zero bit                   */
+#define PSR_N       0x00800000         /* negative bit               */
+#define PSR_VERS    0x0f000000         /* cpu-version field          */
+#define PSR_IMPL    0xf0000000         /* cpu-implementation field   */
+
+#define PSR_V8PLUS  0xff000000         /* fake impl/ver, meaning a 64bit CPU is present */
+#define PSR_XCC	    0x000f0000         /* if PSR_V8PLUS, this is %xcc */
+
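+/* The shift counts below follow from the layouts in pstate.h:
+ * TSTATE_ICC occupies bits 35:32 and PSR_ICC bits 23:20, a distance
+ * of 12; TSTATE_XCC occupies bits 39:36 and the fake PSR_XCC bits
+ * 19:16, a distance of 20.
+ */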
+static inline unsigned int tstate_to_psr(unsigned long tstate)
+{
+	return ((tstate & TSTATE_CWP)			|
+		PSR_S					|
+		((tstate & TSTATE_ICC) >> 12)		|
+		((tstate & TSTATE_XCC) >> 20)		|
+		((tstate & TSTATE_SYSCALL) ? PSR_SYSCALL : 0) |
+		PSR_V8PLUS);
+}
+
+static inline unsigned long psr_to_tstate_icc(unsigned int psr)
+{
+	unsigned long tstate = ((unsigned long)(psr & PSR_ICC)) << 12;
+	if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS)
+		tstate |= ((unsigned long)(psr & PSR_XCC)) << 20;
+	return tstate;
+}
+
+#endif /* !(_SPARC64_PSRCOMPAT_H) */
diff --git a/arch/sparc/include/uapi/asm/pstate.h b/arch/sparc/include/uapi/asm/pstate.h
new file mode 100644
index 0000000..ceca96e
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/pstate.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC64_PSTATE_H
+#define _SPARC64_PSTATE_H
+
+#include <linux/const.h>
+
+/* The V9 PSTATE Register (with SpitFire extensions).
+ *
+ * -----------------------------------------------------------------------
+ * | Resv | IG | MG | CLE | TLE |  MM  | RED | PEF | AM | PRIV | IE | AG |
+ * -----------------------------------------------------------------------
+ *  63  12  11   10    9     8    7   6   5     4     3     2     1    0
+ */
+/* IG on V9 conflicts with MCDE on M7. PSTATE_MCDE will only be used on
+ * processors that support ADI which do not use IG, hence there is no
+ * functional conflict
+ */
+#define PSTATE_IG   _AC(0x0000000000000800,UL) /* Interrupt Globals.	*/
+#define PSTATE_MCDE _AC(0x0000000000000800,UL) /* MCD Enable		*/
+#define PSTATE_MG   _AC(0x0000000000000400,UL) /* MMU Globals.		*/
+#define PSTATE_CLE  _AC(0x0000000000000200,UL) /* Current Little Endian.*/
+#define PSTATE_TLE  _AC(0x0000000000000100,UL) /* Trap Little Endian.	*/
+#define PSTATE_MM   _AC(0x00000000000000c0,UL) /* Memory Model.		*/
+#define PSTATE_TSO  _AC(0x0000000000000000,UL) /* MM: TotalStoreOrder	*/
+#define PSTATE_PSO  _AC(0x0000000000000040,UL) /* MM: PartialStoreOrder	*/
+#define PSTATE_RMO  _AC(0x0000000000000080,UL) /* MM: RelaxedMemoryOrder*/
+#define PSTATE_RED  _AC(0x0000000000000020,UL) /* Reset Error Debug.	*/
+#define PSTATE_PEF  _AC(0x0000000000000010,UL) /* Floating Point Enable.*/
+#define PSTATE_AM   _AC(0x0000000000000008,UL) /* Address Mask.		*/
+#define PSTATE_PRIV _AC(0x0000000000000004,UL) /* Privilege.		*/
+#define PSTATE_IE   _AC(0x0000000000000002,UL) /* Interrupt Enable.	*/
+#define PSTATE_AG   _AC(0x0000000000000001,UL) /* Alternate Globals.	*/
+
+/* The V9 TSTATE Register (with SpitFire and Linux extensions).
+ *
+ * ---------------------------------------------------------------------
+ * |  Resv |  GL  |  CCR  |  ASI  |  %pil  |  PSTATE  |  Resv  |  CWP  |
+ * ---------------------------------------------------------------------
+ *  63   43 42  40 39   32 31   24 23    20 19       8 7      5 4     0
+ */
+#define TSTATE_GL	_AC(0x0000070000000000,UL) /* Global reg level  */
+#define TSTATE_CCR	_AC(0x000000ff00000000,UL) /* Condition Codes.	*/
+#define TSTATE_XCC	_AC(0x000000f000000000,UL) /* Condition Codes.	*/
+#define TSTATE_XNEG	_AC(0x0000008000000000,UL) /* %xcc Negative.	*/
+#define TSTATE_XZERO	_AC(0x0000004000000000,UL) /* %xcc Zero.	*/
+#define TSTATE_XOVFL	_AC(0x0000002000000000,UL) /* %xcc Overflow.	*/
+#define TSTATE_XCARRY	_AC(0x0000001000000000,UL) /* %xcc Carry.	*/
+#define TSTATE_ICC	_AC(0x0000000f00000000,UL) /* Condition Codes.	*/
+#define TSTATE_INEG	_AC(0x0000000800000000,UL) /* %icc Negative.	*/
+#define TSTATE_IZERO	_AC(0x0000000400000000,UL) /* %icc Zero.	*/
+#define TSTATE_IOVFL	_AC(0x0000000200000000,UL) /* %icc Overflow.	*/
+#define TSTATE_ICARRY	_AC(0x0000000100000000,UL) /* %icc Carry.	*/
+#define TSTATE_ASI	_AC(0x00000000ff000000,UL) /* AddrSpace ID.	*/
+#define TSTATE_PIL	_AC(0x0000000000f00000,UL) /* %pil (Linux traps)*/
+#define TSTATE_PSTATE	_AC(0x00000000000fff00,UL) /* PSTATE.		*/
+/* IG on V9 conflicts with MCDE on M7. TSTATE_MCDE will only be used on
+ * processors that support ADI which do not support IG, hence there is
+ * no functional conflict
+ */
+#define TSTATE_IG	_AC(0x0000000000080000,UL) /* Interrupt Globals.*/
+#define TSTATE_MCDE	_AC(0x0000000000080000,UL) /* MCD enable.       */
+#define TSTATE_MG	_AC(0x0000000000040000,UL) /* MMU Globals.	*/
+#define TSTATE_CLE	_AC(0x0000000000020000,UL) /* CurrLittleEndian.	*/
+#define TSTATE_TLE	_AC(0x0000000000010000,UL) /* TrapLittleEndian.	*/
+#define TSTATE_MM	_AC(0x000000000000c000,UL) /* Memory Model.	*/
+#define TSTATE_TSO	_AC(0x0000000000000000,UL) /* MM: TSO		*/
+#define TSTATE_PSO	_AC(0x0000000000004000,UL) /* MM: PSO		*/
+#define TSTATE_RMO	_AC(0x0000000000008000,UL) /* MM: RMO		*/
+#define TSTATE_RED	_AC(0x0000000000002000,UL) /* Reset Error Debug.*/
+#define TSTATE_PEF	_AC(0x0000000000001000,UL) /* FPU Enable.	*/
+#define TSTATE_AM	_AC(0x0000000000000800,UL) /* Address Mask.	*/
+#define TSTATE_PRIV	_AC(0x0000000000000400,UL) /* Privilege.	*/
+#define TSTATE_IE	_AC(0x0000000000000200,UL) /* Interrupt Enable.	*/
+#define TSTATE_AG	_AC(0x0000000000000100,UL) /* Alternate Globals.*/
+#define TSTATE_SYSCALL	_AC(0x0000000000000020,UL) /* in syscall trap   */
+#define TSTATE_CWP	_AC(0x000000000000001f,UL) /* Curr Win-Pointer.	*/
+
+/* Floating-Point Registers State Register.
+ *
+ * --------------------------------
+ * |  Resv  |  FEF  |  DU  |  DL  |
+ * --------------------------------
+ *  63     3    2       1      0
+ */
+#define FPRS_FEF	_AC(0x0000000000000004,UL) /* FPU Enable.	*/
+#define FPRS_DU		_AC(0x0000000000000002,UL) /* Dirty Upper.	*/
+#define FPRS_DL		_AC(0x0000000000000001,UL) /* Dirty Lower.	*/
+
+/* Version Register.
+ *
+ * ------------------------------------------------------
+ * | MANUF | IMPL | MASK | Resv | MAXTL | Resv | MAXWIN |
+ * ------------------------------------------------------
+ *  63   48 47  32 31  24 23  16 15    8 7    5 4      0
+ */
+#define VERS_MANUF	_AC(0xffff000000000000,UL) /* Manufacturer.	*/
+#define VERS_IMPL	_AC(0x0000ffff00000000,UL) /* Implementation.	*/
+#define VERS_MASK	_AC(0x00000000ff000000,UL) /* Mask Set Revision.*/
+#define VERS_MAXTL	_AC(0x000000000000ff00,UL) /* Max Trap Level.	*/
+#define VERS_MAXWIN	_AC(0x000000000000001f,UL) /* Max RegWindow Idx.*/
+
+/* Compatibility Feature Register (%asr26), SPARC-T4 and later  */
+#define CFR_AES		_AC(0x0000000000000001,UL) /* Supports AES opcodes     */
+#define CFR_DES		_AC(0x0000000000000002,UL) /* Supports DES opcodes     */
+#define CFR_KASUMI	_AC(0x0000000000000004,UL) /* Supports KASUMI opcodes  */
+#define CFR_CAMELLIA	_AC(0x0000000000000008,UL) /* Supports CAMELLIA opcodes*/
+#define CFR_MD5		_AC(0x0000000000000010,UL) /* Supports MD5 opcodes     */
+#define CFR_SHA1	_AC(0x0000000000000020,UL) /* Supports SHA1 opcodes    */
+#define CFR_SHA256	_AC(0x0000000000000040,UL) /* Supports SHA256 opcodes  */
+#define CFR_SHA512	_AC(0x0000000000000080,UL) /* Supports SHA512 opcodes  */
+#define CFR_MPMUL	_AC(0x0000000000000100,UL) /* Supports MPMUL opcodes   */
+#define CFR_MONTMUL	_AC(0x0000000000000200,UL) /* Supports MONTMUL opcodes */
+#define CFR_MONTSQR	_AC(0x0000000000000400,UL) /* Supports MONTSQR opcodes */
+#define CFR_CRC32C	_AC(0x0000000000000800,UL) /* Supports CRC32C opcodes  */
+
+#endif /* !(_SPARC64_PSTATE_H) */
diff --git a/arch/sparc/include/uapi/asm/ptrace.h b/arch/sparc/include/uapi/asm/ptrace.h
new file mode 100644
index 0000000..abe6400
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/ptrace.h
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__SPARC_PTRACE_H
+#define _UAPI__SPARC_PTRACE_H
+
+#if defined(__sparc__) && defined(__arch64__)
+/* 64 bit sparc */
+#include <asm/pstate.h>
+
+/* This struct defines the way the registers are stored on the
+ * stack during a system call and basically all traps.
+ */
+
+/* This magic value must have the low 9 bits clear,
+ * as that is where we encode the %tt value, see below.
+ */
+#define PT_REGS_MAGIC 0x57ac6c00
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct pt_regs {
+	unsigned long u_regs[16]; /* globals and ins */
+	unsigned long tstate;
+	unsigned long tpc;
+	unsigned long tnpc;
+	unsigned int y;
+
+	/* We encode a magic number, PT_REGS_MAGIC, along
+	 * with the %tt (trap type) register value at trap
+	 * entry time.  The magic number allows us to accurately
+	 * identify a trap stack frame in the stack
+	 * unwinder, and the %tt value allows us to test
+	 * things like "in a system call" etc. for an arbitrary
+	 * process.
+	 *
+	 * The PT_REGS_MAGIC is chosen such that it can be
+	 * loaded completely using just a sethi instruction.
+	 */
+	unsigned int magic;
+};
+
+struct pt_regs32 {
+	unsigned int psr;
+	unsigned int pc;
+	unsigned int npc;
+	unsigned int y;
+	unsigned int u_regs[16]; /* globals and ins */
+};
+
+/* A V9 register window */
+struct reg_window {
+	unsigned long locals[8];
+	unsigned long ins[8];
+};
+
+/* A 32-bit register window. */
+struct reg_window32 {
+	unsigned int locals[8];
+	unsigned int ins[8];
+};
+
+/* A V9 Sparc stack frame */
+struct sparc_stackf {
+	unsigned long locals[8];
+        unsigned long ins[6];
+	struct sparc_stackf *fp;
+	unsigned long callers_pc;
+	char *structptr;
+	unsigned long xargs[6];
+	unsigned long xxargs[1];
+};
+
+/* A 32-bit Sparc stack frame */
+struct sparc_stackf32 {
+	unsigned int locals[8];
+        unsigned int ins[6];
+	unsigned int fp;
+	unsigned int callers_pc;
+	unsigned int structptr;
+	unsigned int xargs[6];
+	unsigned int xxargs[1];
+};
+
+struct sparc_trapf {
+	unsigned long locals[8];
+	unsigned long ins[8];
+	unsigned long _unused;
+	struct pt_regs *regs;
+};
+#endif /* (!__ASSEMBLY__) */
+#else
+/* 32 bit sparc */
+
+#include <asm/psr.h>
+
+/* This struct defines the way the registers are stored on the
+ * stack during a system call and basically all traps.
+ */
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+struct pt_regs {
+	unsigned long psr;
+	unsigned long pc;
+	unsigned long npc;
+	unsigned long y;
+	unsigned long u_regs[16]; /* globals and ins */
+};
+
+/* A 32-bit register window. */
+struct reg_window32 {
+	unsigned long locals[8];
+	unsigned long ins[8];
+};
+
+/* A Sparc stack frame */
+struct sparc_stackf {
+	unsigned long locals[8];
+	unsigned long ins[6];
+	struct sparc_stackf *fp;
+	unsigned long callers_pc;
+	char *structptr;
+	unsigned long xargs[6];
+	unsigned long xxargs[1];
+};
+#endif /* (!__ASSEMBLY__) */
+
+#endif /* (defined(__sparc__) && defined(__arch64__))*/
+
+#ifndef __ASSEMBLY__
+
+#define TRACEREG_SZ	sizeof(struct pt_regs)
+#define STACKFRAME_SZ	sizeof(struct sparc_stackf)
+
+#define TRACEREG32_SZ	sizeof(struct pt_regs32)
+#define STACKFRAME32_SZ	sizeof(struct sparc_stackf32)
+
+#endif /* (!__ASSEMBLY__) */
+
+#define UREG_G0        0
+#define UREG_G1        1
+#define UREG_G2        2
+#define UREG_G3        3
+#define UREG_G4        4
+#define UREG_G5        5
+#define UREG_G6        6
+#define UREG_G7        7
+#define UREG_I0        8
+#define UREG_I1        9
+#define UREG_I2        10
+#define UREG_I3        11
+#define UREG_I4        12
+#define UREG_I5        13
+#define UREG_I6        14
+#define UREG_I7        15
+#define UREG_FP        UREG_I6
+#define UREG_RETPC     UREG_I7
+
+#if defined(__sparc__) && defined(__arch64__)
+/* 64 bit sparc */
+
+#ifndef __ASSEMBLY__
+
+
+#else /* __ASSEMBLY__ */
+/* For assembly code. */
+#define TRACEREG_SZ		0xa0
+#define STACKFRAME_SZ		0xc0
+
+#define TRACEREG32_SZ		0x50
+#define STACKFRAME32_SZ		0x60
+#endif /* __ASSEMBLY__ */
+
+#else /* (defined(__sparc__) && defined(__arch64__)) */
+
+/* 32 bit sparc */
+
+#ifndef __ASSEMBLY__
+
+
+#else /* (!__ASSEMBLY__) */
+/* For assembly code. */
+#define TRACEREG_SZ       0x50
+#define STACKFRAME_SZ     0x60
+#endif /* (!__ASSEMBLY__) */
+
+#endif /* (defined(__sparc__) && defined(__arch64__)) */
+
+
+/* These are for pt_regs. */
+#define PT_V9_G0     0x00
+#define PT_V9_G1     0x08
+#define PT_V9_G2     0x10
+#define PT_V9_G3     0x18
+#define PT_V9_G4     0x20
+#define PT_V9_G5     0x28
+#define PT_V9_G6     0x30
+#define PT_V9_G7     0x38
+#define PT_V9_I0     0x40
+#define PT_V9_I1     0x48
+#define PT_V9_I2     0x50
+#define PT_V9_I3     0x58
+#define PT_V9_I4     0x60
+#define PT_V9_I5     0x68
+#define PT_V9_I6     0x70
+#define PT_V9_FP     PT_V9_I6
+#define PT_V9_I7     0x78
+#define PT_V9_TSTATE 0x80
+#define PT_V9_TPC    0x88
+#define PT_V9_TNPC   0x90
+#define PT_V9_Y      0x98
+#define PT_V9_MAGIC  0x9c
+#define PT_TSTATE	PT_V9_TSTATE
+#define PT_TPC		PT_V9_TPC
+#define PT_TNPC		PT_V9_TNPC
+
+/* These for pt_regs32. */
+#define PT_PSR    0x0
+#define PT_PC     0x4
+#define PT_NPC    0x8
+#define PT_Y      0xc
+#define PT_G0     0x10
+#define PT_WIM    PT_G0
+#define PT_G1     0x14
+#define PT_G2     0x18
+#define PT_G3     0x1c
+#define PT_G4     0x20
+#define PT_G5     0x24
+#define PT_G6     0x28
+#define PT_G7     0x2c
+#define PT_I0     0x30
+#define PT_I1     0x34
+#define PT_I2     0x38
+#define PT_I3     0x3c
+#define PT_I4     0x40
+#define PT_I5     0x44
+#define PT_I6     0x48
+#define PT_FP     PT_I6
+#define PT_I7     0x4c
+
+/* Reg_window offsets */
+#define RW_V9_L0     0x00
+#define RW_V9_L1     0x08
+#define RW_V9_L2     0x10
+#define RW_V9_L3     0x18
+#define RW_V9_L4     0x20
+#define RW_V9_L5     0x28
+#define RW_V9_L6     0x30
+#define RW_V9_L7     0x38
+#define RW_V9_I0     0x40
+#define RW_V9_I1     0x48
+#define RW_V9_I2     0x50
+#define RW_V9_I3     0x58
+#define RW_V9_I4     0x60
+#define RW_V9_I5     0x68
+#define RW_V9_I6     0x70
+#define RW_V9_I7     0x78
+
+#define RW_L0     0x00
+#define RW_L1     0x04
+#define RW_L2     0x08
+#define RW_L3     0x0c
+#define RW_L4     0x10
+#define RW_L5     0x14
+#define RW_L6     0x18
+#define RW_L7     0x1c
+#define RW_I0     0x20
+#define RW_I1     0x24
+#define RW_I2     0x28
+#define RW_I3     0x2c
+#define RW_I4     0x30
+#define RW_I5     0x34
+#define RW_I6     0x38
+#define RW_I7     0x3c
+
+/* Stack_frame offsets */
+#define SF_V9_L0     0x00
+#define SF_V9_L1     0x08
+#define SF_V9_L2     0x10
+#define SF_V9_L3     0x18
+#define SF_V9_L4     0x20
+#define SF_V9_L5     0x28
+#define SF_V9_L6     0x30
+#define SF_V9_L7     0x38
+#define SF_V9_I0     0x40
+#define SF_V9_I1     0x48
+#define SF_V9_I2     0x50
+#define SF_V9_I3     0x58
+#define SF_V9_I4     0x60
+#define SF_V9_I5     0x68
+#define SF_V9_FP     0x70
+#define SF_V9_PC     0x78
+#define SF_V9_RETP   0x80
+#define SF_V9_XARG0  0x88
+#define SF_V9_XARG1  0x90
+#define SF_V9_XARG2  0x98
+#define SF_V9_XARG3  0xa0
+#define SF_V9_XARG4  0xa8
+#define SF_V9_XARG5  0xb0
+#define SF_V9_XXARG  0xb8
+
+#define SF_L0     0x00
+#define SF_L1     0x04
+#define SF_L2     0x08
+#define SF_L3     0x0c
+#define SF_L4     0x10
+#define SF_L5     0x14
+#define SF_L6     0x18
+#define SF_L7     0x1c
+#define SF_I0     0x20
+#define SF_I1     0x24
+#define SF_I2     0x28
+#define SF_I3     0x2c
+#define SF_I4     0x30
+#define SF_I5     0x34
+#define SF_FP     0x38
+#define SF_PC     0x3c
+#define SF_RETP   0x40
+#define SF_XARG0  0x44
+#define SF_XARG1  0x48
+#define SF_XARG2  0x4c
+#define SF_XARG3  0x50
+#define SF_XARG4  0x54
+#define SF_XARG5  0x58
+#define SF_XXARG  0x5c
+
+
+/* Stuff for the ptrace system call */
+#define PTRACE_SPARC_DETACH       11
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+#define PTRACE_GETFPREGS          14
+#define PTRACE_SETFPREGS          15
+#define PTRACE_READDATA           16
+#define PTRACE_WRITEDATA          17
+#define PTRACE_READTEXT           18
+#define PTRACE_WRITETEXT          19
+#define PTRACE_GETFPAREGS         20
+#define PTRACE_SETFPAREGS         21
+
+/* These are for debugging 64-bit processes, either from a 32 or 64 bit
+ * parent.  Thus their complements are for debugging 32-bit processes only.
+ */
+
+#define PTRACE_GETREGS64	  22
+#define PTRACE_SETREGS64	  23
+/* PTRACE_SYSCALL is 24 */
+#define PTRACE_GETFPREGS64	  25
+#define PTRACE_SETFPREGS64	  26
+
+#endif /* _UAPI__SPARC_PTRACE_H */
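
A minimal sketch (not part of the patch) of how an unwinder can use the magic/%tt encoding described above; the 0x1ff mask follows from the "low 9 bits clear" comment, and the function names are illustrative:

	/* A frame is a trap frame iff the upper bits match the magic. */
	static int is_trap_frame(const struct pt_regs *regs)
	{
		return (regs->magic & ~0x1ffU) == PT_REGS_MAGIC;
	}

	/* The trap type (%tt) lives in the low 9 bits. */
	static unsigned int frame_tt(const struct pt_regs *regs)
	{
		return regs->magic & 0x1ffU;
	}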
diff --git a/arch/sparc/include/uapi/asm/resource.h b/arch/sparc/include/uapi/asm/resource.h
new file mode 100644
index 0000000..cbe2de7
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/resource.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * resource.h: Resource definitions.
+ *
+ * Copyright (C) 1995,1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _SPARC_RESOURCE_H
+#define _SPARC_RESOURCE_H
+
+/*
+ * These two resource limit IDs have a Sparc/Linux-specific ordering,
+ * the rest comes from the generic header:
+ */
+#define RLIMIT_NOFILE		6	/* max number of open files */
+#define RLIMIT_NPROC		7	/* max number of processes */
+
+#if defined(__sparc__) && defined(__arch64__)
+/* Use generic version */
+#else
+/*
+ * SuS says limits have to be unsigned.
+ * We make this unsigned, but keep the
+ * old value for compatibility:
+ */
+#define RLIM_INFINITY		0x7fffffff
+#endif
+
+#include <asm-generic/resource.h>
+
+#endif /* !(_SPARC_RESOURCE_H) */
diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h
new file mode 100644
index 0000000..f3d309c
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/sembuf.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_SEMBUF_H
+#define _SPARC_SEMBUF_H
+
+/*
+ * The semid64_ds structure for sparc architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+	struct ipc64_perm sem_perm;		/* permissions .. see ipc.h */
+#if defined(__sparc__) && defined(__arch64__)
+	__kernel_time_t	sem_otime;		/* last semop time */
+	__kernel_time_t	sem_ctime;		/* last change time */
+#else
+	unsigned long	sem_otime_high;
+	unsigned long	sem_otime;		/* last semop time */
+	unsigned long	sem_ctime_high;
+	unsigned long	sem_ctime;		/* last change time */
+#endif
+	unsigned long	sem_nsems;		/* no. of semaphores in array */
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+};
+
+#endif /* _SPARC_SEMBUF_H */
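
On 32-bit sparc the 64-bit semaphore timestamps are carried as split high/low words. A sketch (not from this header) of reassembling one, assuming the _high word holds bits 63..32 as the field layout suggests:

	static unsigned long long sem_otime64(const struct semid64_ds *s)
	{
		return ((unsigned long long)s->sem_otime_high << 32) | s->sem_otime;
	}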
diff --git a/arch/sparc/include/uapi/asm/setup.h b/arch/sparc/include/uapi/asm/setup.h
new file mode 100644
index 0000000..3c208a4
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/setup.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *	Just a placeholder.
+ */
+
+#ifndef _UAPI_SPARC_SETUP_H
+#define _UAPI_SPARC_SETUP_H
+
+#if defined(__sparc__) && defined(__arch64__)
+# define COMMAND_LINE_SIZE 2048
+#else
+# define COMMAND_LINE_SIZE 256
+#endif
+
+
+#endif /* _UAPI_SPARC_SETUP_H */
diff --git a/arch/sparc/include/uapi/asm/shmbuf.h b/arch/sparc/include/uapi/asm/shmbuf.h
new file mode 100644
index 0000000..06618b8
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/shmbuf.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_SHMBUF_H
+#define _SPARC_SHMBUF_H
+
+/* 
+ * The shmid64_ds structure for sparc architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct shmid64_ds {
+	struct ipc64_perm	shm_perm;	/* operation perms */
+#if defined(__sparc__) && defined(__arch64__)
+	__kernel_time_t		shm_atime;	/* last attach time */
+	__kernel_time_t		shm_dtime;	/* last detach time */
+	__kernel_time_t		shm_ctime;	/* last change time */
+#else
+	unsigned long		shm_atime_high;
+	unsigned long		shm_atime;	/* last attach time */
+	unsigned long		shm_dtime_high;
+	unsigned long		shm_dtime;	/* last detach time */
+	unsigned long		shm_ctime_high;
+	unsigned long		shm_ctime;	/* last change time */
+#endif
+	size_t			shm_segsz;	/* size of segment (bytes) */
+	__kernel_pid_t		shm_cpid;	/* pid of creator */
+	__kernel_pid_t		shm_lpid;	/* pid of last operator */
+	unsigned long		shm_nattch;	/* no. of current attaches */
+	unsigned long		__unused1;
+	unsigned long		__unused2;
+};
+
+struct shminfo64 {
+	unsigned long	shmmax;
+	unsigned long	shmmin;
+	unsigned long	shmmni;
+	unsigned long	shmseg;
+	unsigned long	shmall;
+	unsigned long	__unused1;
+	unsigned long	__unused2;
+	unsigned long	__unused3;
+	unsigned long	__unused4;
+};
+
+#endif /* _SPARC_SHMBUF_H */
diff --git a/arch/sparc/include/uapi/asm/sigcontext.h b/arch/sparc/include/uapi/asm/sigcontext.h
new file mode 100644
index 0000000..043dd4b
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/sigcontext.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * There isn't anything here anymore, but the file must not be empty or patch
+ * will delete it.
+ */
diff --git a/arch/sparc/include/uapi/asm/siginfo.h b/arch/sparc/include/uapi/asm/siginfo.h
new file mode 100644
index 0000000..e704955
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/siginfo.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__SPARC_SIGINFO_H
+#define _UAPI__SPARC_SIGINFO_H
+
+#if defined(__sparc__) && defined(__arch64__)
+
+#define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
+#define __ARCH_SI_BAND_T int
+
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+
+#define SI_NOINFO	32767		/* no information in siginfo_t */
+
+/*
+ * SIGEMT si_codes
+ */
+#define EMT_TAGOVF	1	/* tag overflow */
+#define NSIGEMT		1
+
+#endif /* _UAPI__SPARC_SIGINFO_H */
diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h
new file mode 100644
index 0000000..ff95059
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/signal.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__SPARC_SIGNAL_H
+#define _UAPI__SPARC_SIGNAL_H
+
+#include <asm/sigcontext.h>
+#include <linux/compiler.h>
+
+
+/* On the Sparc the signal handlers get passed a 'sub-signal' code
+ * for certain signal types, which we document here.
+ */
+#define SIGHUP		 1
+#define SIGINT		 2
+#define SIGQUIT		 3
+#define SIGILL		 4
+#define    SUBSIG_STACK       0
+#define    SUBSIG_ILLINST     2
+#define    SUBSIG_PRIVINST    3
+#define    SUBSIG_BADTRAP(t)  (0x80 + (t))
+
+#define SIGTRAP		 5
+#define SIGABRT		 6
+#define SIGIOT		 6
+
+#define SIGEMT           7
+#define    SUBSIG_TAG    10
+
+#define SIGFPE		 8
+#define    SUBSIG_FPDISABLED     0x400
+#define    SUBSIG_FPERROR        0x404
+#define    SUBSIG_FPINTOVFL      0x001
+#define    SUBSIG_FPSTSIG        0x002
+#define    SUBSIG_IDIVZERO       0x014
+#define    SUBSIG_FPINEXACT      0x0c4
+#define    SUBSIG_FPDIVZERO      0x0c8
+#define    SUBSIG_FPUNFLOW       0x0cc
+#define    SUBSIG_FPOPERROR      0x0d0
+#define    SUBSIG_FPOVFLOW       0x0d4
+
+#define SIGKILL		 9
+#define SIGBUS          10
+#define    SUBSIG_BUSTIMEOUT    1
+#define    SUBSIG_ALIGNMENT     2
+#define    SUBSIG_MISCERROR     5
+
+#define SIGSEGV		11
+#define    SUBSIG_NOMAPPING     3
+#define    SUBSIG_PROTECTION    4
+#define    SUBSIG_SEGERROR      5
+
+#define SIGSYS		12
+
+#define SIGPIPE		13
+#define SIGALRM		14
+#define SIGTERM		15
+#define SIGURG          16
+
+/* SunOS values which deviate from the Linux/i386 ones */
+#define SIGSTOP		17
+#define SIGTSTP		18
+#define SIGCONT		19
+#define SIGCHLD		20
+#define SIGTTIN		21
+#define SIGTTOU		22
+#define SIGIO		23
+#define SIGPOLL		SIGIO   /* SysV name for SIGIO */
+#define SIGXCPU		24
+#define SIGXFSZ		25
+#define SIGVTALRM	26
+#define SIGPROF		27
+#define SIGWINCH	28
+#define SIGLOST		29
+#define SIGPWR		SIGLOST
+#define SIGUSR1		30
+#define SIGUSR2		31
+
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+#define __OLD_NSIG	32
+#define __NEW_NSIG      64
+#ifdef __arch64__
+#define _NSIG_BPW       64
+#else
+#define _NSIG_BPW       32
+#endif
+#define _NSIG_WORDS     (__NEW_NSIG / _NSIG_BPW)
+
+#define SIGRTMIN       32
+#define SIGRTMAX       __NEW_NSIG
+
+#if defined(__KERNEL__) || defined(__WANT_POSIX1B_SIGNALS__)
+#define _NSIG			__NEW_NSIG
+#define __new_sigset_t		sigset_t
+#define __new_sigaction		sigaction
+#define __new_sigaction32	sigaction32
+#define __old_sigset_t		old_sigset_t
+#define __old_sigaction		old_sigaction
+#define __old_sigaction32	old_sigaction32
+#else
+#define _NSIG			__OLD_NSIG
+#define NSIG			_NSIG
+#define __old_sigset_t		sigset_t
+#define __old_sigaction		sigaction
+#define __old_sigaction32	sigaction32
+#endif
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long __old_sigset_t;            /* at least 32 bits */
+
+typedef struct {
+       unsigned long sig[_NSIG_WORDS];
+} __new_sigset_t;
+
+/* A SunOS sigstack */
+struct sigstack {
+	/* XXX 32-bit pointers pinhead XXX */
+	char *the_stack;
+	int   cur_status;
+};
+
+/* Sigvec flags */
+#define _SV_SSTACK    1u    /* This signal handler should use sig-stack */
+#define _SV_INTR      2u    /* Sig return should not restart system call */
+#define _SV_RESET     4u    /* Set handler to SIG_DFL upon taken signal */
+#define _SV_IGNCHILD  8u    /* Do not send SIGCHLD */
+
+/*
+ * sa_flags values: SA_STACK is not currently supported, but will allow the
+ * usage of signal stacks by using the (now obsolete) sa_restorer field in
+ * the sigaction structure as a stack pointer. This is now possible due to
+ * the changes in signal handling. LBT 010493.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ */
+#define SA_NOCLDSTOP	_SV_IGNCHILD
+#define SA_STACK	_SV_SSTACK
+#define SA_ONSTACK	_SV_SSTACK
+#define SA_RESTART	_SV_INTR
+#define SA_ONESHOT	_SV_RESET
+#define SA_NODEFER	0x20u
+#define SA_NOCLDWAIT    0x100u
+#define SA_SIGINFO      0x200u
+
+#define SA_NOMASK	SA_NODEFER
+
+#define SIG_BLOCK          0x01	/* for blocking signals */
+#define SIG_UNBLOCK        0x02	/* for unblocking signals */
+#define SIG_SETMASK        0x04	/* for setting the signal mask */
+
+#define MINSIGSTKSZ	4096
+#define SIGSTKSZ	16384
+
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __KERNEL__
+struct __new_sigaction {
+	__sighandler_t		sa_handler;
+	unsigned long		sa_flags;
+	__sigrestore_t		sa_restorer;  /* not used by Linux/SPARC yet */
+	__new_sigset_t		sa_mask;
+};
+
+struct __old_sigaction {
+	__sighandler_t		sa_handler;
+	__old_sigset_t		sa_mask;
+	unsigned long		sa_flags;
+	void			(*sa_restorer)(void);  /* not used by Linux/SPARC yet */
+};
+#endif
+
+typedef struct sigaltstack {
+	void			__user *ss_sp;
+	int			ss_flags;
+	size_t			ss_size;
+} stack_t;
+
+
+#endif /* !(__ASSEMBLY__) */
+
+#endif /* _UAPI__SPARC_SIGNAL_H */
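
A minimal sketch (not part of the patch) of manipulating a __new_sigset_t with the word-size constants defined above; sigset_add() is an illustrative name:

	/* Set 1-based signal number 'sig' in the set. */
	static void sigset_add(__new_sigset_t *set, int sig)
	{
		set->sig[(sig - 1) / _NSIG_BPW] |= 1UL << ((sig - 1) % _NSIG_BPW);
	}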
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
new file mode 100644
index 0000000..7ea35e5
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockopt(2) */
+#define SOL_SOCKET	0xffff
+
+#define SO_DEBUG	0x0001
+#define SO_PASSCRED	0x0002
+#define SO_REUSEADDR	0x0004
+#define SO_KEEPALIVE	0x0008
+#define SO_DONTROUTE	0x0010
+#define SO_BROADCAST	0x0020
+#define SO_PEERCRED	0x0040
+#define SO_LINGER	0x0080
+#define SO_OOBINLINE	0x0100
+#define SO_REUSEPORT	0x0200
+#define SO_BSDCOMPAT    0x0400
+#define SO_RCVLOWAT     0x0800
+#define SO_SNDLOWAT     0x1000
+#define SO_RCVTIMEO     0x2000
+#define SO_SNDTIMEO     0x4000
+#define SO_ACCEPTCONN	0x8000
+
+#define SO_SNDBUF	0x1001
+#define SO_RCVBUF	0x1002
+#define SO_SNDBUFFORCE	0x100a
+#define SO_RCVBUFFORCE	0x100b
+#define SO_ERROR	0x1007
+#define SO_TYPE		0x1008
+#define SO_PROTOCOL	0x1028
+#define SO_DOMAIN	0x1029
+
+
+/* Linux specific, keep the same. */
+#define SO_NO_CHECK	0x000b
+#define SO_PRIORITY	0x000c
+
+#define SO_BINDTODEVICE 0x000d
+
+#define SO_ATTACH_FILTER	0x001a
+#define SO_DETACH_FILTER        0x001b
+#define SO_GET_FILTER		SO_ATTACH_FILTER
+
+#define SO_PEERNAME		0x001c
+#define SO_TIMESTAMP		0x001d
+#define SCM_TIMESTAMP		SO_TIMESTAMP
+
+#define SO_PEERSEC		0x001e
+#define SO_PASSSEC		0x001f
+#define SO_TIMESTAMPNS		0x0021
+#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
+
+#define SO_MARK			0x0022
+
+#define SO_TIMESTAMPING		0x0023
+#define SCM_TIMESTAMPING	SO_TIMESTAMPING
+
+#define SO_RXQ_OVFL             0x0024
+
+#define SO_WIFI_STATUS		0x0025
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+#define SO_PEEK_OFF		0x0026
+
+/* Instruct lower device to use the last 4 bytes of skb data as FCS */
+#define SO_NOFCS		0x0027
+
+#define SO_LOCK_FILTER		0x0028
+
+#define SO_SELECT_ERR_QUEUE	0x0029
+
+#define SO_BUSY_POLL		0x0030
+
+#define SO_MAX_PACING_RATE	0x0031
+
+#define SO_BPF_EXTENSIONS	0x0032
+
+#define SO_INCOMING_CPU		0x0033
+
+#define SO_ATTACH_BPF		0x0034
+#define SO_DETACH_BPF		SO_DETACH_FILTER
+
+#define SO_ATTACH_REUSEPORT_CBPF	0x0035
+#define SO_ATTACH_REUSEPORT_EBPF	0x0036
+
+#define SO_CNX_ADVICE		0x0037
+
+#define SCM_TIMESTAMPING_OPT_STATS	0x0038
+
+#define SO_MEMINFO		0x0039
+
+#define SO_INCOMING_NAPI_ID	0x003a
+
+#define SO_COOKIE		0x003b
+
+#define SCM_TIMESTAMPING_PKTINFO	0x003c
+
+#define SO_PEERGROUPS		0x003d
+
+#define SO_ZEROCOPY		0x003e
+
+#define SO_TXTIME		0x003f
+#define SCM_TXTIME		SO_TXTIME
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION		0x5001
+#define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
+#define SO_SECURITY_ENCRYPTION_NETWORK		0x5004
+
+#endif /* _ASM_SOCKET_H */
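
The sparc values above plug into the ordinary setsockopt(2) interface; only the numbers differ from asm-generic (note SOL_SOCKET is 0xffff, a SunOS inheritance, rather than 1). A minimal sketch, assuming 'fd' is an open socket:

	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
		perror("setsockopt");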
diff --git a/arch/sparc/include/uapi/asm/sockios.h b/arch/sparc/include/uapi/asm/sockios.h
new file mode 100644
index 0000000..18a3ec1
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/sockios.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_SPARC_SOCKIOS_H
+#define _ASM_SPARC_SOCKIOS_H
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN 	0x8901
+#define SIOCSPGRP	0x8902
+#define FIOGETOWN	0x8903
+#define SIOCGPGRP	0x8904
+#define SIOCATMARK	0x8905
+#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+
+#endif /* !(_ASM_SPARC_SOCKIOS_H) */
+
diff --git a/arch/sparc/include/uapi/asm/stat.h b/arch/sparc/include/uapi/asm/stat.h
new file mode 100644
index 0000000..b6ec4eb
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/stat.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __SPARC_STAT_H
+#define __SPARC_STAT_H
+
+#include <linux/types.h>
+
+#if defined(__sparc__) && defined(__arch64__)
+/* 64 bit sparc */
+struct stat {
+	unsigned int st_dev;
+	ino_t   st_ino;
+	mode_t  st_mode;
+	short   st_nlink;
+	uid_t   st_uid;
+	gid_t   st_gid;
+	unsigned int st_rdev;
+	off_t   st_size;
+	time_t  st_atime;
+	time_t  st_mtime;
+	time_t  st_ctime;
+	off_t   st_blksize;
+	off_t   st_blocks;
+	unsigned long  __unused4[2];
+};
+
+struct stat64 {
+	unsigned long	st_dev;
+	unsigned long	st_ino;
+	unsigned long	st_nlink;
+
+	unsigned int	st_mode;
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+	unsigned int	__pad0;
+
+	unsigned long	st_rdev;
+	long		st_size;
+	long		st_blksize;
+	long		st_blocks;
+
+	unsigned long	st_atime;
+	unsigned long	st_atime_nsec;
+	unsigned long	st_mtime;
+	unsigned long	st_mtime_nsec;
+	unsigned long	st_ctime;
+	unsigned long	st_ctime_nsec;
+	long		__unused[3];
+};
+
+#else
+/* 32 bit sparc */
+struct stat {
+	unsigned short	st_dev;
+	ino_t		st_ino;
+	mode_t		st_mode;
+	short		st_nlink;
+	unsigned short	st_uid;
+	unsigned short	st_gid;
+	unsigned short	st_rdev;
+	off_t		st_size;
+	time_t		st_atime;
+	unsigned long	st_atime_nsec;
+	time_t		st_mtime;
+	unsigned long	st_mtime_nsec;
+	time_t		st_ctime;
+	unsigned long	st_ctime_nsec;
+	off_t		st_blksize;
+	off_t		st_blocks;
+	unsigned long	__unused4[2];
+};
+
+#define STAT_HAVE_NSEC 1
+
+struct stat64 {
+	unsigned long long st_dev;
+
+	unsigned long long st_ino;
+
+	unsigned int	st_mode;
+	unsigned int	st_nlink;
+
+	unsigned int	st_uid;
+	unsigned int	st_gid;
+
+	unsigned long long st_rdev;
+
+	unsigned char	__pad3[8];
+
+	long long	st_size;
+	unsigned int	st_blksize;
+
+	unsigned char	__pad4[8];
+	unsigned int	st_blocks;
+
+	unsigned int	st_atime;
+	unsigned int	st_atime_nsec;
+
+	unsigned int	st_mtime;
+	unsigned int	st_mtime_nsec;
+
+	unsigned int	st_ctime;
+	unsigned int	st_ctime_nsec;
+
+	unsigned int	__unused4;
+	unsigned int	__unused5;
+};
+#endif /* defined(__sparc__) && defined(__arch64__) */
+#endif /* __SPARC_STAT_H */
diff --git a/arch/sparc/include/uapi/asm/statfs.h b/arch/sparc/include/uapi/asm/statfs.h
new file mode 100644
index 0000000..20c8f5b
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/statfs.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef ___ASM_SPARC_STATFS_H
+#define ___ASM_SPARC_STATFS_H
+
+#include <asm-generic/statfs.h>
+
+#endif
diff --git a/arch/sparc/include/uapi/asm/swab.h b/arch/sparc/include/uapi/asm/swab.h
new file mode 100644
index 0000000..6b1b3f1
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/swab.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_SWAB_H
+#define _SPARC_SWAB_H
+
+#include <linux/types.h>
+#include <asm/asi.h>
+
+#if defined(__sparc__) && defined(__arch64__)
+static inline __u16 __arch_swab16p(const __u16 *addr)
+{
+	__u16 ret;
+
+	__asm__ __volatile__ ("lduha [%2] %3, %0"
+			      : "=r" (ret)
+			      : "m" (*addr), "r" (addr), "i" (ASI_PL));
+	return ret;
+}
+#define __arch_swab16p __arch_swab16p
+
+static inline __u32 __arch_swab32p(const __u32 *addr)
+{
+	__u32 ret;
+
+	__asm__ __volatile__ ("lduwa [%2] %3, %0"
+			      : "=r" (ret)
+			      : "m" (*addr), "r" (addr), "i" (ASI_PL));
+	return ret;
+}
+#define __arch_swab32p __arch_swab32p
+
+static inline __u64 __arch_swab64p(const __u64 *addr)
+{
+	__u64 ret;
+
+	__asm__ __volatile__ ("ldxa [%2] %3, %0"
+			      : "=r" (ret)
+			      : "m" (*addr), "r" (addr), "i" (ASI_PL));
+	return ret;
+}
+#define __arch_swab64p __arch_swab64p
+
+#else
+#define __SWAB_64_THRU_32__
+#endif /* defined(__sparc__) && defined(__arch64__) */
+
+#endif /* _SPARC_SWAB_H */
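
The inline-asm versions above swap during the load itself by using the little-endian ASI (ASI_PL). For reference, a portable C sketch (not from this header) computing the same 32-bit result:

	static inline __u32 swab32_portable(__u32 v)
	{
		return (v >> 24) | ((v >> 8) & 0x0000ff00) |
		       ((v << 8) & 0x00ff0000) | (v << 24);
	}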
diff --git a/arch/sparc/include/uapi/asm/termbits.h b/arch/sparc/include/uapi/asm/termbits.h
new file mode 100644
index 0000000..ce5ad5d
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/termbits.h
@@ -0,0 +1,264 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_SPARC_TERMBITS_H
+#define _UAPI_SPARC_TERMBITS_H
+
+#include <linux/posix_types.h>
+
+typedef unsigned char   cc_t;
+typedef unsigned int    speed_t;
+
+#if defined(__sparc__) && defined(__arch64__)
+typedef unsigned int    tcflag_t;
+#else
+typedef unsigned long   tcflag_t;
+#endif
+
+#define NCC 8
+struct termio {
+	unsigned short c_iflag;		/* input mode flags */
+	unsigned short c_oflag;		/* output mode flags */
+	unsigned short c_cflag;		/* control mode flags */
+	unsigned short c_lflag;		/* local mode flags */
+	unsigned char c_line;		/* line discipline */
+	unsigned char c_cc[NCC];	/* control characters */
+};
+
+#define NCCS 17
+struct termios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+#ifndef __KERNEL__
+	cc_t c_cc[NCCS];		/* control characters */
+#else
+	cc_t c_cc[NCCS+2];	/* kernel needs 2 more to hold vmin/vtime */
+#define SIZEOF_USER_TERMIOS sizeof (struct termios) - (2*sizeof (cc_t))
+#endif
+};
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS+2];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+struct ktermios {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_line;			/* line discipline */
+	cc_t c_cc[NCCS+2];		/* control characters */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
+/* c_cc characters */
+#define VINTR    0
+#define VQUIT    1
+#define VERASE   2
+#define VKILL    3
+#define VEOF     4
+#define VEOL     5
+#define VEOL2    6
+#define VSWTC    7
+#define VSTART   8
+#define VSTOP    9
+
+
+
+#define VSUSP    10
+#define VDSUSP   11  /* SunOS POSIX nicety I do believe... */
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE  14
+#define VLNEXT   15
+
+/* Kernel keeps vmin/vtime separated, user apps assume vmin/vtime is
+ * shared with eof/eol
+ */
+#ifndef __KERNEL__
+#define VMIN     VEOF
+#define VTIME    VEOL
+#endif
+
+/* c_iflag bits */
+#define IGNBRK	0x00000001
+#define BRKINT	0x00000002
+#define IGNPAR	0x00000004
+#define PARMRK	0x00000008
+#define INPCK	0x00000010
+#define ISTRIP	0x00000020
+#define INLCR	0x00000040
+#define IGNCR	0x00000080
+#define ICRNL	0x00000100
+#define IUCLC	0x00000200
+#define IXON	0x00000400
+#define IXANY	0x00000800
+#define IXOFF	0x00001000
+#define IMAXBEL	0x00002000
+#define IUTF8   0x00004000
+
+/* c_oflag bits */
+#define OPOST	0x00000001
+#define OLCUC	0x00000002
+#define ONLCR	0x00000004
+#define OCRNL	0x00000008
+#define ONOCR	0x00000010
+#define ONLRET	0x00000020
+#define OFILL	0x00000040
+#define OFDEL	0x00000080
+#define NLDLY	0x00000100
+#define   NL0	0x00000000
+#define   NL1	0x00000100
+#define CRDLY	0x00000600
+#define   CR0	0x00000000
+#define   CR1	0x00000200
+#define   CR2	0x00000400
+#define   CR3	0x00000600
+#define TABDLY	0x00001800
+#define   TAB0	0x00000000
+#define   TAB1	0x00000800
+#define   TAB2	0x00001000
+#define   TAB3	0x00001800
+#define   XTABS	0x00001800
+#define BSDLY	0x00002000
+#define   BS0	0x00000000
+#define   BS1	0x00002000
+#define VTDLY	0x00004000
+#define   VT0	0x00000000
+#define   VT1	0x00004000
+#define FFDLY	0x00008000
+#define   FF0	0x00000000
+#define   FF1	0x00008000
+#define PAGEOUT 0x00010000  /* SUNOS specific */
+#define WRAP    0x00020000  /* SUNOS specific */
+
+/* c_cflag bit meaning */
+#define CBAUD	  0x0000100f
+#define  B0	  0x00000000   /* hang up */
+#define  B50	  0x00000001
+#define  B75	  0x00000002
+#define  B110	  0x00000003
+#define  B134	  0x00000004
+#define  B150	  0x00000005
+#define  B200	  0x00000006
+#define  B300	  0x00000007
+#define  B600	  0x00000008
+#define  B1200	  0x00000009
+#define  B1800	  0x0000000a
+#define  B2400	  0x0000000b
+#define  B4800	  0x0000000c
+#define  B9600	  0x0000000d
+#define  B19200	  0x0000000e
+#define  B38400	  0x0000000f
+#define EXTA      B19200
+#define EXTB      B38400
+#define  CSIZE    0x00000030
+#define   CS5	  0x00000000
+#define   CS6	  0x00000010
+#define   CS7	  0x00000020
+#define   CS8	  0x00000030
+#define CSTOPB	  0x00000040
+#define CREAD	  0x00000080
+#define PARENB	  0x00000100
+#define PARODD	  0x00000200
+#define HUPCL	  0x00000400
+#define CLOCAL	  0x00000800
+#define CBAUDEX   0x00001000
+/* We'll never see these speeds with the Zilogs, but for completeness... */
+#define  BOTHER   0x00001000
+#define  B57600   0x00001001
+#define  B115200  0x00001002
+#define  B230400  0x00001003
+#define  B460800  0x00001004
+/* This is what we can do with the Zilogs. */
+#define  B76800   0x00001005
+/* This is what we can do with the SAB82532. */
+#define  B153600  0x00001006
+#define  B307200  0x00001007
+#define  B614400  0x00001008
+#define  B921600  0x00001009
+/* And these are the rest... */
+#define  B500000  0x0000100a
+#define  B576000  0x0000100b
+#define B1000000  0x0000100c
+#define B1152000  0x0000100d
+#define B1500000  0x0000100e
+#define B2000000  0x0000100f
+/* These have totally bogus values and nobody uses them
+   so far. Later on we'd have to use say 0x10000x and
+   adjust CBAUD constant and drivers accordingly.
+#define B2500000  0x00001010
+#define B3000000  0x00001011
+#define B3500000  0x00001012
+#define B4000000  0x00001013  */
+#define CIBAUD	  0x100f0000  /* input baud rate (not used) */
+#define CMSPAR	  0x40000000  /* mark or space (stick) parity */
+#define CRTSCTS	  0x80000000  /* flow control */
+
+#define IBSHIFT	  16		/* Shift from CBAUD to CIBAUD */
+
+/* c_lflag bits */
+#define ISIG	0x00000001
+#define ICANON	0x00000002
+#define XCASE	0x00000004
+#define ECHO	0x00000008
+#define ECHOE	0x00000010
+#define ECHOK	0x00000020
+#define ECHONL	0x00000040
+#define NOFLSH	0x00000080
+#define TOSTOP	0x00000100
+#define ECHOCTL	0x00000200
+#define ECHOPRT	0x00000400
+#define ECHOKE	0x00000800
+#define DEFECHO 0x00001000  /* SUNOS thing, what is it? */
+#define FLUSHO	0x00002000
+#define PENDIN	0x00004000
+#define IEXTEN	0x00008000
+#define EXTPROC	0x00010000
+
+/* modem lines */
+#define TIOCM_LE	0x001
+#define TIOCM_DTR	0x002
+#define TIOCM_RTS	0x004
+#define TIOCM_ST	0x008
+#define TIOCM_SR	0x010
+#define TIOCM_CTS	0x020
+#define TIOCM_CAR	0x040
+#define TIOCM_RNG	0x080
+#define TIOCM_DSR	0x100
+#define TIOCM_CD	TIOCM_CAR
+#define TIOCM_RI	TIOCM_RNG
+#define TIOCM_OUT1	0x2000
+#define TIOCM_OUT2	0x4000
+#define TIOCM_LOOP	0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+#define TIOCSER_TEMT    0x01	/* Transmitter physically empty */
+
+
+/* tcflow() and TCXONC use these */
+#define	TCOOFF		0
+#define	TCOON		1
+#define	TCIOFF		2
+#define	TCION		3
+
+/* tcflush() and TCFLSH use these */
+#define	TCIFLUSH	0
+#define	TCOFLUSH	1
+#define	TCIOFLUSH	2
+
+/* tcsetattr uses these */
+#define	TCSANOW		0
+#define	TCSADRAIN	1
+#define	TCSAFLUSH	2
+
+#endif /* _UAPI_SPARC_TERMBITS_H */
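
A minimal sketch (not part of the patch) of classifying the requested output rate from c_cflag. On sparc BOTHER equals CBAUDEX, so an exact match means an arbitrary rate carried in the termios2 c_ospeed field; 'tio' is assumed to be a struct termios2 filled via TCGETS2, and use_bps()/use_code() are illustrative names:

	tcflag_t code = tio.c_cflag & CBAUD;

	if (code == BOTHER)
		use_bps(tio.c_ospeed);	/* arbitrary rate in bits per second */
	else
		use_code(code);		/* a fixed Bxxx code, e.g. B9600 == 0x0d */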
diff --git a/arch/sparc/include/uapi/asm/termios.h b/arch/sparc/include/uapi/asm/termios.h
new file mode 100644
index 0000000..ee86f40
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/termios.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_SPARC_TERMIOS_H
+#define _UAPI_SPARC_TERMIOS_H
+
+#include <asm/ioctls.h>
+#include <asm/termbits.h>
+
+#if defined(__KERNEL__) || defined(__DEFINE_BSD_TERMIOS)
+struct sgttyb {
+	char	sg_ispeed;
+	char	sg_ospeed;
+	char	sg_erase;
+	char	sg_kill;
+	short	sg_flags;
+};
+
+struct tchars {
+	char	t_intrc;
+	char	t_quitc;
+	char	t_startc;
+	char	t_stopc;
+	char	t_eofc;
+	char	t_brkc;
+};
+
+struct ltchars {
+	char	t_suspc;
+	char	t_dsuspc;
+	char	t_rprntc;
+	char	t_flushc;
+	char	t_werasc;
+	char	t_lnextc;
+};
+#endif /* __KERNEL__ || __DEFINE_BSD_TERMIOS */
+
+struct winsize {
+	unsigned short ws_row;
+	unsigned short ws_col;
+	unsigned short ws_xpixel;
+	unsigned short ws_ypixel;
+};
+
+
+#endif /* _UAPI_SPARC_TERMIOS_H */
diff --git a/arch/sparc/include/uapi/asm/traps.h b/arch/sparc/include/uapi/asm/traps.h
new file mode 100644
index 0000000..930db74
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/traps.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * traps.h:  Format of entries for the Sparc trap table.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef _UAPI_SPARC_TRAPS_H
+#define _UAPI_SPARC_TRAPS_H
+
+#define NUM_SPARC_TRAPS  255
+
+#ifndef __ASSEMBLY__
+#endif /* !(__ASSEMBLY__) */
+
+/* For patching the trap table at boot time, we need to know how to
+ * form various common Sparc instructions.  Thus these macros...
+ */
+
+#define SPARC_MOV_CONST_L3(const) (0xa6102000 | (const&0xfff))
+
+/* The following assumes that the branch lies before the place we
+ * are branching to.  This is the case for a trap vector...
+ * You have been warned.
+ */
+#define SPARC_BRANCH(dest_addr, inst_addr) \
+          (0x10800000 | (((dest_addr-inst_addr)>>2)&0x3fffff))
+
+#define SPARC_RD_PSR_L0  (0xa1480000)
+#define SPARC_RD_WIM_L3  (0xa7500000)
+#define SPARC_NOP (0x01000000)
+
+/* Various interesting trap levels. */
+/* First, hardware traps. */
+#define SP_TRAP_TFLT    0x1          /* Text fault */
+#define SP_TRAP_II      0x2          /* Illegal Instruction */
+#define SP_TRAP_PI      0x3          /* Privileged Instruction */
+#define SP_TRAP_FPD     0x4          /* Floating Point Disabled */
+#define SP_TRAP_WOVF    0x5          /* Window Overflow */
+#define SP_TRAP_WUNF    0x6          /* Window Underflow */
+#define SP_TRAP_MNA     0x7          /* Memory Address Unaligned */
+#define SP_TRAP_FPE     0x8          /* Floating Point Exception */
+#define SP_TRAP_DFLT    0x9          /* Data Fault */
+#define SP_TRAP_TOF     0xa          /* Tag Overflow */
+#define SP_TRAP_WDOG    0xb          /* Watchpoint Detected */
+#define SP_TRAP_IRQ1    0x11         /* IRQ level 1 */
+#define SP_TRAP_IRQ2    0x12         /* IRQ level 2 */
+#define SP_TRAP_IRQ3    0x13         /* IRQ level 3 */
+#define SP_TRAP_IRQ4    0x14         /* IRQ level 4 */
+#define SP_TRAP_IRQ5    0x15         /* IRQ level 5 */
+#define SP_TRAP_IRQ6    0x16         /* IRQ level 6 */
+#define SP_TRAP_IRQ7    0x17         /* IRQ level 7 */
+#define SP_TRAP_IRQ8    0x18         /* IRQ level 8 */
+#define SP_TRAP_IRQ9    0x19         /* IRQ level 9 */
+#define SP_TRAP_IRQ10   0x1a         /* IRQ level 10 */
+#define SP_TRAP_IRQ11   0x1b         /* IRQ level 11 */
+#define SP_TRAP_IRQ12   0x1c         /* IRQ level 12 */
+#define SP_TRAP_IRQ13   0x1d         /* IRQ level 13 */
+#define SP_TRAP_IRQ14   0x1e         /* IRQ level 14 */
+#define SP_TRAP_IRQ15   0x1f         /* IRQ level 15 Non-maskable */
+#define SP_TRAP_RACC    0x20         /* Register Access Error ??? */
+#define SP_TRAP_IACC    0x21         /* Instruction Access Error */
+#define SP_TRAP_CPDIS   0x24         /* Co-Processor Disabled */
+#define SP_TRAP_BADFL   0x25         /* Unimplemented Flush Instruction */
+#define SP_TRAP_CPEXP   0x28         /* Co-Processor Exception */
+#define SP_TRAP_DACC    0x29         /* Data Access Error */
+#define SP_TRAP_DIVZ    0x2a         /* Divide By Zero */
+#define SP_TRAP_DSTORE  0x2b         /* Data Store Error ??? */
+#define SP_TRAP_DMM     0x2c         /* Data Access MMU Miss ??? */
+#define SP_TRAP_IMM     0x3c         /* Instruction Access MMU Miss ??? */
+
+/* Now the Software Traps... */
+#define SP_TRAP_SUNOS   0x80         /* SunOS System Call */
+#define SP_TRAP_SBPT    0x81         /* Software Breakpoint */
+#define SP_TRAP_SDIVZ   0x82         /* Software Divide-by-Zero trap */
+#define SP_TRAP_FWIN    0x83         /* Flush Windows */
+#define SP_TRAP_CWIN    0x84         /* Clean Windows */
+#define SP_TRAP_RCHK    0x85         /* Range Check */
+#define SP_TRAP_FUNA    0x86         /* Fix Unaligned Access */
+#define SP_TRAP_IOWFL   0x87         /* Integer Overflow */
+#define SP_TRAP_SOLARIS 0x88         /* Solaris System Call */
+#define SP_TRAP_NETBSD  0x89         /* NetBSD System Call */
+#define SP_TRAP_LINUX   0x90         /* Linux System Call */
+
+/* Names used for compatibility with SunOS */
+#define ST_SYSCALL              0x00
+#define ST_BREAKPOINT           0x01
+#define ST_DIV0                 0x02
+#define ST_FLUSH_WINDOWS        0x03
+#define ST_CLEAN_WINDOWS        0x04
+#define ST_RANGE_CHECK          0x05
+#define ST_FIX_ALIGN            0x06
+#define ST_INT_OVERFLOW         0x07
+
+/* Special traps... */
+#define SP_TRAP_KBPT1   0xfe         /* KADB/PROM Breakpoint one */
+#define SP_TRAP_KBPT2   0xff         /* KADB/PROM Breakpoint two */
+
+/* Handy Macros */
+/* Is this a trap we never expect to get? */
+#define BAD_TRAP_P(level) \
+        ((level > SP_TRAP_WDOG && level < SP_TRAP_IRQ1) || \
+	 (level > SP_TRAP_IACC && level < SP_TRAP_CPDIS) || \
+	 (level > SP_TRAP_BADFL && level < SP_TRAP_CPEXP) || \
+	 (level > SP_TRAP_DMM && level < SP_TRAP_IMM) || \
+	 (level > SP_TRAP_IMM && level < SP_TRAP_SUNOS) || \
+	 (level > SP_TRAP_LINUX && level < SP_TRAP_KBPT1))
+
+/* Is this a Hardware trap? */
+#define HW_TRAP_P(level) ((level > 0) && (level < SP_TRAP_SUNOS))
+
+/* Is this a Software trap? */
+#define SW_TRAP_P(level) ((level >= SP_TRAP_SUNOS) && (level <= SP_TRAP_KBPT2))
+
+/* Is this a system call for some OS we know about? */
+#define SCALL_TRAP_P(level) ((level == SP_TRAP_SUNOS) || \
+			     (level == SP_TRAP_SOLARIS) || \
+			     (level == SP_TRAP_NETBSD) || \
+			     (level == SP_TRAP_LINUX))
+
+#endif /* _UAPI_SPARC_TRAPS_H */
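
A minimal sketch (not part of the patch) of the instruction-forming helpers and trap-level predicates in use; the values are illustrative:

	/* "mov 0x7f, %l3": 0xa6102000 | 0x07f == 0xa610207f */
	unsigned int insn = SPARC_MOV_CONST_L3(0x7f);

	/* 0x30 lies between SP_TRAP_DMM and SP_TRAP_IMM, an unassigned range */
	if (BAD_TRAP_P(0x30))
		insn = SPARC_NOP;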
diff --git a/arch/sparc/include/uapi/asm/uctx.h b/arch/sparc/include/uapi/asm/uctx.h
new file mode 100644
index 0000000..13a1319
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/uctx.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * uctx.h: Sparc64 {set,get}context() register state layouts.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#ifndef __SPARC64_UCTX_H
+#define __SPARC64_UCTX_H
+
+#define MC_TSTATE	0
+#define MC_PC		1
+#define MC_NPC		2
+#define MC_Y		3
+#define MC_G1		4
+#define MC_G2		5
+#define MC_G3		6
+#define MC_G4		7
+#define MC_G5		8
+#define MC_G6		9
+#define MC_G7		10
+#define MC_O0		11
+#define MC_O1		12
+#define MC_O2		13
+#define MC_O3		14
+#define MC_O4		15
+#define MC_O5		16
+#define MC_O6		17
+#define MC_O7		18
+#define MC_NGREG	19
+
+typedef unsigned long mc_greg_t;
+typedef mc_greg_t mc_gregset_t[MC_NGREG];
+
+#define MC_MAXFPQ	16
+struct mc_fq {
+	unsigned long	*mcfq_addr;
+	unsigned int	mcfq_insn;
+};
+
+struct mc_fpu {
+	union {
+		unsigned int	sregs[32];
+		unsigned long	dregs[32];
+		long double	qregs[16];
+	} mcfpu_fregs;
+	unsigned long	mcfpu_fsr;
+	unsigned long	mcfpu_fprs;
+	unsigned long	mcfpu_gsr;
+	struct mc_fq	*mcfpu_fq;
+	unsigned char	mcfpu_qcnt;
+	unsigned char	mcfpu_qentsz;
+	unsigned char	mcfpu_enab;
+};
+typedef struct mc_fpu mc_fpu_t;
+
+typedef struct {
+	mc_gregset_t	mc_gregs;
+	mc_greg_t	mc_fp;
+	mc_greg_t	mc_i7;
+	mc_fpu_t	mc_fpregs;
+} mcontext_t;
+
+struct ucontext {
+	struct ucontext		*uc_link;
+	unsigned long		uc_flags;
+	sigset_t		uc_sigmask;
+	mcontext_t		uc_mcontext;
+};
+typedef struct ucontext ucontext_t;
+
+#endif /* __SPARC64_UCTX_H */
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
new file mode 100644
index 0000000..45b4bf1
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -0,0 +1,453 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * System calls under the Sparc.
+ *
+ * Don't be scared by the ugly clobbers, it is the only way I can
+ * think of right now to force the arguments into fixed registers
+ * before the trap into the system call with gcc 'asm' statements.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ *
+ * SunOS compatibility based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+#ifndef _UAPI_SPARC_UNISTD_H
+#define _UAPI_SPARC_UNISTD_H
+
+#ifndef __32bit_syscall_numbers__
+#ifndef __arch64__
+#define __32bit_syscall_numbers__
+#endif
+#endif
+
+#define __NR_restart_syscall      0 /* Linux Specific				   */
+#define __NR_exit                 1 /* Common                                      */
+#define __NR_fork                 2 /* Common                                      */
+#define __NR_read                 3 /* Common                                      */
+#define __NR_write                4 /* Common                                      */
+#define __NR_open                 5 /* Common                                      */
+#define __NR_close                6 /* Common                                      */
+#define __NR_wait4                7 /* Common                                      */
+#define __NR_creat                8 /* Common                                      */
+#define __NR_link                 9 /* Common                                      */
+#define __NR_unlink              10 /* Common                                      */
+#define __NR_execv               11 /* SunOS Specific                              */
+#define __NR_chdir               12 /* Common                                      */
+#define __NR_chown		 13 /* Common					   */
+#define __NR_mknod               14 /* Common                                      */
+#define __NR_chmod               15 /* Common                                      */
+#define __NR_lchown              16 /* Common                                      */
+#define __NR_brk                 17 /* Common                                      */
+#define __NR_perfctr             18 /* Performance counter operations              */
+#define __NR_lseek               19 /* Common                                      */
+#define __NR_getpid              20 /* Common                                      */
+#define __NR_capget		 21 /* Linux Specific				   */
+#define __NR_capset		 22 /* Linux Specific				   */
+#define __NR_setuid              23 /* Implemented via setreuid in SunOS           */
+#define __NR_getuid              24 /* Common                                      */
+#define __NR_vmsplice	         25 /* ENOSYS under SunOS			   */
+#define __NR_ptrace              26 /* Common                                      */
+#define __NR_alarm               27 /* Implemented via setitimer in SunOS          */
+#define __NR_sigaltstack	 28 /* Common					   */
+#define __NR_pause               29 /* Is sigblock(0)->sigpause() in SunOS         */
+#define __NR_utime               30 /* Implemented via utimes() under SunOS        */
+#ifdef __32bit_syscall_numbers__
+#define __NR_lchown32            31 /* Linux sparc32 specific                      */
+#define __NR_fchown32            32 /* Linux sparc32 specific                      */
+#endif
+#define __NR_access              33 /* Common                                      */
+#define __NR_nice                34 /* Implemented via get/setpriority() in SunOS  */
+#ifdef __32bit_syscall_numbers__
+#define __NR_chown32             35 /* Linux sparc32 specific                      */
+#endif
+#define __NR_sync                36 /* Common                                      */
+#define __NR_kill                37 /* Common                                      */
+#define __NR_stat                38 /* Common                                      */
+#define __NR_sendfile		 39 /* Linux Specific				   */
+#define __NR_lstat               40 /* Common                                      */
+#define __NR_dup                 41 /* Common                                      */
+#define __NR_pipe                42 /* Common                                      */
+#define __NR_times               43 /* Implemented via getrusage() in SunOS        */
+#ifdef __32bit_syscall_numbers__
+#define __NR_getuid32            44 /* Linux sparc32 specific                      */
+#endif
+#define __NR_umount2             45 /* Linux Specific                              */
+#define __NR_setgid              46 /* Implemented via setregid() in SunOS         */
+#define __NR_getgid              47 /* Common                                      */
+#define __NR_signal              48 /* Implemented via sigvec() in SunOS           */
+#define __NR_geteuid             49 /* SunOS calls getuid()                        */
+#define __NR_getegid             50 /* SunOS calls getgid()                        */
+#define __NR_acct                51 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_getgid32            53 /* Linux sparc32 specific                      */
+#else
+#define __NR_memory_ordering	 52 /* Linux Specific				   */
+#endif
+#define __NR_ioctl               54 /* Common                                      */
+#define __NR_reboot              55 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_mmap2		 56 /* Linux sparc32 Specific			   */
+#endif
+#define __NR_symlink             57 /* Common                                      */
+#define __NR_readlink            58 /* Common                                      */
+#define __NR_execve              59 /* Common                                      */
+#define __NR_umask               60 /* Common                                      */
+#define __NR_chroot              61 /* Common                                      */
+#define __NR_fstat               62 /* Common                                      */
+#define __NR_fstat64		 63 /* Linux Specific			           */
+#define __NR_getpagesize         64 /* Common                                      */
+#define __NR_msync               65 /* Common in newer 1.3.x revs...               */
+#define __NR_vfork               66 /* Common                                      */
+#define __NR_pread64             67 /* Linux Specific                              */
+#define __NR_pwrite64            68 /* Linux Specific                              */
+#ifdef __32bit_syscall_numbers__
+#define __NR_geteuid32           69 /* Linux sparc32, sbrk under SunOS             */
+#define __NR_getegid32           70 /* Linux sparc32, sstk under SunOS             */
+#endif
+#define __NR_mmap                71 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setreuid32          72 /* Linux sparc32, vadvise under SunOS          */
+#endif
+#define __NR_munmap              73 /* Common                                      */
+#define __NR_mprotect            74 /* Common                                      */
+#define __NR_madvise             75 /* Common                                      */
+#define __NR_vhangup             76 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_truncate64		 77 /* Linux sparc32 Specific			   */
+#endif
+#define __NR_mincore             78 /* Common                                      */
+#define __NR_getgroups           79 /* Common                                      */
+#define __NR_setgroups           80 /* Common                                      */
+#define __NR_getpgrp             81 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setgroups32         82 /* Linux sparc32, setpgrp under SunOS          */
+#endif
+#define __NR_setitimer           83 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_ftruncate64	 84 /* Linux sparc32 Specific			   */
+#endif
+#define __NR_swapon              85 /* Common                                      */
+#define __NR_getitimer           86 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setuid32            87 /* Linux sparc32, gethostname under SunOS      */
+#endif
+#define __NR_sethostname         88 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setgid32            89 /* Linux sparc32, getdtablesize under SunOS    */
+#endif
+#define __NR_dup2                90 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setfsuid32          91 /* Linux sparc32, getdopt under SunOS          */
+#endif
+#define __NR_fcntl               92 /* Common                                      */
+#define __NR_select              93 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setfsgid32          94 /* Linux sparc32, setdopt under SunOS          */
+#endif
+#define __NR_fsync               95 /* Common                                      */
+#define __NR_setpriority         96 /* Common                                      */
+#define __NR_socket              97 /* Common                                      */
+#define __NR_connect             98 /* Common                                      */
+#define __NR_accept              99 /* Common                                      */
+#define __NR_getpriority        100 /* Common                                      */
+#define __NR_rt_sigreturn       101 /* Linux Specific                              */
+#define __NR_rt_sigaction       102 /* Linux Specific                              */
+#define __NR_rt_sigprocmask     103 /* Linux Specific                              */
+#define __NR_rt_sigpending      104 /* Linux Specific                              */
+#define __NR_rt_sigtimedwait    105 /* Linux Specific                              */
+#define __NR_rt_sigqueueinfo    106 /* Linux Specific                              */
+#define __NR_rt_sigsuspend      107 /* Linux Specific                              */
+#ifdef __32bit_syscall_numbers__
+#define __NR_setresuid32        108 /* Linux Specific, sigvec under SunOS	   */
+#define __NR_getresuid32        109 /* Linux Specific, sigblock under SunOS	   */
+#define __NR_setresgid32        110 /* Linux Specific, sigsetmask under SunOS	   */
+#define __NR_getresgid32        111 /* Linux Specific, sigpause under SunOS	   */
+#define __NR_setregid32         112 /* Linux sparc32, sigstack under SunOS         */
+#else
+#define __NR_setresuid          108 /* Linux Specific, sigvec under SunOS	   */
+#define __NR_getresuid          109 /* Linux Specific, sigblock under SunOS	   */
+#define __NR_setresgid          110 /* Linux Specific, sigsetmask under SunOS	   */
+#define __NR_getresgid          111 /* Linux Specific, sigpause under SunOS	   */
+#endif
+#define __NR_recvmsg            113 /* Common                                      */
+#define __NR_sendmsg            114 /* Common                                      */
+#ifdef __32bit_syscall_numbers__
+#define __NR_getgroups32        115 /* Linux sparc32, vtrace under SunOS           */
+#endif
+#define __NR_gettimeofday       116 /* Common                                      */
+#define __NR_getrusage          117 /* Common                                      */
+#define __NR_getsockopt         118 /* Common                                      */
+#define __NR_getcwd		119 /* Linux Specific				   */
+#define __NR_readv              120 /* Common                                      */
+#define __NR_writev             121 /* Common                                      */
+#define __NR_settimeofday       122 /* Common                                      */
+#define __NR_fchown             123 /* Common                                      */
+#define __NR_fchmod             124 /* Common                                      */
+#define __NR_recvfrom           125 /* Common                                      */
+#define __NR_setreuid           126 /* Common                                      */
+#define __NR_setregid           127 /* Common                                      */
+#define __NR_rename             128 /* Common                                      */
+#define __NR_truncate           129 /* Common                                      */
+#define __NR_ftruncate          130 /* Common                                      */
+#define __NR_flock              131 /* Common                                      */
+#define __NR_lstat64		132 /* Linux Specific			           */
+#define __NR_sendto             133 /* Common                                      */
+#define __NR_shutdown           134 /* Common                                      */
+#define __NR_socketpair         135 /* Common                                      */
+#define __NR_mkdir              136 /* Common                                      */
+#define __NR_rmdir              137 /* Common                                      */
+#define __NR_utimes             138 /* SunOS Specific                              */
+#define __NR_stat64		139 /* Linux Specific			           */
+#define __NR_sendfile64         140 /* adjtime under SunOS                         */
+#define __NR_getpeername        141 /* Common                                      */
+#define __NR_futex              142 /* gethostid under SunOS                       */
+#define __NR_gettid             143 /* ENOSYS under SunOS                          */
+#define __NR_getrlimit		144 /* Common                                      */
+#define __NR_setrlimit          145 /* Common                                      */
+#define __NR_pivot_root		146 /* Linux Specific, killpg under SunOS          */
+#define __NR_prctl		147 /* ENOSYS under SunOS                          */
+#define __NR_pciconfig_read	148 /* ENOSYS under SunOS                          */
+#define __NR_pciconfig_write	149 /* ENOSYS under SunOS                          */
+#define __NR_getsockname        150 /* Common                                      */
+#define __NR_inotify_init       151 /* Linux specific                              */
+#define __NR_inotify_add_watch  152 /* Linux specific                              */
+#define __NR_poll               153 /* Common                                      */
+#define __NR_getdents64		154 /* Linux specific				   */
+#ifdef __32bit_syscall_numbers__
+#define __NR_fcntl64		155 /* Linux sparc32 Specific                      */
+#endif
+#define __NR_inotify_rm_watch   156 /* Linux specific				   */
+#define __NR_statfs             157 /* Common                                      */
+#define __NR_fstatfs            158 /* Common                                      */
+#define __NR_umount             159 /* Common                                      */
+#define __NR_sched_set_affinity 160 /* Linux specific, async_daemon under SunOS    */
+#define __NR_sched_get_affinity 161 /* Linux specific, getfh under SunOS           */
+#define __NR_getdomainname      162 /* SunOS Specific                              */
+#define __NR_setdomainname      163 /* Common                                      */
+#ifndef __32bit_syscall_numbers__
+#define __NR_utrap_install	164 /* SYSV ABI/v9 required			   */
+#endif
+#define __NR_quotactl           165 /* Common                                      */
+#define __NR_set_tid_address    166 /* Linux specific, exportfs under SunOS        */
+#define __NR_mount              167 /* Common                                      */
+#define __NR_ustat              168 /* Common                                      */
+#define __NR_setxattr           169 /* SunOS: semsys                               */
+#define __NR_lsetxattr          170 /* SunOS: msgsys                               */
+#define __NR_fsetxattr          171 /* SunOS: shmsys                               */
+#define __NR_getxattr           172 /* SunOS: auditsys                             */
+#define __NR_lgetxattr          173 /* SunOS: rfssys                               */
+#define __NR_getdents           174 /* Common                                      */
+#define __NR_setsid             175 /* Common                                      */
+#define __NR_fchdir             176 /* Common                                      */
+#define __NR_fgetxattr          177 /* SunOS: fchroot                              */
+#define __NR_listxattr          178 /* SunOS: vpixsys                              */
+#define __NR_llistxattr         179 /* SunOS: aioread                              */
+#define __NR_flistxattr         180 /* SunOS: aiowrite                             */
+#define __NR_removexattr        181 /* SunOS: aiowait                              */
+#define __NR_lremovexattr       182 /* SunOS: aiocancel                            */
+#define __NR_sigpending         183 /* Common                                      */
+#define __NR_query_module	184 /* Linux Specific				   */
+#define __NR_setpgid            185 /* Common                                      */
+#define __NR_fremovexattr       186 /* SunOS: pathconf                             */
+#define __NR_tkill              187 /* SunOS: fpathconf                            */
+#define __NR_exit_group		188 /* Linux specific, sysconf under SunOS         */
+#define __NR_uname              189 /* Linux Specific                              */
+#define __NR_init_module        190 /* Linux Specific                              */
+#define __NR_personality        191 /* Linux Specific                              */
+#define __NR_remap_file_pages   192 /* Linux Specific                              */
+#define __NR_epoll_create       193 /* Linux Specific                              */
+#define __NR_epoll_ctl          194 /* Linux Specific                              */
+#define __NR_epoll_wait         195 /* Linux Specific                              */
+#define __NR_ioprio_set         196 /* Linux Specific                              */
+#define __NR_getppid            197 /* Linux Specific                              */
+#define __NR_sigaction          198 /* Linux Specific                              */
+#define __NR_sgetmask           199 /* Linux Specific                              */
+#define __NR_ssetmask           200 /* Linux Specific                              */
+#define __NR_sigsuspend         201 /* Linux Specific                              */
+#define __NR_oldlstat           202 /* Linux Specific                              */
+#define __NR_uselib             203 /* Linux Specific                              */
+#define __NR_readdir            204 /* Linux Specific                              */
+#define __NR_readahead          205 /* Linux Specific                              */
+#define __NR_socketcall         206 /* Linux Specific                              */
+#define __NR_syslog             207 /* Linux Specific                              */
+#define __NR_lookup_dcookie     208 /* Linux Specific                              */
+#define __NR_fadvise64          209 /* Linux Specific                              */
+#define __NR_fadvise64_64       210 /* Linux Specific                              */
+#define __NR_tgkill             211 /* Linux Specific                              */
+#define __NR_waitpid            212 /* Linux Specific                              */
+#define __NR_swapoff            213 /* Linux Specific                              */
+#define __NR_sysinfo            214 /* Linux Specific                              */
+#define __NR_ipc                215 /* Linux Specific                              */
+#define __NR_sigreturn          216 /* Linux Specific                              */
+#define __NR_clone              217 /* Linux Specific                              */
+#define __NR_ioprio_get         218 /* Linux Specific                              */
+#define __NR_adjtimex           219 /* Linux Specific                              */
+#define __NR_sigprocmask        220 /* Linux Specific                              */
+#define __NR_create_module      221 /* Linux Specific                              */
+#define __NR_delete_module      222 /* Linux Specific                              */
+#define __NR_get_kernel_syms    223 /* Linux Specific                              */
+#define __NR_getpgid            224 /* Linux Specific                              */
+#define __NR_bdflush            225 /* Linux Specific                              */
+#define __NR_sysfs              226 /* Linux Specific                              */
+#define __NR_afs_syscall        227 /* Linux Specific                              */
+#define __NR_setfsuid           228 /* Linux Specific                              */
+#define __NR_setfsgid           229 /* Linux Specific                              */
+#define __NR__newselect         230 /* Linux Specific                              */
+#ifdef __32bit_syscall_numbers__
+#define __NR_time               231 /* Linux Specific                              */
+#else
+#endif
+#define __NR_splice             232 /* Linux Specific                              */
+#define __NR_stime              233 /* Linux Specific                              */
+#define __NR_statfs64           234 /* Linux Specific                              */
+#define __NR_fstatfs64          235 /* Linux Specific                              */
+#define __NR__llseek            236 /* Linux Specific                              */
+#define __NR_mlock              237
+#define __NR_munlock            238
+#define __NR_mlockall           239
+#define __NR_munlockall         240
+#define __NR_sched_setparam     241
+#define __NR_sched_getparam     242
+#define __NR_sched_setscheduler 243
+#define __NR_sched_getscheduler 244
+#define __NR_sched_yield        245
+#define __NR_sched_get_priority_max 246
+#define __NR_sched_get_priority_min 247
+#define __NR_sched_rr_get_interval  248
+#define __NR_nanosleep          249
+#define __NR_mremap             250
+#define __NR__sysctl            251
+#define __NR_getsid             252
+#define __NR_fdatasync          253
+#define __NR_nfsservctl         254
+#define __NR_sync_file_range	255
+#define __NR_clock_settime	256
+#define __NR_clock_gettime	257
+#define __NR_clock_getres	258
+#define __NR_clock_nanosleep	259
+#define __NR_sched_getaffinity	260
+#define __NR_sched_setaffinity	261
+#define __NR_timer_settime	262
+#define __NR_timer_gettime	263
+#define __NR_timer_getoverrun	264
+#define __NR_timer_delete	265
+#define __NR_timer_create	266
+/* #define __NR_vserver		267 Reserved for VSERVER */
+#define __NR_io_setup		268
+#define __NR_io_destroy		269
+#define __NR_io_submit		270
+#define __NR_io_cancel		271
+#define __NR_io_getevents	272
+#define __NR_mq_open		273
+#define __NR_mq_unlink		274
+#define __NR_mq_timedsend	275
+#define __NR_mq_timedreceive	276
+#define __NR_mq_notify		277
+#define __NR_mq_getsetattr	278
+#define __NR_waitid		279
+#define __NR_tee		280
+#define __NR_add_key		281
+#define __NR_request_key	282
+#define __NR_keyctl		283
+#define __NR_openat		284
+#define __NR_mkdirat		285
+#define __NR_mknodat		286
+#define __NR_fchownat		287
+#define __NR_futimesat		288
+#define __NR_fstatat64		289
+#define __NR_unlinkat		290
+#define __NR_renameat		291
+#define __NR_linkat		292
+#define __NR_symlinkat		293
+#define __NR_readlinkat		294
+#define __NR_fchmodat		295
+#define __NR_faccessat		296
+#define __NR_pselect6		297
+#define __NR_ppoll		298
+#define __NR_unshare		299
+#define __NR_set_robust_list	300
+#define __NR_get_robust_list	301
+#define __NR_migrate_pages	302
+#define __NR_mbind		303
+#define __NR_get_mempolicy	304
+#define __NR_set_mempolicy	305
+#define __NR_kexec_load		306
+#define __NR_move_pages		307
+#define __NR_getcpu		308
+#define __NR_epoll_pwait	309
+#define __NR_utimensat		310
+#define __NR_signalfd		311
+#define __NR_timerfd_create	312
+#define __NR_eventfd		313
+#define __NR_fallocate		314
+#define __NR_timerfd_settime	315
+#define __NR_timerfd_gettime	316
+#define __NR_signalfd4		317
+#define __NR_eventfd2		318
+#define __NR_epoll_create1	319
+#define __NR_dup3		320
+#define __NR_pipe2		321
+#define __NR_inotify_init1	322
+#define __NR_accept4		323
+#define __NR_preadv		324
+#define __NR_pwritev		325
+#define __NR_rt_tgsigqueueinfo	326
+#define __NR_perf_event_open	327
+#define __NR_recvmmsg		328
+#define __NR_fanotify_init	329
+#define __NR_fanotify_mark	330
+#define __NR_prlimit64		331
+#define __NR_name_to_handle_at	332
+#define __NR_open_by_handle_at	333
+#define __NR_clock_adjtime	334
+#define __NR_syncfs		335
+#define __NR_sendmmsg		336
+#define __NR_setns		337
+#define __NR_process_vm_readv	338
+#define __NR_process_vm_writev	339
+#define __NR_kern_features	340
+#define __NR_kcmp		341
+#define __NR_finit_module	342
+#define __NR_sched_setattr	343
+#define __NR_sched_getattr	344
+#define __NR_renameat2		345
+#define __NR_seccomp		346
+#define __NR_getrandom		347
+#define __NR_memfd_create	348
+#define __NR_bpf		349
+#define __NR_execveat		350
+#define __NR_membarrier		351
+#define __NR_userfaultfd	352
+#define __NR_bind		353
+#define __NR_listen		354
+#define __NR_setsockopt		355
+#define __NR_mlock2		356
+#define __NR_copy_file_range	357
+#define __NR_preadv2		358
+#define __NR_pwritev2		359
+#define __NR_statx		360
+#define __NR_io_pgetevents	361
+
+#define NR_syscalls		362
+
+/* Bitmask values returned from kern_features system call.  */
+#define KERN_FEATURE_MIXED_MODE_STACK	0x00000001
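+
+/* Illustrative sketch (not part of the original header): userspace has
+ * no libc wrapper for kern_features, so the bitmask would typically be
+ * probed with a raw syscall:
+ *
+ *	long feat = syscall(__NR_kern_features);
+ *	if (feat > 0 && (feat & KERN_FEATURE_MIXED_MODE_STACK))
+ *		use_mixed_mode_stacks();	// hypothetical helper
+ */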
+
+#ifdef __32bit_syscall_numbers__
+/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
+ * it never had the plain ones and there is no value to adding those
+ * old versions into the syscall table.
+ */
+#define __IGNORE_setresuid
+#define __IGNORE_getresuid
+#define __IGNORE_setresgid
+#define __IGNORE_getresgid
+#endif
+
+/* Sparc doesn't have protection keys. */
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
+
+#endif /* _UAPI_SPARC_UNISTD_H */
diff --git a/arch/sparc/include/uapi/asm/utrap.h b/arch/sparc/include/uapi/asm/utrap.h
new file mode 100644
index 0000000..d890b7f
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/utrap.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * include/asm/utrap.h
+ *
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#ifndef __ASM_SPARC64_UTRAP_H
+#define __ASM_SPARC64_UTRAP_H
+
+#define UT_INSTRUCTION_EXCEPTION		1
+#define UT_INSTRUCTION_ERROR			2
+#define UT_INSTRUCTION_PROTECTION		3
+#define UT_ILLTRAP_INSTRUCTION			4
+#define UT_ILLEGAL_INSTRUCTION			5
+#define UT_PRIVILEGED_OPCODE			6
+#define UT_FP_DISABLED				7
+#define UT_FP_EXCEPTION_IEEE_754		8
+#define UT_FP_EXCEPTION_OTHER			9
+#define UT_TAG_OVERFLOW				10
+#define UT_DIVISION_BY_ZERO			11
+#define UT_DATA_EXCEPTION			12
+#define UT_DATA_ERROR				13
+#define UT_DATA_PROTECTION			14
+#define UT_MEM_ADDRESS_NOT_ALIGNED		15
+#define UT_PRIVILEGED_ACTION			16
+#define UT_ASYNC_DATA_ERROR			17
+#define UT_TRAP_INSTRUCTION_16			18
+#define UT_TRAP_INSTRUCTION_17			19
+#define UT_TRAP_INSTRUCTION_18			20
+#define UT_TRAP_INSTRUCTION_19			21
+#define UT_TRAP_INSTRUCTION_20			22
+#define UT_TRAP_INSTRUCTION_21			23
+#define UT_TRAP_INSTRUCTION_22			24
+#define UT_TRAP_INSTRUCTION_23			25
+#define UT_TRAP_INSTRUCTION_24			26
+#define UT_TRAP_INSTRUCTION_25			27
+#define UT_TRAP_INSTRUCTION_26			28
+#define UT_TRAP_INSTRUCTION_27			29
+#define UT_TRAP_INSTRUCTION_28			30
+#define UT_TRAP_INSTRUCTION_29			31
+#define UT_TRAP_INSTRUCTION_30			32
+#define UT_TRAP_INSTRUCTION_31			33
+
+#define	UTH_NOCHANGE				(-1)
+
+#ifndef __ASSEMBLY__
+typedef int utrap_entry_t;
+typedef void *utrap_handler_t;
+#endif /* __ASSEMBLY__ */
+
+#endif /* !(__ASM_SPARC64_UTRAP_H) */
diff --git a/arch/sparc/include/uapi/asm/watchdog.h b/arch/sparc/include/uapi/asm/watchdog.h
new file mode 100644
index 0000000..497ac19
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/watchdog.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * watchdog - Driver interface for the hardware watchdog timers
+ * present on Sun Microsystems boardsets
+ *
+ * Copyright (c) 2000 Eric Brower <ebrower@usa.net>
+ *
+ */
+
+#ifndef _SPARC64_WATCHDOG_H
+#define _SPARC64_WATCHDOG_H
+
+#include <linux/watchdog.h>
+
+/* Solaris compatibility ioctls--
+ * Ref. <linux/watchdog.h> for standard linux watchdog ioctls
+ */
+#define WIOCSTART _IO (WATCHDOG_IOCTL_BASE, 10)		/* Start Timer		*/
+#define WIOCSTOP  _IO (WATCHDOG_IOCTL_BASE, 11)		/* Stop Timer		*/
+#define WIOCGSTAT _IOR(WATCHDOG_IOCTL_BASE, 12, int)/* Get Timer Status	*/
+
+/* Status flags from WIOCGSTAT ioctl
+ */
+#define WD_FREERUN	0x01	/* timer is running, interrupts disabled	*/
+#define WD_EXPIRED	0x02	/* timer has expired						*/
+#define WD_RUNNING	0x04	/* timer is running, interrupts enabled		*/
+#define WD_STOPPED	0x08	/* timer has not been started				*/
+#define WD_SERVICED 0x10	/* timer interrupt was serviced				*/
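+
+/* Illustrative sketch (hypothetical userspace usage; the device node
+ * depends on the watchdog driver in use):
+ *
+ *	int fd = open("/dev/watchdog", O_RDWR);
+ *	int stat;
+ *
+ *	ioctl(fd, WIOCSTART);
+ *	ioctl(fd, WIOCGSTAT, &stat);
+ *	if (stat & WD_EXPIRED)
+ *		reload_or_reboot();		// hypothetical helper
+ */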
+
+#endif /* ifndef _SPARC64_WATCHDOG_H */
+
diff --git a/arch/sparc/kernel/.gitignore b/arch/sparc/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/sparc/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
new file mode 100644
index 0000000..cf86408
--- /dev/null
+++ b/arch/sparc/kernel/Makefile
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Makefile for the linux kernel.
+#
+
+asflags-y := -ansi
+ccflags-y := -Werror
+
+extra-y     := head_$(BITS).o
+
+# Undefine sparc when processing vmlinux.lds - it is used as an identifier there
+# And teach CPP we are doing $(BITS) builds (for this case)
+CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS)
+extra-y              += vmlinux.lds
+
+ifdef CONFIG_FUNCTION_TRACER
+# Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_ftrace.o := -pg
+CFLAGS_REMOVE_time_$(BITS).o := -pg
+CFLAGS_REMOVE_perf_event.o := -pg
+CFLAGS_REMOVE_pcr.o := -pg
+endif
+
+obj-$(CONFIG_SPARC64)   += urtt_fill.o
+obj-$(CONFIG_SPARC32)   += entry.o wof.o wuf.o
+obj-$(CONFIG_SPARC32)   += etrap_32.o
+obj-$(CONFIG_SPARC32)   += rtrap_32.o
+obj-y                   += traps_$(BITS).o
+
+# IRQ
+obj-y                   += irq_$(BITS).o
+obj-$(CONFIG_SPARC32)   += sun4m_irq.o sun4d_irq.o
+
+obj-y                   += process_$(BITS).o
+obj-y                   += signal_$(BITS).o
+obj-y                   += sigutil_$(BITS).o
+obj-$(CONFIG_SPARC32)   += ioport.o
+obj-y                   += setup_$(BITS).o
+obj-y                   += idprom.o
+obj-y                   += sys_sparc_$(BITS).o
+obj-$(CONFIG_SPARC32)   += systbls_32.o
+obj-y                   += time_$(BITS).o
+obj-$(CONFIG_SPARC32)   += windows.o
+obj-y                   += cpu.o
+obj-$(CONFIG_SPARC64)	+= vdso.o
+obj-$(CONFIG_SPARC32)   += devices.o
+obj-y                   += ptrace_$(BITS).o
+obj-y                   += unaligned_$(BITS).o
+obj-y                   += una_asm_$(BITS).o
+obj-y                   += prom_common.o
+obj-y                   += prom_$(BITS).o
+obj-y                   += of_device_common.o
+obj-y                   += of_device_$(BITS).o
+obj-$(CONFIG_SPARC64)   += prom_irqtrans.o
+
+obj-$(CONFIG_SPARC32)   += leon_kernel.o
+obj-$(CONFIG_SPARC32)   += leon_pmc.o
+
+obj-$(CONFIG_SPARC64)   += reboot.o
+obj-$(CONFIG_SPARC64)   += sysfs.o
+obj-$(CONFIG_SPARC64)   += iommu.o iommu-common.o
+obj-$(CONFIG_SPARC64)   += central.o
+obj-$(CONFIG_SPARC64)   += starfire.o
+obj-$(CONFIG_SPARC64)   += power.o
+obj-$(CONFIG_SPARC64)   += sbus.o
+obj-$(CONFIG_SPARC64)   += ebus.o
+obj-$(CONFIG_SPARC64)   += visemul.o
+obj-$(CONFIG_SPARC64)   += hvapi.o
+obj-$(CONFIG_SPARC64)   += sstate.o
+obj-$(CONFIG_SPARC64)   += mdesc.o
+obj-$(CONFIG_SPARC64)   += adi_64.o
+obj-$(CONFIG_SPARC64)	+= pcr.o
+obj-$(CONFIG_SPARC64)	+= nmi.o
+obj-$(CONFIG_SPARC64_SMP) += cpumap.o
+
+obj-$(CONFIG_PCIC_PCI)    += pcic.o
+obj-$(CONFIG_LEON_PCI)    += leon_pci.o
+obj-$(CONFIG_SPARC_GRPCI2)+= leon_pci_grpci2.o
+obj-$(CONFIG_SPARC_GRPCI1)+= leon_pci_grpci1.o
+
+obj-$(CONFIG_SMP)         += trampoline_$(BITS).o smp_$(BITS).o
+obj-$(CONFIG_SPARC32_SMP) += sun4m_smp.o sun4d_smp.o leon_smp.o
+obj-$(CONFIG_SPARC64_SMP) += hvtramp.o
+
+obj-y                     += auxio_$(BITS).o
+obj-$(CONFIG_SUN_PM)      += apc.o pmc.o
+
+obj-$(CONFIG_MODULES)     += module.o
+obj-$(CONFIG_MODULES)     += sparc_ksyms.o
+obj-$(CONFIG_SPARC_LED)   += led.o
+obj-$(CONFIG_KGDB)        += kgdb_$(BITS).o
+
+
+obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+
+obj-$(CONFIG_EARLYFB) += btext.o
+obj-$(CONFIG_STACKTRACE)     += stacktrace.o
+# sparc64 PCI
+obj-$(CONFIG_SPARC64_PCI)    += pci.o pci_common.o psycho_common.o
+obj-$(CONFIG_SPARC64_PCI)    += pci_psycho.o pci_sabre.o pci_schizo.o
+obj-$(CONFIG_SPARC64_PCI)    += pci_sun4v.o pci_sun4v_asm.o pci_fire.o
+obj-$(CONFIG_SPARC64_PCI_MSI) += pci_msi.o
+
+obj-$(CONFIG_COMPAT)         += sys32.o sys_sparc32.o signal32.o
+
+obj-$(CONFIG_US3_MC)    += chmc.o
+
+obj-$(CONFIG_KPROBES)   += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
+
+obj-$(CONFIG_AUDIT)     += audit.o
+audit--$(CONFIG_AUDIT)  := compat_audit.o
+obj-$(CONFIG_COMPAT)    += $(audit--y)
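+# Note: "audit--$(CONFIG_AUDIT)" is a conditional-assignment idiom: with
+# CONFIG_AUDIT=y it expands to "audit--y := compat_audit.o", so the
+# $(audit--y) reference above is non-empty only when CONFIG_AUDIT and
+# CONFIG_COMPAT are both enabled.  The "pc--" lines below use the same
+# pattern for perf_event.o.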
+
+pc--$(CONFIG_PERF_EVENTS) := perf_event.o
+obj-$(CONFIG_SPARC64)	+= $(pc--y)
+
+obj-$(CONFIG_UPROBES)	+= uprobes.o
+obj-$(CONFIG_SPARC64)	+= jump_label.o
diff --git a/arch/sparc/kernel/adi_64.c b/arch/sparc/kernel/adi_64.c
new file mode 100644
index 0000000..d0a2ac9
--- /dev/null
+++ b/arch/sparc/kernel/adi_64.c
@@ -0,0 +1,397 @@
+/* adi_64.c: support for ADI (Application Data Integrity) feature on
+ * sparc m7 and newer processors. This feature is also known as
+ * SSM (Silicon Secured Memory).
+ *
+ * Copyright (C) 2016 Oracle and/or its affiliates. All rights reserved.
+ * Author: Khalid Aziz (khalid.aziz@oracle.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/mm_types.h>
+#include <asm/mdesc.h>
+#include <asm/adi_64.h>
+#include <asm/mmu_64.h>
+#include <asm/pgtable_64.h>
+
+/* Each page of storage for ADI tags can accommodate tags for 128
+ * pages. When ADI enabled pages are being swapped out, it would be
+ * prudent to allocate at least enough tag storage space to accommodate
+ * SWAPFILE_CLUSTER number of pages. Allocate enough tag storage to
+ * store tags for four SWAPFILE_CLUSTER pages to reduce need for
+ * further allocations for same vma.
+ */
+#define TAG_STORAGE_PAGES	8
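+
+/* Worked example (illustrative, assuming 8K pages and the 64-byte ADI
+ * block size of M7): one tag page holds 8192 bytes * 2 tags per byte =
+ * 16384 tags, covering 16384 * 64 bytes = 1M, i.e. 128 8K pages; the 8
+ * tag pages above therefore cover 1024 pages = 4 * SWAPFILE_CLUSTER
+ * (256).
+ */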
+
+struct adi_config adi_state;
+EXPORT_SYMBOL(adi_state);
+
+/* mdesc_adi_init() : Parse machine description provided by the
+ *	hypervisor to detect ADI capabilities
+ *
+ * Hypervisor reports ADI capabilities of platform in "hwcap-list" property
+ * for "cpu" node. If the platform supports ADI, "hwcap-list" property
+ * contains the keyword "adp". If the platform supports ADI, "platform"
+ * node will contain "adp-blksz", "adp-nbits" and "ue-on-adp" properties
+ * to describe the ADI capabilities.
+ */
+void __init mdesc_adi_init(void)
+{
+	struct mdesc_handle *hp = mdesc_grab();
+	const char *prop;
+	u64 pn, *val;
+	int len;
+
+	if (!hp)
+		goto adi_not_found;
+
+	pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu");
+	if (pn == MDESC_NODE_NULL)
+		goto adi_not_found;
+
+	prop = mdesc_get_property(hp, pn, "hwcap-list", &len);
+	if (!prop)
+		goto adi_not_found;
+
+	/*
+	 * Look for "adp" keyword in hwcap-list which would indicate
+	 * ADI support
+	 */
+	adi_state.enabled = false;
+	while (len) {
+		int plen;
+
+		if (!strcmp(prop, "adp")) {
+			adi_state.enabled = true;
+			break;
+		}
+
+		plen = strlen(prop) + 1;
+		prop += plen;
+		len -= plen;
+	}
+
+	if (!adi_state.enabled)
+		goto adi_not_found;
+
+	/* Find the ADI properties in the "platform" node. If any ADI
+	 * property is missing, ADI support is incomplete and ADI is
+	 * not enabled in the kernel.
+	 */
+	pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+	if (pn == MDESC_NODE_NULL)
+		goto adi_not_found;
+
+	val = (u64 *) mdesc_get_property(hp, pn, "adp-blksz", &len);
+	if (!val)
+		goto adi_not_found;
+	adi_state.caps.blksz = *val;
+
+	val = (u64 *) mdesc_get_property(hp, pn, "adp-nbits", &len);
+	if (!val)
+		goto adi_not_found;
+	adi_state.caps.nbits = *val;
+
+	val = (u64 *) mdesc_get_property(hp, pn, "ue-on-adp", &len);
+	if (!val)
+		goto adi_not_found;
+	adi_state.caps.ue_on_adi = *val;
+
+	/* Some of the code to support swapping ADI tags is written with
+	 * the assumption that two ADI tags can fit inside one byte. If
+	 * this assumption is broken by a future architecture change,
+	 * that code will have to be revisited. If that were to happen,
+	 * disable ADI support so we do not get unpredictable results
+	 * with programs trying to use ADI and their pages getting
+	 * swapped out
+	 */
+	if (adi_state.caps.nbits > 4) {
+		pr_warn("WARNING: ADI tag size >4 on this platform. Disabling AADI support\n");
+		adi_state.enabled = false;
+	}
+
+	mdesc_release(hp);
+	return;
+
+adi_not_found:
+	adi_state.enabled = false;
+	adi_state.caps.blksz = 0;
+	adi_state.caps.nbits = 0;
+	if (hp)
+		mdesc_release(hp);
+}
+
+tag_storage_desc_t *find_tag_store(struct mm_struct *mm,
+				   struct vm_area_struct *vma,
+				   unsigned long addr)
+{
+	tag_storage_desc_t *tag_desc = NULL;
+	unsigned long i, max_desc, flags;
+
+	/* Check if this vma already has tag storage descriptor
+	 * allocated for it.
+	 */
+	max_desc = PAGE_SIZE/sizeof(tag_storage_desc_t);
+	if (mm->context.tag_store) {
+		tag_desc = mm->context.tag_store;
+		spin_lock_irqsave(&mm->context.tag_lock, flags);
+		for (i = 0; i < max_desc; i++) {
+			if ((addr >= tag_desc->start) &&
+			    ((addr + PAGE_SIZE - 1) <= tag_desc->end))
+				break;
+			tag_desc++;
+		}
+		spin_unlock_irqrestore(&mm->context.tag_lock, flags);
+
+		/* If no matching entries were found, this must be a
+		 * freshly allocated page
+		 */
+		if (i >= max_desc)
+			tag_desc = NULL;
+	}
+
+	return tag_desc;
+}
+
+tag_storage_desc_t *alloc_tag_store(struct mm_struct *mm,
+				    struct vm_area_struct *vma,
+				    unsigned long addr)
+{
+	unsigned char *tags;
+	unsigned long i, size, max_desc, flags;
+	tag_storage_desc_t *tag_desc, *open_desc;
+	unsigned long end_addr, hole_start, hole_end;
+
+	max_desc = PAGE_SIZE/sizeof(tag_storage_desc_t);
+	open_desc = NULL;
+	hole_start = 0;
+	hole_end = ULONG_MAX;
+	end_addr = addr + PAGE_SIZE - 1;
+
+	/* Check if this vma already has tag storage descriptor
+	 * allocated for it.
+	 */
+	spin_lock_irqsave(&mm->context.tag_lock, flags);
+	if (mm->context.tag_store) {
+		tag_desc = mm->context.tag_store;
+
+		/* Look for a matching entry for this address. While doing
+		 * that, look for the first open slot as well and find
+		 * the hole in already allocated range where this request
+		 * will fit in.
+		 */
+		for (i = 0; i < max_desc; i++) {
+			if (tag_desc->tag_users == 0) {
+				if (open_desc == NULL)
+					open_desc = tag_desc;
+			} else {
+				if ((addr >= tag_desc->start) &&
+				    (tag_desc->end >= (addr + PAGE_SIZE - 1))) {
+					tag_desc->tag_users++;
+					goto out;
+				}
+			}
+			if ((tag_desc->start > end_addr) &&
+			    (tag_desc->start < hole_end))
+				hole_end = tag_desc->start;
+			if ((tag_desc->end < addr) &&
+			    (tag_desc->end > hole_start))
+				hole_start = tag_desc->end;
+			tag_desc++;
+		}
+
+	} else {
+		size = sizeof(tag_storage_desc_t)*max_desc;
+		mm->context.tag_store = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN);
+		if (mm->context.tag_store == NULL) {
+			tag_desc = NULL;
+			goto out;
+		}
+		tag_desc = mm->context.tag_store;
+		for (i = 0; i < max_desc; i++, tag_desc++)
+			tag_desc->tag_users = 0;
+		open_desc = mm->context.tag_store;
+		i = 0;
+	}
+
+	/* Check if we ran out of tag storage descriptors */
+	if (open_desc == NULL) {
+		tag_desc = NULL;
+		goto out;
+	}
+
+	/* Mark this tag descriptor slot in use and then initialize it */
+	tag_desc = open_desc;
+	tag_desc->tag_users = 1;
+
+	/* Tag storage has not been allocated for this vma and space
+	 * is available in tag storage descriptor. Since this page is
+	 * being swapped out, there is a high probability that subsequent
+	 * pages in the VMA will be swapped out as well. Allocate pages to
+	 * store tags for as many pages in this vma as possible but not
+	 * more than TAG_STORAGE_PAGES. Each byte in tag space holds
+	 * two ADI tags since each ADI tag is 4 bits. Each ADI tag
+	 * covers adi_blksize() worth of addresses. Check if the hole is
+	 * big enough to accommodate full address range for using
+	 * TAG_STORAGE_PAGES number of tag pages.
+	 */
+	size = TAG_STORAGE_PAGES * PAGE_SIZE;
+	end_addr = addr + (size*2*adi_blksize()) - 1;
+	/* Check for overflow. If overflow occurs, allocate only one page */
+	if (end_addr < addr) {
+		size = PAGE_SIZE;
+		end_addr = addr + (size*2*adi_blksize()) - 1;
+		/* If overflow happens with the minimum tag storage
+		 * allocation as well, adjust ending address for this
+		 * tag storage.
+		 */
+		if (end_addr < addr)
+			end_addr = ULONG_MAX;
+	}
+	if (hole_end < end_addr) {
+		/* Available hole is too small on the upper end of
+		 * address. Can we expand the range towards the lower
+		 * address and maximize use of this slot?
+		 */
+		unsigned long tmp_addr;
+
+		end_addr = hole_end - 1;
+		tmp_addr = end_addr - (size*2*adi_blksize()) + 1;
+		/* Check for underflow. If underflow occurs, allocate
+		 * only one page for storing ADI tags
+		 */
+		if (tmp_addr > addr) {
+			size = PAGE_SIZE;
+			tmp_addr = end_addr - (size*2*adi_blksize()) - 1;
+			/* If underflow happens with the minimum tag storage
+			 * allocation as well, adjust starting address for
+			 * this tag storage.
+			 */
+			if (tmp_addr > addr)
+				tmp_addr = 0;
+		}
+		if (tmp_addr < hole_start) {
+			/* Available hole is restricted on lower address
+			 * end as well
+			 */
+			tmp_addr = hole_start + 1;
+		}
+		addr = tmp_addr;
+		size = (end_addr + 1 - addr)/(2*adi_blksize());
+		size = (size + (PAGE_SIZE-adi_blksize()))/PAGE_SIZE;
+		size = size * PAGE_SIZE;
+	}
+	tags = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN);
+	if (tags == NULL) {
+		tag_desc->tag_users = 0;
+		tag_desc = NULL;
+		goto out;
+	}
+	tag_desc->start = addr;
+	tag_desc->tags = tags;
+	tag_desc->end = end_addr;
+
+out:
+	spin_unlock_irqrestore(&mm->context.tag_lock, flags);
+	return tag_desc;
+}
+
+void del_tag_store(tag_storage_desc_t *tag_desc, struct mm_struct *mm)
+{
+	unsigned long flags;
+	unsigned char *tags = NULL;
+
+	spin_lock_irqsave(&mm->context.tag_lock, flags);
+	tag_desc->tag_users--;
+	if (tag_desc->tag_users == 0) {
+		tag_desc->start = tag_desc->end = 0;
+		/* Do not free up the tag storage space allocated
+		 * by the first descriptor. This is persistent
+		 * emergency tag storage space for the task.
+		 */
+		if (tag_desc != mm->context.tag_store) {
+			tags = tag_desc->tags;
+			tag_desc->tags = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&mm->context.tag_lock, flags);
+	kfree(tags);
+}
+
+#define tag_start(addr, tag_desc)		\
+	((tag_desc)->tags + ((addr - (tag_desc)->start)/(2*adi_blksize())))
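+
+/* For illustration (same assumptions as above, adi_blksize() == 64): an
+ * address 0x10000 bytes past tag_desc->start maps to byte
+ * 0x10000 / (2 * 64) = 512 of the tag array, each byte packing the
+ * version tags of two adjacent ADI blocks.
+ */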
+
+/* Retrieve any saved ADI tags for the page being swapped back in and
+ * restore these tags to the newly allocated physical page.
+ */
+void adi_restore_tags(struct mm_struct *mm, struct vm_area_struct *vma,
+		      unsigned long addr, pte_t pte)
+{
+	unsigned char *tag;
+	tag_storage_desc_t *tag_desc;
+	unsigned long paddr, tmp, version1, version2;
+
+	/* Check if the swapped out page has an ADI version
+	 * saved. If yes, restore version tag to the newly
+	 * allocated page.
+	 */
+	tag_desc = find_tag_store(mm, vma, addr);
+	if (tag_desc == NULL)
+		return;
+
+	tag = tag_start(addr, tag_desc);
+	paddr = pte_val(pte) & _PAGE_PADDR_4V;
+	for (tmp = paddr; tmp < (paddr+PAGE_SIZE); tmp += adi_blksize()) {
+		version1 = (*tag) >> 4;
+		version2 = (*tag) & 0x0f;
+		*tag++ = 0;
+		asm volatile("stxa %0, [%1] %2\n\t"
+			:
+			: "r" (version1), "r" (tmp),
+			  "i" (ASI_MCD_REAL));
+		tmp += adi_blksize();
+		asm volatile("stxa %0, [%1] %2\n\t"
+			:
+			: "r" (version2), "r" (tmp),
+			  "i" (ASI_MCD_REAL));
+	}
+	asm volatile("membar #Sync\n\t");
+
+	/* Check and mark this tag space for release later if
+	 * the swapped in page was the last user of tag space
+	 */
+	del_tag_store(tag_desc, mm);
+}
+
+/* A page is about to be swapped out. Save any ADI tags associated with
+ * this physical page so they can be restored later when the page is swapped
+ * back in.
+ */
+int adi_save_tags(struct mm_struct *mm, struct vm_area_struct *vma,
+		  unsigned long addr, pte_t oldpte)
+{
+	unsigned char *tag;
+	tag_storage_desc_t *tag_desc;
+	unsigned long version1, version2, paddr, tmp;
+
+	tag_desc = alloc_tag_store(mm, vma, addr);
+	if (tag_desc == NULL)
+		return -1;
+
+	tag = tag_start(addr, tag_desc);
+	paddr = pte_val(oldpte) & _PAGE_PADDR_4V;
+	for (tmp = paddr; tmp < (paddr+PAGE_SIZE); tmp += adi_blksize()) {
+		asm volatile("ldxa [%1] %2, %0\n\t"
+				: "=r" (version1)
+				: "r" (tmp), "i" (ASI_MCD_REAL));
+		tmp += adi_blksize();
+		asm volatile("ldxa [%1] %2, %0\n\t"
+				: "=r" (version2)
+				: "r" (tmp), "i" (ASI_MCD_REAL));
+		*tag = (version1 << 4) | version2;
+		tag++;
+	}
+
+	return 0;
+}
diff --git a/arch/sparc/kernel/apc.c b/arch/sparc/kernel/apc.c
new file mode 100644
index 0000000..ecd05bc
--- /dev/null
+++ b/arch/sparc/kernel/apc.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* apc - Driver implementation for power management functions
+ * of Aurora Personality Chip (APC) on SPARCstation-4/5 and
+ * derivatives.
+ *
+ * Copyright (c) 2002 Eric Brower (ebrower@usa.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/oplib.h>
+#include <linux/uaccess.h>
+#include <asm/auxio.h>
+#include <asm/apc.h>
+#include <asm/processor.h>
+
+/* Debugging
+ * 
+ * #define APC_DEBUG_LED
+ */
+
+#define APC_MINOR	MISC_DYNAMIC_MINOR
+#define APC_OBPNAME	"power-management"
+#define APC_DEVNAME "apc"
+
+static u8 __iomem *regs;
+static int apc_no_idle = 0;
+
+#define apc_readb(offs)		(sbus_readb(regs+offs))
+#define apc_writeb(val, offs) 	(sbus_writeb(val, regs+offs))
+
+/* Specify "apc=noidle" on the kernel command line to 
+ * disable APC CPU standby support.  Certain prototype
+ * systems (SPARCstation-Fox) do not play well with APC
+ * CPU idle, so disable this if your system has APC and 
+ * crashes randomly.
+ */
+static int __init apc_setup(char *str) 
+{
+	if(!strncmp(str, "noidle", strlen("noidle"))) {
+		apc_no_idle = 1;
+		return 1;
+	}
+	return 0;
+}
+__setup("apc=", apc_setup);
+
+/* 
+ * CPU idle callback function
+ * See .../arch/sparc/kernel/process.c
+ */
+static void apc_swift_idle(void)
+{
+#ifdef APC_DEBUG_LED
+	set_auxio(0x00, AUXIO_LED); 
+#endif
+
+	apc_writeb(apc_readb(APC_IDLE_REG) | APC_IDLE_ON, APC_IDLE_REG);
+
+#ifdef APC_DEBUG_LED
+	set_auxio(AUXIO_LED, 0x00); 
+#endif
+} 
+
+static inline void apc_free(struct platform_device *op)
+{
+	of_iounmap(&op->resource[0], regs, resource_size(&op->resource[0]));
+}
+
+static int apc_open(struct inode *inode, struct file *f)
+{
+	return 0;
+}
+
+static int apc_release(struct inode *inode, struct file *f)
+{
+	return 0;
+}
+
+static long apc_ioctl(struct file *f, unsigned int cmd, unsigned long __arg)
+{
+	__u8 inarg, __user *arg = (__u8 __user *) __arg;
+
+	switch (cmd) {
+	case APCIOCGFANCTL:
+		if (put_user(apc_readb(APC_FANCTL_REG) & APC_REGMASK, arg))
+			return -EFAULT;
+		break;
+
+	case APCIOCGCPWR:
+		if (put_user(apc_readb(APC_CPOWER_REG) & APC_REGMASK, arg))
+			return -EFAULT;
+		break;
+
+	case APCIOCGBPORT:
+		if (put_user(apc_readb(APC_BPORT_REG) & APC_BPMASK, arg))
+			return -EFAULT;
+		break;
+
+	case APCIOCSFANCTL:
+		if (get_user(inarg, arg))
+			return -EFAULT;
+		apc_writeb(inarg & APC_REGMASK, APC_FANCTL_REG);
+		break;
+
+	case APCIOCSCPWR:
+		if (get_user(inarg, arg))
+			return -EFAULT;
+		apc_writeb(inarg & APC_REGMASK, APC_CPOWER_REG);
+		break;
+
+	case APCIOCSBPORT:
+		if (get_user(inarg, arg))
+			return -EFAULT;
+		apc_writeb(inarg & APC_BPMASK, APC_BPORT_REG);
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
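+
+/* Illustrative userspace sketch (hypothetical; error handling omitted):
+ *
+ *	int fd = open("/dev/apc", O_RDWR);
+ *	__u8 fan;
+ *
+ *	if (ioctl(fd, APCIOCGFANCTL, &fan) == 0)
+ *		printf("fan control: %u\n", fan);
+ */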
+
+static const struct file_operations apc_fops = {
+	.unlocked_ioctl =	apc_ioctl,
+	.open =			apc_open,
+	.release =		apc_release,
+	.llseek =		noop_llseek,
+};
+
+static struct miscdevice apc_miscdev = { APC_MINOR, APC_DEVNAME, &apc_fops };
+
+static int apc_probe(struct platform_device *op)
+{
+	int err;
+
+	regs = of_ioremap(&op->resource[0], 0,
+			  resource_size(&op->resource[0]), APC_OBPNAME);
+	if (!regs) {
+		printk(KERN_ERR "%s: unable to map registers\n", APC_DEVNAME);
+		return -ENODEV;
+	}
+
+	err = misc_register(&apc_miscdev);
+	if (err) {
+		printk(KERN_ERR "%s: unable to register device\n", APC_DEVNAME);
+		apc_free(op);
+		return -ENODEV;
+	}
+
+	/* Assign power management IDLE handler */
+	if (!apc_no_idle)
+		sparc_idle = apc_swift_idle;
+
+	printk(KERN_INFO "%s: power management initialized%s\n", 
+	       APC_DEVNAME, apc_no_idle ? " (CPU idle disabled)" : "");
+
+	return 0;
+}
+
+static const struct of_device_id apc_match[] = {
+	{
+		.name = APC_OBPNAME,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, apc_match);
+
+static struct platform_driver apc_driver = {
+	.driver = {
+		.name = "apc",
+		.of_match_table = apc_match,
+	},
+	.probe		= apc_probe,
+};
+
+static int __init apc_init(void)
+{
+	return platform_driver_register(&apc_driver);
+}
+
+/* This driver is not critical to the boot process
+ * and is easiest to ioremap when SBus is already
+ * initialized, so we install ourselves thusly:
+ */
+__initcall(apc_init);
diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
new file mode 100644
index 0000000..5784f2d
--- /dev/null
+++ b/arch/sparc/kernel/asm-offsets.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ * We use the technique used in the OSF Mach kernel code:
+ * generate asm statements containing #defines,
+ * compile this file to assembler, and then extract the
+ * #defines from the assembly-language output.
+ *
+ * On sparc, thread_info data is static and TI_XXX offsets are computed by hand.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+// #include <linux/mm.h>
+#include <linux/kbuild.h>
+
+#include <asm/hibernate.h>
+
+#ifdef CONFIG_SPARC32
+int sparc32_foo(void)
+{
+	DEFINE(AOFF_thread_fork_kpsr,
+			offsetof(struct thread_struct, fork_kpsr));
+	return 0;
+}
+#else
+int sparc64_foo(void)
+{
+#ifdef CONFIG_HIBERNATION
+	BLANK();
+	OFFSET(SC_REG_FP, saved_context, fp);
+	OFFSET(SC_REG_CWP, saved_context, cwp);
+	OFFSET(SC_REG_WSTATE, saved_context, wstate);
+
+	OFFSET(SC_REG_TICK, saved_context, tick);
+	OFFSET(SC_REG_PSTATE, saved_context, pstate);
+
+	OFFSET(SC_REG_G4, saved_context, g4);
+	OFFSET(SC_REG_G5, saved_context, g5);
+	OFFSET(SC_REG_G6, saved_context, g6);
+#endif
+	return 0;
+}
+#endif
+
+int foo(void)
+{
+	BLANK();
+	DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread));
+	BLANK();
+	DEFINE(AOFF_mm_context, offsetof(struct mm_struct, context));
+	BLANK();
+	DEFINE(VMA_VM_MM,    offsetof(struct vm_area_struct, vm_mm));
+
+	/* DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); */
+	return 0;
+}
+
diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c
new file mode 100644
index 0000000..a6e91bf
--- /dev/null
+++ b/arch/sparc/kernel/audit.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+#include "kernel.h"
+
+static unsigned int dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned int read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned int write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned int chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned int signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+#ifdef CONFIG_COMPAT
+	if (arch == AUDIT_ARCH_SPARC)
+		return 1;
+#endif
+	return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned int syscall)
+{
+#ifdef CONFIG_COMPAT
+	if (abi == AUDIT_ARCH_SPARC)
+		return sparc32_classify_syscall(syscall);
+#endif
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_socketcall:
+		return 4;
+	case __NR_execve:
+		return 5;
+	default:
+		return 0;
+	}
+}
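+
+/* The return values follow the convention consumed by kernel/auditsc.c:
+ * 0 means no special class, 1 a compat syscall, and 2-5 the open,
+ * openat, socketcall and execve classes used for argument auditing.
+ */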
+
+static int __init audit_classes_init(void)
+{
+#ifdef CONFIG_COMPAT
+	audit_register_class(AUDIT_CLASS_WRITE_32, sparc32_write_class);
+	audit_register_class(AUDIT_CLASS_READ_32, sparc32_read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE_32, sparc32_dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR_32, sparc32_chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL_32, sparc32_signal_class);
+#endif
+	audit_register_class(AUDIT_CLASS_WRITE, write_class);
+	audit_register_class(AUDIT_CLASS_READ, read_class);
+	audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+	audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+	audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+	return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/arch/sparc/kernel/auxio_32.c b/arch/sparc/kernel/auxio_32.c
new file mode 100644
index 0000000..a32d588
--- /dev/null
+++ b/arch/sparc/kernel/auxio_32.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* auxio.c: Probing for the Sparc AUXIO register at boot time.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/export.h>
+
+#include <asm/oplib.h>
+#include <asm/io.h>
+#include <asm/auxio.h>
+#include <asm/string.h>		/* memset(), Linux has no bzero() */
+#include <asm/cpu_type.h>
+
+#include "kernel.h"
+
+/* Probe and map in the Auxiliary I/O register */
+
+/* auxio_register is not static because it is referenced 
+ * in entry.S::floppy_tdone
+ */
+void __iomem *auxio_register = NULL;
+static DEFINE_SPINLOCK(auxio_lock);
+
+void __init auxio_probe(void)
+{
+	phandle node, auxio_nd;
+	struct linux_prom_registers auxregs[1];
+	struct resource r;
+
+	switch (sparc_cpu_model) {
+	case sparc_leon:
+	case sun4d:
+		return;
+	default:
+		break;
+	}
+	node = prom_getchild(prom_root_node);
+	auxio_nd = prom_searchsiblings(node, "auxiliary-io");
+	if(!auxio_nd) {
+		node = prom_searchsiblings(node, "obio");
+		node = prom_getchild(node);
+		auxio_nd = prom_searchsiblings(node, "auxio");
+		if(!auxio_nd) {
+#ifdef CONFIG_PCI
+			/* There may be auxio on Ebus */
+			return;
+#else
+			if(prom_searchsiblings(node, "leds")) {
+				/* VME chassis sun4m machine, no auxio exists. */
+				return;
+			}
+			prom_printf("Cannot find auxio node, cannot continue...\n");
+			prom_halt();
+#endif
+		}
+	}
+	if(prom_getproperty(auxio_nd, "reg", (char *) auxregs, sizeof(auxregs)) <= 0)
+		return;
+	prom_apply_obio_ranges(auxregs, 0x1);
+	/* Map the register both read and write */
+	r.flags = auxregs[0].which_io & 0xF;
+	r.start = auxregs[0].phys_addr;
+	r.end = auxregs[0].phys_addr + auxregs[0].reg_size - 1;
+	auxio_register = of_ioremap(&r, 0, auxregs[0].reg_size, "auxio");
+	/* Fix the address on sun4m. */
+	if ((((unsigned long) auxregs[0].phys_addr) & 3) == 3)
+		auxio_register += (3 - ((unsigned long)auxio_register & 3));
+
+	set_auxio(AUXIO_LED, 0);
+}
+
+unsigned char get_auxio(void)
+{
+	if(auxio_register) 
+		return sbus_readb(auxio_register);
+	return 0;
+}
+EXPORT_SYMBOL(get_auxio);
+
+void set_auxio(unsigned char bits_on, unsigned char bits_off)
+{
+	unsigned char regval;
+	unsigned long flags;
+	spin_lock_irqsave(&auxio_lock, flags);
+	switch (sparc_cpu_model) {
+	case sun4m:
+		if(!auxio_register)
+			break;     /* VME chassis sun4m, no auxio. */
+		regval = sbus_readb(auxio_register);
+		sbus_writeb(((regval | bits_on) & ~bits_off) | AUXIO_ORMEIN4M,
+			auxio_register);
+		break;
+	case sun4d:
+		break;
+	default:
+		panic("Can't set AUXIO register on this machine.");
+	}
+	spin_unlock_irqrestore(&auxio_lock, flags);
+}
+EXPORT_SYMBOL(set_auxio);
+
+/* sun4m power control register (AUXIO2) */
+
+volatile u8 __iomem *auxio_power_register = NULL;
+
+void __init auxio_power_probe(void)
+{
+	struct linux_prom_registers regs;
+	phandle node;
+	struct resource r;
+
+	/* Attempt to find the sun4m power control node. */
+	node = prom_getchild(prom_root_node);
+	node = prom_searchsiblings(node, "obio");
+	node = prom_getchild(node);
+	node = prom_searchsiblings(node, "power");
+	if (node == 0 || (s32)node == -1)
+		return;
+
+	/* Map the power control register. */
+	if (prom_getproperty(node, "reg", (char *)&regs, sizeof(regs)) <= 0)
+		return;
+	prom_apply_obio_ranges(&regs, 1);
+	memset(&r, 0, sizeof(r));
+	r.flags = regs.which_io & 0xF;
+	r.start = regs.phys_addr;
+	r.end = regs.phys_addr + regs.reg_size - 1;
+	auxio_power_register =
+		(u8 __iomem *)of_ioremap(&r, 0, regs.reg_size, "auxpower");
+
+	/* Display a quick message on the console. */
+	if (auxio_power_register)
+		printk(KERN_INFO "Power off control detected.\n");
+}
diff --git a/arch/sparc/kernel/auxio_64.c b/arch/sparc/kernel/auxio_64.c
new file mode 100644
index 0000000..4e8f56c
--- /dev/null
+++ b/arch/sparc/kernel/auxio_64.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/* auxio.c: Probing for the Sparc AUXIO register at boot time.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Refactoring for unified NCR/PCIO support 2002 Eric Brower (ebrower@usa.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+#include <asm/auxio.h>
+
+void __iomem *auxio_register = NULL;
+EXPORT_SYMBOL(auxio_register);
+
+enum auxio_type {
+	AUXIO_TYPE_NODEV,
+	AUXIO_TYPE_SBUS,
+	AUXIO_TYPE_EBUS
+};
+
+static enum auxio_type auxio_devtype = AUXIO_TYPE_NODEV;
+static DEFINE_SPINLOCK(auxio_lock);
+
+static void __auxio_rmw(u8 bits_on, u8 bits_off, int ebus)
+{
+	if (auxio_register) {
+		unsigned long flags;
+		u8 regval, newval;
+
+		spin_lock_irqsave(&auxio_lock, flags);
+
+		regval = (ebus ?
+			  (u8) readl(auxio_register) :
+			  sbus_readb(auxio_register));
+		newval =  regval | bits_on;
+		newval &= ~bits_off;
+		if (!ebus)
+			newval &= ~AUXIO_AUX1_MASK;
+		if (ebus)
+			writel((u32) newval, auxio_register);
+		else
+			sbus_writeb(newval, auxio_register);
+		
+		spin_unlock_irqrestore(&auxio_lock, flags);
+	}
+}
+
+static void __auxio_set_bit(u8 bit, int on, int ebus)
+{
+	u8 bits_on = bit;
+	u8 bits_off = 0;
+
+	if (!on) {
+		u8 tmp = bits_off;
+		bits_off = bits_on;
+		bits_on = tmp;
+	}
+	__auxio_rmw(bits_on, bits_off, ebus);
+}
+
+void auxio_set_led(int on)
+{
+	int ebus = auxio_devtype == AUXIO_TYPE_EBUS;
+	u8 bit;
+
+	bit = (ebus ? AUXIO_PCIO_LED : AUXIO_AUX1_LED);
+	__auxio_set_bit(bit, on, ebus);
+}
+EXPORT_SYMBOL(auxio_set_led);
+
+static void __auxio_sbus_set_lte(int on)
+{
+	__auxio_set_bit(AUXIO_AUX1_LTE, on, 0);
+}
+
+void auxio_set_lte(int on)
+{
+	switch(auxio_devtype) {
+	case AUXIO_TYPE_SBUS:
+		__auxio_sbus_set_lte(on);
+		break;
+	case AUXIO_TYPE_EBUS:
+		/* FALL-THROUGH */
+	default:
+		break;
+	}
+}
+EXPORT_SYMBOL(auxio_set_lte);
+
+static const struct of_device_id auxio_match[] = {
+	{
+		.name = "auxio",
+	},
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, auxio_match);
+
+static int auxio_probe(struct platform_device *dev)
+{
+	struct device_node *dp = dev->dev.of_node;
+	unsigned long size;
+
+	if (!strcmp(dp->parent->name, "ebus")) {
+		auxio_devtype = AUXIO_TYPE_EBUS;
+		size = sizeof(u32);
+	} else if (!strcmp(dp->parent->name, "sbus")) {
+		auxio_devtype = AUXIO_TYPE_SBUS;
+		size = 1;
+	} else {
+		printk("auxio: Unknown parent bus type [%s]\n",
+		       dp->parent->name);
+		return -ENODEV;
+	}
+	auxio_register = of_ioremap(&dev->resource[0], 0, size, "auxio");
+	if (!auxio_register)
+		return -ENODEV;
+
+	printk(KERN_INFO "AUXIO: Found device at %s\n",
+	       dp->full_name);
+
+	if (auxio_devtype == AUXIO_TYPE_EBUS)
+		auxio_set_led(AUXIO_LED_ON);
+
+	return 0;
+}
+
+static struct platform_driver auxio_driver = {
+	.probe		= auxio_probe,
+	.driver = {
+		.name = "auxio",
+		.of_match_table = auxio_match,
+	},
+};
+
+static int __init auxio_init(void)
+{
+	return platform_driver_register(&auxio_driver);
+}
+
+/* Must be after subsys_initcall() so that busses are probed.  Must
+ * be before device_initcall() because things like the floppy driver
+ * need to use the AUXIO register.
+ */
+fs_initcall(auxio_init);
diff --git a/arch/sparc/kernel/btext.c b/arch/sparc/kernel/btext.c
new file mode 100644
index 0000000..5869773
--- /dev/null
+++ b/arch/sparc/kernel/btext.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Procedures for drawing on the screen early on in the boot process.
+ *
+ * Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/console.h>
+
+#include <asm/btext.h>
+#include <asm/oplib.h>
+#include <asm/io.h>
+
+#define NO_SCROLL
+
+#ifndef NO_SCROLL
+static void scrollscreen(void);
+#endif
+
+static void draw_byte(unsigned char c, long locX, long locY);
+static void draw_byte_32(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_16(unsigned char *bits, unsigned int *base, int rb);
+static void draw_byte_8(unsigned char *bits, unsigned int *base, int rb);
+
+#define __force_data __attribute__((__section__(".data")))
+
+static int g_loc_X __force_data;
+static int g_loc_Y __force_data;
+static int g_max_loc_X __force_data;
+static int g_max_loc_Y __force_data;
+
+static int dispDeviceRowBytes __force_data;
+static int dispDeviceDepth  __force_data;
+static int dispDeviceRect[4] __force_data;
+static unsigned char *dispDeviceBase __force_data;
+
+#define cmapsz	(16*256)
+
+static unsigned char vga_font[cmapsz];
+
+static int __init btext_initialize(phandle node)
+{
+	unsigned int width, height, depth, pitch;
+	unsigned long address = 0;
+	u32 prop;
+
+	if (prom_getproperty(node, "width", (char *)&width, 4) < 0)
+		return -EINVAL;
+	if (prom_getproperty(node, "height", (char *)&height, 4) < 0)
+		return -EINVAL;
+	if (prom_getproperty(node, "depth", (char *)&depth, 4) < 0)
+		return -EINVAL;
+	pitch = width * ((depth + 7) / 8);
+
+	if (prom_getproperty(node, "linebytes", (char *)&prop, 4) >= 0 &&
+	    prop != 0xffffffffu)
+		pitch = prop;
+
+	if (pitch == 1)
+		pitch = 0x1000;
+
+	if (prom_getproperty(node, "address", (char *)&prop, 4) >= 0)
+		address = prop;
+
+	/* FIXME: Add support for PCI reg properties. Right now, only
+	 * reliable on macs
+	 */
+	if (address == 0)
+		return -EINVAL;
+
+	g_loc_X = 0;
+	g_loc_Y = 0;
+	g_max_loc_X = width / 8;
+	g_max_loc_Y = height / 16;
+	dispDeviceBase = (unsigned char *)address;
+	dispDeviceRowBytes = pitch;
+	dispDeviceDepth = depth == 15 ? 16 : depth;
+	dispDeviceRect[0] = dispDeviceRect[1] = 0;
+	dispDeviceRect[2] = width;
+	dispDeviceRect[3] = height;
+
+	return 0;
+}
+
+/* Calc the base address of a given point (x,y) */
+static unsigned char * calc_base(int x, int y)
+{
+	unsigned char *base = dispDeviceBase;
+
+	base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
+	base += (y + dispDeviceRect[1]) * dispDeviceRowBytes;
+	return base;
+}
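+
+/* Example (illustrative): on an 8-bit deep display, pixel (x, y) lives
+ * at dispDeviceBase + x + y * dispDeviceRowBytes (plus the rectangle
+ * offsets); at 32 bpp the x term becomes x * 4.
+ */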
+
+static void btext_clearscreen(void)
+{
+	unsigned int *base	= (unsigned int *)calc_base(0, 0);
+	unsigned long width 	= ((dispDeviceRect[2] - dispDeviceRect[0]) *
+					(dispDeviceDepth >> 3)) >> 2;
+	int i,j;
+
+	for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1]); i++)
+	{
+		unsigned int *ptr = base;
+		for(j=width; j; --j)
+			*(ptr++) = 0;
+		base += (dispDeviceRowBytes >> 2);
+	}
+}
+
+#ifndef NO_SCROLL
+static void scrollscreen(void)
+{
+	unsigned int *src     	= (unsigned int *)calc_base(0,16);
+	unsigned int *dst     	= (unsigned int *)calc_base(0,0);
+	unsigned long width    	= ((dispDeviceRect[2] - dispDeviceRect[0]) *
+				   (dispDeviceDepth >> 3)) >> 2;
+	int i,j;
+
+	for (i=0; i<(dispDeviceRect[3] - dispDeviceRect[1] - 16); i++)
+	{
+		unsigned int *src_ptr = src;
+		unsigned int *dst_ptr = dst;
+		for(j=width; j; --j)
+			*(dst_ptr++) = *(src_ptr++);
+		src += (dispDeviceRowBytes >> 2);
+		dst += (dispDeviceRowBytes >> 2);
+	}
+	for (i=0; i<16; i++)
+	{
+		unsigned int *dst_ptr = dst;
+		for(j=width; j; --j)
+			*(dst_ptr++) = 0;
+		dst += (dispDeviceRowBytes >> 2);
+	}
+}
+#endif /* ndef NO_SCROLL */
+
+static void btext_drawchar(char c)
+{
+	int cline = 0;
+#ifdef NO_SCROLL
+	int x;
+#endif
+	switch (c) {
+	case '\b':
+		if (g_loc_X > 0)
+			--g_loc_X;
+		break;
+	case '\t':
+		g_loc_X = (g_loc_X & -8) + 8;
+		break;
+	case '\r':
+		g_loc_X = 0;
+		break;
+	case '\n':
+		g_loc_X = 0;
+		g_loc_Y++;
+		cline = 1;
+		break;
+	default:
+		draw_byte(c, g_loc_X++, g_loc_Y);
+	}
+	if (g_loc_X >= g_max_loc_X) {
+		g_loc_X = 0;
+		g_loc_Y++;
+		cline = 1;
+	}
+#ifndef NO_SCROLL
+	while (g_loc_Y >= g_max_loc_Y) {
+		scrollscreen();
+		g_loc_Y--;
+	}
+#else
+	/* wrap around from bottom to top of screen so we don't
+	   waste time scrolling each line.  -- paulus. */
+	if (g_loc_Y >= g_max_loc_Y)
+		g_loc_Y = 0;
+	if (cline) {
+		for (x = 0; x < g_max_loc_X; ++x)
+			draw_byte(' ', x, g_loc_Y);
+	}
+#endif
+}
+
+static void btext_drawtext(const char *c, unsigned int len)
+{
+	while (len--)
+		btext_drawchar(*c++);
+}
+
+static void draw_byte(unsigned char c, long locX, long locY)
+{
+	unsigned char *base	= calc_base(locX << 3, locY << 4);
+	unsigned char *font	= &vga_font[((unsigned int)c) * 16];
+	int rb			= dispDeviceRowBytes;
+
+	switch(dispDeviceDepth) {
+	case 24:
+	case 32:
+		draw_byte_32(font, (unsigned int *)base, rb);
+		break;
+	case 15:
+	case 16:
+		draw_byte_16(font, (unsigned int *)base, rb);
+		break;
+	case 8:
+		draw_byte_8(font, (unsigned int *)base, rb);
+		break;
+	}
+}
+
+static unsigned int expand_bits_8[16] = {
+	0x00000000,
+	0x000000ff,
+	0x0000ff00,
+	0x0000ffff,
+	0x00ff0000,
+	0x00ff00ff,
+	0x00ffff00,
+	0x00ffffff,
+	0xff000000,
+	0xff0000ff,
+	0xff00ff00,
+	0xff00ffff,
+	0xffff0000,
+	0xffff00ff,
+	0xffffff00,
+	0xffffffff
+};
+
+static unsigned int expand_bits_16[4] = {
+	0x00000000,
+	0x0000ffff,
+	0xffff0000,
+	0xffffffff
+};
+
+
+static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+{
+	int l, bits;
+	int fg = 0xFFFFFFFFUL;
+	int bg = 0x00000000UL;
+
+	for (l = 0; l < 16; ++l)
+	{
+		bits = *font++;
+		base[0] = (-(bits >> 7) & fg) ^ bg;
+		base[1] = (-((bits >> 6) & 1) & fg) ^ bg;
+		base[2] = (-((bits >> 5) & 1) & fg) ^ bg;
+		base[3] = (-((bits >> 4) & 1) & fg) ^ bg;
+		base[4] = (-((bits >> 3) & 1) & fg) ^ bg;
+		base[5] = (-((bits >> 2) & 1) & fg) ^ bg;
+		base[6] = (-((bits >> 1) & 1) & fg) ^ bg;
+		base[7] = (-(bits & 1) & fg) ^ bg;
+		base = (unsigned int *) ((char *)base + rb);
+	}
+}
+
+static void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+{
+	int l, bits;
+	int fg = 0xFFFFFFFFUL;
+	int bg = 0x00000000UL;
+	unsigned int *eb = (int *)expand_bits_16;
+
+	for (l = 0; l < 16; ++l)
+	{
+		bits = *font++;
+		base[0] = (eb[bits >> 6] & fg) ^ bg;
+		base[1] = (eb[(bits >> 4) & 3] & fg) ^ bg;
+		base[2] = (eb[(bits >> 2) & 3] & fg) ^ bg;
+		base[3] = (eb[bits & 3] & fg) ^ bg;
+		base = (unsigned int *) ((char *)base + rb);
+	}
+}
+
+static void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+{
+	int l, bits;
+	int fg = 0x0F0F0F0FUL;
+	int bg = 0x00000000UL;
+	unsigned int *eb = (int *)expand_bits_8;
+
+	for (l = 0; l < 16; ++l)
+	{
+		bits = *font++;
+		base[0] = (eb[bits >> 4] & fg) ^ bg;
+		base[1] = (eb[bits & 0xf] & fg) ^ bg;
+		base = (unsigned int *) ((char *)base + rb);
+	}
+}
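+
+/* Worked example (illustrative): for the font row 0xa5 (10100101b),
+ * eb[0xa] = 0xff00ff00 and eb[0x5] = 0x00ff00ff; masked with
+ * fg = 0x0F0F0F0F this draws the eight pixels 15,0,15,0,0,15,0,15, one
+ * byte per pixel with the leftmost pixel in the most significant byte
+ * (sparc is big-endian).
+ */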
+
+static void btext_console_write(struct console *con, const char *s,
+				unsigned int n)
+{
+	btext_drawtext(s, n);
+}
+
+static struct console btext_console = {
+	.name	= "btext",
+	.write	= btext_console_write,
+	.flags	= CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME,
+	.index	= 0,
+};
+
+int __init btext_find_display(void)
+{
+	phandle node;
+	char type[32];
+	int ret;
+
+	node = prom_inst2pkg(prom_stdout);
+	if (prom_getproperty(node, "device_type", type, 32) < 0)
+		return -ENODEV;
+	if (strcmp(type, "display"))
+		return -ENODEV;
+
+	ret = btext_initialize(node);
+	if (!ret) {
+		btext_clearscreen();
+		register_console(&btext_console);
+	}
+	return ret;
+}
+
+static unsigned char vga_font[cmapsz] = {
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
+0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
+0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
+0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
+0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
+0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
+0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
+0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
+0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
+0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
+0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
+0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
+0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
+0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
+0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
+0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
+0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
+0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
+0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
+0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
+0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
+0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
+0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
+0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
+0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
+0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
+0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
+0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
+0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
+0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
+0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
+0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
+0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
+0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
+0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
+0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
+0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
+0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
+0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
+0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
+0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
+0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
+0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
+0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
+0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
+0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
+0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
+0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
+0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
+0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
+0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
+0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
+0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
+0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
+0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
+0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
+0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
+0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
+0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
+0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
+0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
+0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
+0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
+0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
+0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
+0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
+0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
+0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
+0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
+0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
+0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
+0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
+0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
+0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
+0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
+0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
+0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
+0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
+0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
+0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
+0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
+0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
+0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
+0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
+0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
+0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
+0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
+0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
+0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
+0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
+0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
+0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
+0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
+0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
+0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
+0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
+0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
+0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
+0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
+0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
+0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
+0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
+0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
+0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
+0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
+0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
+0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
+0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
+0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
+0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
+0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+0x00, 0x00, 0x00, 0x00,
+};
diff --git a/arch/sparc/kernel/central.c b/arch/sparc/kernel/central.c
new file mode 100644
index 0000000..38ae4fd
--- /dev/null
+++ b/arch/sparc/kernel/central.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0
+/* central.c: Central FHC driver for Sunfire/Starfire/Wildfire.
+ *
+ * Copyright (C) 1997, 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <asm/fhc.h>
+#include <asm/upa.h>
+
+struct clock_board {
+	void __iomem		*clock_freq_regs;
+	void __iomem		*clock_regs;
+	void __iomem		*clock_ver_reg;
+	int			num_slots;
+	struct resource		leds_resource;
+	struct platform_device	leds_pdev;
+};
+
+struct fhc {
+	void __iomem		*pregs;
+	bool			central;
+	bool			jtag_master;
+	int			board_num;
+	struct resource		leds_resource;
+	struct platform_device	leds_pdev;
+};
+
+static int clock_board_calc_nslots(struct clock_board *p)
+{
+	u8 reg = upa_readb(p->clock_regs + CLOCK_STAT1) & 0xc0;
+
+	switch (reg) {
+	case 0x40:
+		return 16;
+
+	case 0xc0:
+		return 8;
+
+	case 0x80:
+		reg = 0;
+		if (p->clock_ver_reg)
+			reg = upa_readb(p->clock_ver_reg);
+		if (reg) {
+			if (reg & 0x80)
+				return 4;
+			else
+				return 5;
+		}
+		/* Fallthrough */
+	default:
+		return 4;
+	}
+}
+
+static int clock_board_probe(struct platform_device *op)
+{
+	struct clock_board *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	int err = -ENOMEM;
+
+	if (!p) {
+		printk(KERN_ERR "clock_board: Cannot allocate struct clock_board\n");
+		goto out;
+	}
+
+	p->clock_freq_regs = of_ioremap(&op->resource[0], 0,
+					resource_size(&op->resource[0]),
+					"clock_board_freq");
+	if (!p->clock_freq_regs) {
+		printk(KERN_ERR "clock_board: Cannot map clock_freq_regs\n");
+		goto out_free;
+	}
+
+	p->clock_regs = of_ioremap(&op->resource[1], 0,
+				   resource_size(&op->resource[1]),
+				   "clock_board_regs");
+	if (!p->clock_regs) {
+		printk(KERN_ERR "clock_board: Cannot map clock_regs\n");
+		goto out_unmap_clock_freq_regs;
+	}
+
+	if (op->resource[2].flags) {
+		p->clock_ver_reg = of_ioremap(&op->resource[2], 0,
+					      resource_size(&op->resource[2]),
+					      "clock_ver_reg");
+		if (!p->clock_ver_reg) {
+			printk(KERN_ERR "clock_board: Cannot map clock_ver_reg\n");
+			goto out_unmap_clock_regs;
+		}
+	}
+
+	p->num_slots = clock_board_calc_nslots(p);
+
+	p->leds_resource.start = (unsigned long)
+		(p->clock_regs + CLOCK_CTRL);
+	p->leds_resource.end = p->leds_resource.start;
+	p->leds_resource.name = "leds";
+
+	p->leds_pdev.name = "sunfire-clockboard-leds";
+	p->leds_pdev.id = -1;
+	p->leds_pdev.resource = &p->leds_resource;
+	p->leds_pdev.num_resources = 1;
+	p->leds_pdev.dev.parent = &op->dev;
+
+	err = platform_device_register(&p->leds_pdev);
+	if (err) {
+		printk(KERN_ERR "clock_board: Could not register LEDS "
+		       "platform device\n");
+		goto out_unmap_clock_ver_reg;
+	}
+
+	printk(KERN_INFO "clock_board: Detected %d slot Enterprise system.\n",
+	       p->num_slots);
+
+	err = 0;
+out:
+	return err;
+
+out_unmap_clock_ver_reg:
+	if (p->clock_ver_reg)
+		of_iounmap(&op->resource[2], p->clock_ver_reg,
+			   resource_size(&op->resource[2]));
+
+out_unmap_clock_regs:
+	of_iounmap(&op->resource[1], p->clock_regs,
+		   resource_size(&op->resource[1]));
+
+out_unmap_clock_freq_regs:
+	of_iounmap(&op->resource[0], p->clock_freq_regs,
+		   resource_size(&op->resource[0]));
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+static const struct of_device_id clock_board_match[] = {
+	{
+		.name = "clock-board",
+	},
+	{},
+};
+
+static struct platform_driver clock_board_driver = {
+	.probe		= clock_board_probe,
+	.driver = {
+		.name = "clock_board",
+		.of_match_table = clock_board_match,
+	},
+};
+
+static int fhc_probe(struct platform_device *op)
+{
+	struct fhc *p = kzalloc(sizeof(*p), GFP_KERNEL);
+	int err = -ENOMEM;
+	u32 reg;
+
+	if (!p) {
+		printk(KERN_ERR "fhc: Cannot allocate struct fhc\n");
+		goto out;
+	}
+
+	if (!strcmp(op->dev.of_node->parent->name, "central"))
+		p->central = true;
+
+	p->pregs = of_ioremap(&op->resource[0], 0,
+			      resource_size(&op->resource[0]),
+			      "fhc_pregs");
+	if (!p->pregs) {
+		printk(KERN_ERR "fhc: Cannot map pregs\n");
+		goto out_free;
+	}
+
+	if (p->central) {
+		reg = upa_readl(p->pregs + FHC_PREGS_BSR);
+		p->board_num = ((reg >> 16) & 1) | ((reg >> 12) & 0x0e);
+	} else {
+		p->board_num = of_getintprop_default(op->dev.of_node, "board#", -1);
+		if (p->board_num == -1) {
+			printk(KERN_ERR "fhc: No board# property\n");
+			goto out_unmap_pregs;
+		}
+		if (upa_readl(p->pregs + FHC_PREGS_JCTRL) & FHC_JTAG_CTRL_MENAB)
+			p->jtag_master = true;
+	}
+
+	if (!p->central) {
+		p->leds_resource.start = (unsigned long)
+			(p->pregs + FHC_PREGS_CTRL);
+		p->leds_resource.end = p->leds_resource.start;
+		p->leds_resource.name = "leds";
+
+		p->leds_pdev.name = "sunfire-fhc-leds";
+		p->leds_pdev.id = p->board_num;
+		p->leds_pdev.resource = &p->leds_resource;
+		p->leds_pdev.num_resources = 1;
+		p->leds_pdev.dev.parent = &op->dev;
+
+		err = platform_device_register(&p->leds_pdev);
+		if (err) {
+			printk(KERN_ERR "fhc: Could not register LEDS "
+			       "platform device\n");
+			goto out_unmap_pregs;
+		}
+	}
+	reg = upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+	if (!p->central)
+		reg |= FHC_CONTROL_IXIST;
+
+	reg &= ~(FHC_CONTROL_AOFF |
+		 FHC_CONTROL_BOFF |
+		 FHC_CONTROL_SLINE);
+
+	upa_writel(reg, p->pregs + FHC_PREGS_CTRL);
+	upa_readl(p->pregs + FHC_PREGS_CTRL);
+
+	reg = upa_readl(p->pregs + FHC_PREGS_ID);
+	printk(KERN_INFO "fhc: Board #%d, Version[%x] PartID[%x] Manuf[%x] %s\n",
+	       p->board_num,
+	       (reg & FHC_ID_VERS) >> 28,
+	       (reg & FHC_ID_PARTID) >> 12,
+	       (reg & FHC_ID_MANUF) >> 1,
+	       (p->jtag_master ?
+		"(JTAG Master)" :
+		(p->central ? "(Central)" : "")));
+
+	err = 0;
+
+out:
+	return err;
+
+out_unmap_pregs:
+	of_iounmap(&op->resource[0], p->pregs, resource_size(&op->resource[0]));
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+static const struct of_device_id fhc_match[] = {
+	{
+		.name = "fhc",
+	},
+	{},
+};
+
+static struct platform_driver fhc_driver = {
+	.probe		= fhc_probe,
+	.driver = {
+		.name = "fhc",
+		.of_match_table = fhc_match,
+	},
+};
+
+static int __init sunfire_init(void)
+{
+	(void) platform_driver_register(&fhc_driver);
+	(void) platform_driver_register(&clock_board_driver);
+	return 0;
+}
+
+fs_initcall(sunfire_init);
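For reference, a minimal standalone sketch (not kernel code; the sample
inputs are invented) of the two register decodes the driver above
performs, the clock board's STAT1 slot count and the central FHC's
board number:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Slot count from the top two bits of the clock board's STAT1
 * register, mirroring clock_board_calc_nslots() above.
 */
static int clock_nslots(uint8_t stat1, bool have_ver_reg, uint8_t ver)
{
	switch (stat1 & 0xc0) {
	case 0x40:
		return 16;
	case 0xc0:
		return 8;
	case 0x80:
		if (have_ver_reg && ver)
			return (ver & 0x80) ? 4 : 5;
		/* fall through */
	default:
		return 4;
	}
}

/* Board number from the FHC board status register, mirroring the
 * central-FHC case in fhc_probe(): bit 16 supplies the low bit,
 * bits 13-15 the upper three bits.
 */
static int fhc_board_num(uint32_t bsr)
{
	return ((bsr >> 16) & 1) | ((bsr >> 12) & 0x0e);
}

int main(void)
{
	printf("slots = %d\n", clock_nslots(0x80, true, 0x80)); /* 4  */
	printf("board = %d\n", fhc_board_num(0x0001a000));      /* 11 */
	return 0;
}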
diff --git a/arch/sparc/kernel/cherrs.S b/arch/sparc/kernel/cherrs.S
new file mode 100644
index 0000000..7f3d3d2
--- /dev/null
+++ b/arch/sparc/kernel/cherrs.S
@@ -0,0 +1,576 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	/* These get patched into the trap table at boot time
+	 * once we know we have a cheetah processor.
+	 */
+	.globl		cheetah_fecc_trap_vector
+	.type		cheetah_fecc_trap_vector,#function
+cheetah_fecc_trap_vector:
+	membar		#Sync
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	andn		%g1, DCU_DC | DCU_IC, %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	sethi		%hi(cheetah_fast_ecc), %g2
+	jmpl		%g2 + %lo(cheetah_fast_ecc), %g0
+	 mov		0, %g1
+	.size		cheetah_fecc_trap_vector,.-cheetah_fecc_trap_vector
+
+	.globl		cheetah_fecc_trap_vector_tl1
+	.type		cheetah_fecc_trap_vector_tl1,#function
+cheetah_fecc_trap_vector_tl1:
+	membar		#Sync
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	andn		%g1, DCU_DC | DCU_IC, %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	sethi		%hi(cheetah_fast_ecc), %g2
+	jmpl		%g2 + %lo(cheetah_fast_ecc), %g0
+	 mov		1, %g1
+	.size		cheetah_fecc_trap_vector_tl1,.-cheetah_fecc_trap_vector_tl1
+
+	.globl	cheetah_cee_trap_vector
+	.type	cheetah_cee_trap_vector,#function
+cheetah_cee_trap_vector:
+	membar		#Sync
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	andn		%g1, DCU_IC, %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	sethi		%hi(cheetah_cee), %g2
+	jmpl		%g2 + %lo(cheetah_cee), %g0
+	 mov		0, %g1
+	.size		cheetah_cee_trap_vector,.-cheetah_cee_trap_vector
+
+	.globl		cheetah_cee_trap_vector_tl1
+	.type		cheetah_cee_trap_vector_tl1,#function
+cheetah_cee_trap_vector_tl1:
+	membar		#Sync
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	andn		%g1, DCU_IC, %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	sethi		%hi(cheetah_cee), %g2
+	jmpl		%g2 + %lo(cheetah_cee), %g0
+	 mov		1, %g1
+	.size		cheetah_cee_trap_vector_tl1,.-cheetah_cee_trap_vector_tl1
+
+	.globl	cheetah_deferred_trap_vector
+	.type	cheetah_deferred_trap_vector,#function
+cheetah_deferred_trap_vector:
+	membar		#Sync
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	andn		%g1, DCU_DC | DCU_IC, %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	sethi		%hi(cheetah_deferred_trap), %g2
+	jmpl		%g2 + %lo(cheetah_deferred_trap), %g0
+	 mov		0, %g1
+	.size		cheetah_deferred_trap_vector,.-cheetah_deferred_trap_vector
+
+	.globl		cheetah_deferred_trap_vector_tl1
+	.type		cheetah_deferred_trap_vector_tl1,#function
+cheetah_deferred_trap_vector_tl1:
+	membar		#Sync
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	andn		%g1, DCU_DC | DCU_IC, %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	sethi		%hi(cheetah_deferred_trap), %g2
+	jmpl		%g2 + %lo(cheetah_deferred_trap), %g0
+	 mov		1, %g1
+	.size		cheetah_deferred_trap_vector_tl1,.-cheetah_deferred_trap_vector_tl1
+
+	/* Cheetah+ specific traps. These are for the new I/D cache parity
+	 * error traps.  The first argument to cheetah_plus_parity_handler
+	 * is encoded as follows:
+	 *
+	 * Bit0:	0=dcache,1=icache
+	 * Bit1:	0=recoverable,1=unrecoverable
+	 */
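+	/* So, of the values passed below: 0x0 = recoverable D-cache,
+	 * 0x1 = recoverable I-cache, 0x2 = unrecoverable D-cache,
+	 * 0x3 = unrecoverable I-cache.
+	 */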
+	.globl		cheetah_plus_dcpe_trap_vector
+	.type		cheetah_plus_dcpe_trap_vector,#function
+cheetah_plus_dcpe_trap_vector:
+	membar		#Sync
+	sethi		%hi(do_cheetah_plus_data_parity), %g7
+	jmpl		%g7 + %lo(do_cheetah_plus_data_parity), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	.size		cheetah_plus_dcpe_trap_vector,.-cheetah_plus_dcpe_trap_vector
+
+	.type		do_cheetah_plus_data_parity,#function
+do_cheetah_plus_data_parity:
+	rdpr		%pil, %g2
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	mov		0x0, %o0
+	call		cheetah_plus_parity_error
+	 add		%sp, PTREGS_OFF, %o1
+	ba,a,pt		%xcc, rtrap_irq
+	.size		do_cheetah_plus_data_parity,.-do_cheetah_plus_data_parity
+
+	.globl		cheetah_plus_dcpe_trap_vector_tl1
+	.type		cheetah_plus_dcpe_trap_vector_tl1,#function
+cheetah_plus_dcpe_trap_vector_tl1:
+	membar		#Sync
+	wrpr		PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate
+	sethi		%hi(do_dcpe_tl1), %g3
+	jmpl		%g3 + %lo(do_dcpe_tl1), %g0
+	 nop
+	nop
+	nop
+	nop
+	.size		cheetah_plus_dcpe_trap_vector_tl1,.-cheetah_plus_dcpe_trap_vector_tl1
+
+	.globl		cheetah_plus_icpe_trap_vector
+	.type		cheetah_plus_icpe_trap_vector,#function
+cheetah_plus_icpe_trap_vector:
+	membar		#Sync
+	sethi		%hi(do_cheetah_plus_insn_parity), %g7
+	jmpl		%g7 + %lo(do_cheetah_plus_insn_parity), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	.size		cheetah_plus_icpe_trap_vector,.-cheetah_plus_icpe_trap_vector
+
+	.type		do_cheetah_plus_insn_parity,#function
+do_cheetah_plus_insn_parity:
+	rdpr		%pil, %g2
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	mov		0x1, %o0
+	call		cheetah_plus_parity_error
+	 add		%sp, PTREGS_OFF, %o1
+	ba,a,pt		%xcc, rtrap_irq
+	.size		do_cheetah_plus_insn_parity,.-do_cheetah_plus_insn_parity
+
+	.globl		cheetah_plus_icpe_trap_vector_tl1
+	.type		cheetah_plus_icpe_trap_vector_tl1,#function
+cheetah_plus_icpe_trap_vector_tl1:
+	membar		#Sync
+	wrpr		PSTATE_IG | PSTATE_PEF | PSTATE_PRIV, %pstate
+	sethi		%hi(do_icpe_tl1), %g3
+	jmpl		%g3 + %lo(do_icpe_tl1), %g0
+	 nop
+	nop
+	nop
+	nop
+	.size		cheetah_plus_icpe_trap_vector_tl1,.-cheetah_plus_icpe_trap_vector_tl1
+
+	/* If we take one of these traps when tl >= 1, then we
+	 * jump to interrupt globals.  If some trap level above us
+	 * was also using interrupt globals, we cannot recover.
+	 * We may use all interrupt global registers except %g6.
+	 */
+	.globl		do_dcpe_tl1
+	.type		do_dcpe_tl1,#function
+do_dcpe_tl1:
+	rdpr		%tl, %g1		! Save original trap level
+	mov		1, %g2			! Setup TSTATE checking loop
+	sethi		%hi(TSTATE_IG), %g3	! TSTATE mask bit
+1:	wrpr		%g2, %tl		! Set trap level to check
+	rdpr		%tstate, %g4		! Read TSTATE for this level
+	andcc		%g4, %g3, %g0		! Interrupt globals in use?
+	bne,a,pn	%xcc, do_dcpe_tl1_fatal	! Yep, irrecoverable
+	 wrpr		%g1, %tl		! Restore original trap level
+	add		%g2, 1, %g2		! Next trap level
+	cmp		%g2, %g1		! Hit them all yet?
+	ble,pt		%icc, 1b		! Not yet
+	 nop
+	wrpr		%g1, %tl		! Restore original trap level
+do_dcpe_tl1_nonfatal:	/* Ok we may use interrupt globals safely. */
+	sethi		%hi(dcache_parity_tl1_occurred), %g2
+	lduw		[%g2 + %lo(dcache_parity_tl1_occurred)], %g1
+	add		%g1, 1, %g1
+	stw		%g1, [%g2 + %lo(dcache_parity_tl1_occurred)]
+	/* Reset D-cache parity */
+	sethi		%hi(1 << 16), %g1	! D-cache size
+	mov		(1 << 5), %g2		! D-cache line size
+	sub		%g1, %g2, %g1		! Move down 1 cacheline
+1:	srl		%g1, 14, %g3		! Compute UTAG
+	membar		#Sync
+	stxa		%g3, [%g1] ASI_DCACHE_UTAG
+	membar		#Sync
+	sub		%g2, 8, %g3		! 64-bit data word within line
+2:	membar		#Sync
+	stxa		%g0, [%g1 + %g3] ASI_DCACHE_DATA
+	membar		#Sync
+	subcc		%g3, 8, %g3		! Next 64-bit data word
+	bge,pt		%icc, 2b
+	 nop
+	subcc		%g1, %g2, %g1		! Next cacheline
+	bge,pt		%icc, 1b
+	 nop
+	ba,a,pt		%xcc, dcpe_icpe_tl1_common
+
+do_dcpe_tl1_fatal:
+	sethi		%hi(1f), %g7
+	ba,pt		%xcc, etraptl1
+1:	or		%g7, %lo(1b), %g7
+	mov		0x2, %o0
+	call		cheetah_plus_parity_error
+	 add		%sp, PTREGS_OFF, %o1
+	ba,a,pt		%xcc, rtrap
+	.size		do_dcpe_tl1,.-do_dcpe_tl1
+
+	.globl		do_icpe_tl1
+	.type		do_icpe_tl1,#function
+do_icpe_tl1:
+	rdpr		%tl, %g1		! Save original trap level
+	mov		1, %g2			! Setup TSTATE checking loop
+	sethi		%hi(TSTATE_IG), %g3	! TSTATE mask bit
+1:	wrpr		%g2, %tl		! Set trap level to check
+	rdpr		%tstate, %g4		! Read TSTATE for this level
+	andcc		%g4, %g3, %g0		! Interrupt globals in use?
+	bne,a,pn	%xcc, do_icpe_tl1_fatal	! Yep, irrecoverable
+	 wrpr		%g1, %tl		! Restore original trap level
+	add		%g2, 1, %g2		! Next trap level
+	cmp		%g2, %g1		! Hit them all yet?
+	ble,pt		%icc, 1b		! Not yet
+	 nop
+	wrpr		%g1, %tl		! Restore original trap level
+do_icpe_tl1_nonfatal:	/* Ok we may use interrupt globals safely. */
+	sethi		%hi(icache_parity_tl1_occurred), %g2
+	lduw		[%g2 + %lo(icache_parity_tl1_occurred)], %g1
+	add		%g1, 1, %g1
+	stw		%g1, [%g2 + %lo(icache_parity_tl1_occurred)]
+	/* Flush I-cache */
+	sethi		%hi(1 << 15), %g1	! I-cache size
+	mov		(1 << 5), %g2		! I-cache line size
+	sub		%g1, %g2, %g1
+1:	or		%g1, (2 << 3), %g3
+	stxa		%g0, [%g3] ASI_IC_TAG
+	membar		#Sync
+	subcc		%g1, %g2, %g1
+	bge,pt		%icc, 1b
+	 nop
+	ba,a,pt		%xcc, dcpe_icpe_tl1_common
+
+do_icpe_tl1_fatal:
+	sethi		%hi(1f), %g7
+	ba,pt		%xcc, etraptl1
+1:	or		%g7, %lo(1b), %g7
+	mov		0x3, %o0
+	call		cheetah_plus_parity_error
+	 add		%sp, PTREGS_OFF, %o1
+	ba,a,pt		%xcc, rtrap
+	.size		do_icpe_tl1,.-do_icpe_tl1
+
+	.type		dcpe_icpe_tl1_common,#function
+dcpe_icpe_tl1_common:
+	/* Flush D-cache, re-enable D/I caches in DCU and finally
+	 * retry the trapping instruction.
+	 */
+	sethi		%hi(1 << 16), %g1	! D-cache size
+	mov		(1 << 5), %g2		! D-cache line size
+	sub		%g1, %g2, %g1
+1:	stxa		%g0, [%g1] ASI_DCACHE_TAG
+	membar		#Sync
+	subcc		%g1, %g2, %g1
+	bge,pt		%icc, 1b
+	 nop
+	ldxa		[%g0] ASI_DCU_CONTROL_REG, %g1
+	or		%g1, (DCU_DC | DCU_IC), %g1
+	stxa		%g1, [%g0] ASI_DCU_CONTROL_REG
+	membar		#Sync
+	retry
+	.size		dcpe_icpe_tl1_common,.-dcpe_icpe_tl1_common
+
+	/* Capture I/D/E-cache state into per-cpu error scoreboard.
+	 *
+	 * %g1:		(TL >= 1) ? 1 : 0
+	 * %g2:		scratch
+	 * %g3:		scratch
+	 * %g4:		AFSR
+	 * %g5:		AFAR
+	 * %g6:		unused, will have current thread ptr after etrap
+	 * %g7:		scratch
+	 */
+	.type		__cheetah_log_error,#function
+__cheetah_log_error:
+	/* Put "TL1" software bit into AFSR. */
+	and		%g1, 0x1, %g1
+	sllx		%g1, 63, %g2
+	or		%g4, %g2, %g4
+
+	/* Get log entry pointer for this cpu at this trap level. */
+	BRANCH_IF_JALAPENO(g2,g3,50f)
+	ldxa		[%g0] ASI_SAFARI_CONFIG, %g2
+	srlx		%g2, 17, %g2
+	ba,pt		%xcc, 60f
+	 and		%g2, 0x3ff, %g2
+
+50:	ldxa		[%g0] ASI_JBUS_CONFIG, %g2
+	srlx		%g2, 17, %g2
+	and		%g2, 0x1f, %g2
+
+60:	sllx		%g2, 9, %g2
+	sethi		%hi(cheetah_error_log), %g3
+	ldx		[%g3 + %lo(cheetah_error_log)], %g3
+	brz,pn		%g3, 80f
+	 nop
+
+	add		%g3, %g2, %g3
+	sllx		%g1, 8, %g1
+	add		%g3, %g1, %g1
+
+	/* %g1 holds pointer to the top of the logging scoreboard */
+	ldx		[%g1 + 0x0], %g7
+	cmp		%g7, -1
+	bne,pn		%xcc, 80f
+	 nop
+
+	stx		%g4, [%g1 + 0x0]
+	stx		%g5, [%g1 + 0x8]
+	add		%g1, 0x10, %g1
+
+	/* %g1 now points to D-cache logging area */
+	set		0x3ff8, %g2	/* DC_addr mask		*/
+	and		%g5, %g2, %g2	/* DC_addr bits of AFAR	*/
+	srlx		%g5, 12, %g3
+	or		%g3, 1, %g3	/* PHYS tag + valid	*/
+
+10:	ldxa		[%g2] ASI_DCACHE_TAG, %g7
+	cmp		%g3, %g7	/* TAG match?		*/
+	bne,pt		%xcc, 13f
+	 nop
+
+	/* Yep, what we want, capture state. */
+	stx		%g2, [%g1 + 0x20]
+	stx		%g7, [%g1 + 0x28]
+
+	/* A membar Sync is required before and after utag access. */
+	membar		#Sync
+	ldxa		[%g2] ASI_DCACHE_UTAG, %g7
+	membar		#Sync
+	stx		%g7, [%g1 + 0x30]
+	ldxa		[%g2] ASI_DCACHE_SNOOP_TAG, %g7
+	stx		%g7, [%g1 + 0x38]
+	clr		%g3
+
+12:	ldxa		[%g2 + %g3] ASI_DCACHE_DATA, %g7
+	stx		%g7, [%g1]
+	add		%g3, (1 << 5), %g3
+	cmp		%g3, (4 << 5)
+	bl,pt		%xcc, 12b
+	 add		%g1, 0x8, %g1
+
+	ba,pt		%xcc, 20f
+	 add		%g1, 0x20, %g1
+
+13:	sethi		%hi(1 << 14), %g7
+	add		%g2, %g7, %g2
+	srlx		%g2, 14, %g7
+	cmp		%g7, 4
+	bl,pt		%xcc, 10b
+	 nop
+
+	add		%g1, 0x40, %g1
+
+	/* %g1 now points to I-cache logging area */
+20:	set		0x1fe0, %g2	/* IC_addr mask		*/
+	and		%g5, %g2, %g2	/* IC_addr bits of AFAR	*/
+	sllx		%g2, 1, %g2	/* IC_addr[13:6]==VA[12:5] */
+	srlx		%g5, (13 - 8), %g3 /* Make PTAG */
+	andn		%g3, 0xff, %g3	/* Mask off undefined bits */
+
+21:	ldxa		[%g2] ASI_IC_TAG, %g7
+	andn		%g7, 0xff, %g7
+	cmp		%g3, %g7
+	bne,pt		%xcc, 23f
+	 nop
+
+	/* Yep, what we want, capture state. */
+	stx		%g2, [%g1 + 0x40]
+	stx		%g7, [%g1 + 0x48]
+	add		%g2, (1 << 3), %g2
+	ldxa		[%g2] ASI_IC_TAG, %g7
+	add		%g2, (1 << 3), %g2
+	stx		%g7, [%g1 + 0x50]
+	ldxa		[%g2] ASI_IC_TAG, %g7
+	add		%g2, (1 << 3), %g2
+	stx		%g7, [%g1 + 0x60]
+	ldxa		[%g2] ASI_IC_TAG, %g7
+	stx		%g7, [%g1 + 0x68]
+	sub		%g2, (3 << 3), %g2
+	ldxa		[%g2] ASI_IC_STAG, %g7
+	stx		%g7, [%g1 + 0x58]
+	clr		%g3
+	srlx		%g2, 2, %g2
+
+22:	ldxa		[%g2 + %g3] ASI_IC_INSTR, %g7
+	stx		%g7, [%g1]
+	add		%g3, (1 << 3), %g3
+	cmp		%g3, (8 << 3)
+	bl,pt		%xcc, 22b
+	 add		%g1, 0x8, %g1
+
+	ba,pt		%xcc, 30f
+	 add		%g1, 0x30, %g1
+
+23:	sethi		%hi(1 << 14), %g7
+	add		%g2, %g7, %g2
+	srlx		%g2, 14, %g7
+	cmp		%g7, 4
+	bl,pt		%xcc, 21b
+	 nop
+
+	add		%g1, 0x70, %g1
+
+	/* %g1 now points to E-cache logging area */
+30:	andn		%g5, (32 - 1), %g2
+	stx		%g2, [%g1 + 0x20]
+	ldxa		[%g2] ASI_EC_TAG_DATA, %g7
+	stx		%g7, [%g1 + 0x28]
+	ldxa		[%g2] ASI_EC_R, %g0
+	clr		%g3
+
+31:	ldxa		[%g3] ASI_EC_DATA, %g7
+	stx		%g7, [%g1 + %g3]
+	add		%g3, 0x8, %g3
+	cmp		%g3, 0x20
+
+	bl,pt		%xcc, 31b
+	 nop
+80:
+	rdpr		%tt, %g2
+	cmp		%g2, 0x70
+	be		c_fast_ecc
+	 cmp		%g2, 0x63
+	be		c_cee
+	 nop
+	ba,a,pt		%xcc, c_deferred
+	.size		__cheetah_log_error,.-__cheetah_log_error
+
+	/* Cheetah FECC trap handling, we get here from tl{0,1}_fecc
+	 * in the trap table.  That code has done a memory barrier
+	 * and has disabled both the I-cache and D-cache in the DCU
+	 * control register.  The I-cache is disabled so that we may
+	 * capture the corrupted cache line, and the D-cache is disabled
+	 * because corrupt data may have been placed there and we don't
+	 * want to reference it.
+	 *
+	 * %g1 is one if this trap occurred at %tl >= 1.
+	 *
+	 * Next, we turn off error reporting so that we don't recurse.
+	 */
+	.globl		cheetah_fast_ecc
+	.type		cheetah_fast_ecc,#function
+cheetah_fast_ecc:
+	ldxa		[%g0] ASI_ESTATE_ERROR_EN, %g2
+	andn		%g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2
+	stxa		%g2, [%g0] ASI_ESTATE_ERROR_EN
+	membar		#Sync
+
+	/* Fetch and clear AFSR/AFAR */
+	ldxa		[%g0] ASI_AFSR, %g4
+	ldxa		[%g0] ASI_AFAR, %g5
+	stxa		%g4, [%g0] ASI_AFSR
+	membar		#Sync
+
+	ba,pt		%xcc, __cheetah_log_error
+	 nop
+	.size		cheetah_fast_ecc,.-cheetah_fast_ecc
+
+	.type		c_fast_ecc,#function
+c_fast_ecc:
+	rdpr		%pil, %g2
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		cheetah_fecc_handler
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap_irq
+	.size		c_fast_ecc,.-c_fast_ecc
+
+	/* Our caller has disabled I-cache and performed membar Sync. */
+	.globl		cheetah_cee
+	.type		cheetah_cee,#function
+cheetah_cee:
+	ldxa		[%g0] ASI_ESTATE_ERROR_EN, %g2
+	andn		%g2, ESTATE_ERROR_CEEN, %g2
+	stxa		%g2, [%g0] ASI_ESTATE_ERROR_EN
+	membar		#Sync
+
+	/* Fetch and clear AFSR/AFAR */
+	ldxa		[%g0] ASI_AFSR, %g4
+	ldxa		[%g0] ASI_AFAR, %g5
+	stxa		%g4, [%g0] ASI_AFSR
+	membar		#Sync
+
+	ba,pt		%xcc, __cheetah_log_error
+	 nop
+	.size		cheetah_cee,.-cheetah_cee
+
+	.type		c_cee,#function
+c_cee:
+	rdpr		%pil, %g2
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		cheetah_cee_handler
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap_irq
+	.size		c_cee,.-c_cee
+
+	/* Our caller has disabled I-cache+D-cache and performed membar Sync. */
+	.globl		cheetah_deferred_trap
+	.type		cheetah_deferred_trap,#function
+cheetah_deferred_trap:
+	ldxa		[%g0] ASI_ESTATE_ERROR_EN, %g2
+	andn		%g2, ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN, %g2
+	stxa		%g2, [%g0] ASI_ESTATE_ERROR_EN
+	membar		#Sync
+
+	/* Fetch and clear AFSR/AFAR */
+	ldxa		[%g0] ASI_AFSR, %g4
+	ldxa		[%g0] ASI_AFAR, %g5
+	stxa		%g4, [%g0] ASI_AFSR
+	membar		#Sync
+
+	ba,pt		%xcc, __cheetah_log_error
+	 nop
+	.size		cheetah_deferred_trap,.-cheetah_deferred_trap
+
+	.type		c_deferred,#function
+c_deferred:
+	rdpr		%pil, %g2
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		cheetah_deferred_handler
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap_irq
+	.size		c_deferred,.-c_deferred
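For reference, a standalone C sketch (not kernel code; helper names
are invented) of the bookkeeping __cheetah_log_error performs before
branching to the C handlers: each CPU owns two 256-byte entries in
cheetah_error_log, selected by CPU id and by whether the trap came in
at TL >= 1, and bit 63 of the saved AFSR is reused as a software TL1
flag.

#include <stdint.h>
#include <stdio.h>

/* Mirrors "sllx %g2, 9" plus "sllx %g1, 8" above: two 0x100-byte
 * entries per cpu, TL0 entry first, TL >= 1 entry second.
 */
static uint64_t log_entry_offset(unsigned int cpuid, int at_tl1)
{
	return ((uint64_t)cpuid << 9) + ((uint64_t)(at_tl1 & 1) << 8);
}

/* Mirrors "and %g1, 0x1; sllx %g1, 63; or %g4, %g2, %g4". */
static uint64_t afsr_with_tl1_flag(uint64_t afsr, int at_tl1)
{
	return afsr | ((uint64_t)(at_tl1 & 1) << 63);
}

int main(void)
{
	/* CPU 3, trap taken at TL >= 1: entry lives at +0x700. */
	printf("offset = 0x%llx\n",
	       (unsigned long long)log_entry_offset(3, 1));
	printf("afsr   = 0x%llx\n",
	       (unsigned long long)afsr_with_tl1_flag(0x20, 1));
	return 0;
}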
diff --git a/arch/sparc/kernel/chmc.c b/arch/sparc/kernel/chmc.c
new file mode 100644
index 0000000..0de4bcb
--- /dev/null
+++ b/arch/sparc/kernel/chmc.c
@@ -0,0 +1,863 @@
+// SPDX-License-Identifier: GPL-2.0
+/* chmc.c: Driver for UltraSPARC-III memory controller.
+ *
+ * Copyright (C) 2001, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <asm/spitfire.h>
+#include <asm/chmctrl.h>
+#include <asm/cpudata.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/head.h>
+#include <asm/io.h>
+#include <asm/memctrl.h>
+
+#define DRV_MODULE_NAME		"chmc"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_MODULE_VERSION	"0.2"
+
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("UltraSPARC-III memory controller driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+static int mc_type;
+#define MC_TYPE_SAFARI		1
+#define MC_TYPE_JBUS		2
+
+static dimm_printer_t us3mc_dimm_printer;
+
+#define CHMCTRL_NDGRPS	2
+#define CHMCTRL_NDIMMS	4
+
+#define CHMC_DIMMS_PER_MC	(CHMCTRL_NDGRPS * CHMCTRL_NDIMMS)
+
+/* OBP memory-layout property format. */
+struct chmc_obp_map {
+	unsigned char	dimm_map[144];
+	unsigned char	pin_map[576];
+};
+
+#define DIMM_LABEL_SZ	8
+
+struct chmc_obp_mem_layout {
+	/* One max 8-byte string label per DIMM.  Usually
+	 * this matches the label on the motherboard where
+	 * that DIMM resides.
+	 */
+	char			dimm_labels[CHMC_DIMMS_PER_MC][DIMM_LABEL_SZ];
+
+	/* If symmetric use map[0], else it is
+	 * asymmetric and map[1] should be used.
+	 */
+	char			symmetric;
+
+	struct chmc_obp_map	map[2];
+};
+
+#define CHMCTRL_NBANKS	4
+
+struct chmc_bank_info {
+	struct chmc		*p;
+	int			bank_id;
+
+	u64			raw_reg;
+	int			valid;
+	int			uk;
+	int			um;
+	int			lk;
+	int			lm;
+	int			interleave;
+	unsigned long		base;
+	unsigned long		size;
+};
+
+struct chmc {
+	struct list_head		list;
+	int				portid;
+
+	struct chmc_obp_mem_layout	layout_prop;
+	int				layout_size;
+
+	void __iomem			*regs;
+
+	u64				timing_control1;
+	u64				timing_control2;
+	u64				timing_control3;
+	u64				timing_control4;
+	u64				memaddr_control;
+
+	struct chmc_bank_info		logical_banks[CHMCTRL_NBANKS];
+};
+
+#define JBUSMC_REGS_SIZE		8
+
+#define JB_MC_REG1_DIMM2_BANK3		0x8000000000000000UL
+#define JB_MC_REG1_DIMM1_BANK1		0x4000000000000000UL
+#define JB_MC_REG1_DIMM2_BANK2		0x2000000000000000UL
+#define JB_MC_REG1_DIMM1_BANK0		0x1000000000000000UL
+#define JB_MC_REG1_XOR			0x0000010000000000UL
+#define JB_MC_REG1_ADDR_GEN_2		0x000000e000000000UL
+#define JB_MC_REG1_ADDR_GEN_2_SHIFT	37
+#define JB_MC_REG1_ADDR_GEN_1		0x0000001c00000000UL
+#define JB_MC_REG1_ADDR_GEN_1_SHIFT	34
+#define JB_MC_REG1_INTERLEAVE		0x0000000001800000UL
+#define JB_MC_REG1_INTERLEAVE_SHIFT	23
+#define JB_MC_REG1_DIMM2_PTYPE		0x0000000000200000UL
+#define JB_MC_REG1_DIMM2_PTYPE_SHIFT	21
+#define JB_MC_REG1_DIMM1_PTYPE		0x0000000000100000UL
+#define JB_MC_REG1_DIMM1_PTYPE_SHIFT	20
+
+#define PART_TYPE_X8		0
+#define PART_TYPE_X4		1
+
+#define INTERLEAVE_NONE		0
+#define INTERLEAVE_SAME		1
+#define INTERLEAVE_INTERNAL	2
+#define INTERLEAVE_BOTH		3
+
+#define ADDR_GEN_128MB		0
+#define ADDR_GEN_256MB		1
+#define ADDR_GEN_512MB		2
+#define ADDR_GEN_1GB		3
+
+#define JB_NUM_DIMM_GROUPS	2
+#define JB_NUM_DIMMS_PER_GROUP	2
+#define JB_NUM_DIMMS		(JB_NUM_DIMM_GROUPS * JB_NUM_DIMMS_PER_GROUP)
+
+struct jbusmc_obp_map {
+	unsigned char	dimm_map[18];
+	unsigned char	pin_map[144];
+};
+
+struct jbusmc_obp_mem_layout {
+	/* One max 8-byte string label per DIMM.  Usually
+	 * this matches the label on the motherboard where
+	 * that DIMM resides.
+	 */
+	char		dimm_labels[JB_NUM_DIMMS][DIMM_LABEL_SZ];
+
+	/* Unlike the Safari layout above there is only a single map
+	 * here; it is used whether or not the layout is symmetric.
+	 */
+	char			symmetric;
+
+	struct jbusmc_obp_map	map;
+
+	char			_pad;
+};
+
+struct jbusmc_dimm_group {
+	struct jbusmc			*controller;
+	int				index;
+	u64				base_addr;
+	u64				size;
+};
+
+struct jbusmc {
+	void __iomem			*regs;
+	u64				mc_reg_1;
+	u32				portid;
+	struct jbusmc_obp_mem_layout	layout;
+	int				layout_len;
+	int				num_dimm_groups;
+	struct jbusmc_dimm_group	dimm_groups[JB_NUM_DIMM_GROUPS];
+	struct list_head		list;
+};
+
+static DEFINE_SPINLOCK(mctrl_list_lock);
+static LIST_HEAD(mctrl_list);
+
+static void mc_list_add(struct list_head *list)
+{
+	spin_lock(&mctrl_list_lock);
+	list_add(list, &mctrl_list);
+	spin_unlock(&mctrl_list_lock);
+}
+
+static void mc_list_del(struct list_head *list)
+{
+	spin_lock(&mctrl_list_lock);
+	list_del_init(list);
+	spin_unlock(&mctrl_list_lock);
+}
+
+#define SYNDROME_MIN	-1
+#define SYNDROME_MAX	144
+
+/* Convert a syndrome code into the way the bits are positioned
+ * on the bus.
+ */
+static int syndrome_to_qword_code(int syndrome_code)
+{
+	if (syndrome_code < 128)
+		syndrome_code += 16;
+	else if (syndrome_code < 128 + 9)
+		syndrome_code -= (128 - 7);
+	else if (syndrome_code < (128 + 9 + 3))
+		syndrome_code -= (128 + 9 - 4);
+	else
+		syndrome_code -= (128 + 9 + 3);
+	return syndrome_code;
+}
+
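+/* Worked examples of the mapping above: syndromes 0-127 land at
+ * quadword bit positions 16-143, 128-136 at 7-15, 137-139 at 4-6,
+ * and 140-143 at 0-3 (e.g. 130 -> 9, 137 -> 4, 140 -> 0).  Our
+ * reading: that is 128 data bits, 9 ECC check bits, 3 MTAG bits and
+ * 4 MTAG check bits of one 144-bit quadword.
+ */
+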
+/* All this magic has to do with how a cache line comes over the wire
+ * on Safari and JBUS.  A 64-byte line comes over in one or more quadword
+ * cycles, each of which transmits ECC/MTAG info as well as the actual
+ * data.
+ */
+#define L2_LINE_SIZE		64
+#define L2_LINE_ADDR_MSK	(L2_LINE_SIZE - 1)
+#define QW_PER_LINE		4
+#define QW_BYTES		(L2_LINE_SIZE / QW_PER_LINE)
+#define QW_BITS			144
+#define SAFARI_LAST_BIT		(576 - 1)
+#define JBUS_LAST_BIT		(144 - 1)
+
+static void get_pin_and_dimm_str(int syndrome_code, unsigned long paddr,
+				 int *pin_p, char **dimm_str_p, void *_prop,
+				 int base_dimm_offset)
+{
+	int qword_code = syndrome_to_qword_code(syndrome_code);
+	int cache_line_offset;
+	int offset_inverse;
+	int dimm_map_index;
+	int map_val;
+
+	if (mc_type == MC_TYPE_JBUS) {
+		struct jbusmc_obp_mem_layout *p = _prop;
+
+		/* JBUS */
+		cache_line_offset = qword_code;
+		offset_inverse = (JBUS_LAST_BIT - cache_line_offset);
+		dimm_map_index = offset_inverse / 8;
+		map_val = p->map.dimm_map[dimm_map_index];
+		map_val = ((map_val >> ((7 - (offset_inverse & 7)))) & 1);
+		*dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+		*pin_p = p->map.pin_map[cache_line_offset];
+	} else {
+		struct chmc_obp_mem_layout *p = _prop;
+		struct chmc_obp_map *mp;
+		int qword;
+
+		/* Safari */
+		if (p->symmetric)
+			mp = &p->map[0];
+		else
+			mp = &p->map[1];
+
+		qword = (paddr & L2_LINE_ADDR_MSK) / QW_BYTES;
+		cache_line_offset = ((3 - qword) * QW_BITS) + qword_code;
+		offset_inverse = (SAFARI_LAST_BIT - cache_line_offset);
+		dimm_map_index = offset_inverse >> 2;
+		map_val = mp->dimm_map[dimm_map_index];
+		map_val = ((map_val >> ((3 - (offset_inverse & 3)) << 1)) & 0x3);
+		*dimm_str_p = p->dimm_labels[base_dimm_offset + map_val];
+		*pin_p = mp->pin_map[cache_line_offset];
+	}
+}
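+
+/* Illustrative Safari walk-through of the above: an AFAR 0x10 bytes
+ * into the 64-byte L2 line gives qword = 0x10 / 16 = 1, so
+ * cache_line_offset = (3 - 1) * 144 + qword_code, and offset_inverse
+ * then counts back from bit 575 of the full 576-bit line.
+ */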
+
+static struct jbusmc_dimm_group *jbusmc_find_dimm_group(unsigned long phys_addr)
+{
+	struct jbusmc *p;
+
+	list_for_each_entry(p, &mctrl_list, list) {
+		int i;
+
+		for (i = 0; i < p->num_dimm_groups; i++) {
+			struct jbusmc_dimm_group *dp = &p->dimm_groups[i];
+
+			if (phys_addr < dp->base_addr ||
+			    (dp->base_addr + dp->size) <= phys_addr)
+				continue;
+
+			return dp;
+		}
+	}
+	return NULL;
+}
+
+static int jbusmc_print_dimm(int syndrome_code,
+			     unsigned long phys_addr,
+			     char *buf, int buflen)
+{
+	struct jbusmc_obp_mem_layout *prop;
+	struct jbusmc_dimm_group *dp;
+	struct jbusmc *p;
+	int first_dimm;
+
+	dp = jbusmc_find_dimm_group(phys_addr);
+	if (dp == NULL ||
+	    syndrome_code < SYNDROME_MIN ||
+	    syndrome_code > SYNDROME_MAX) {
+		buf[0] = '?';
+		buf[1] = '?';
+		buf[2] = '?';
+		buf[3] = '\0';
+		return 0;
+	}
+	p = dp->controller;
+	prop = &p->layout;
+
+	first_dimm = dp->index * JB_NUM_DIMMS_PER_GROUP;
+
+	if (syndrome_code != SYNDROME_MIN) {
+		char *dimm_str;
+		int pin;
+
+		get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+				     &dimm_str, prop, first_dimm);
+		sprintf(buf, "%s, pin %3d", dimm_str, pin);
+	} else {
+		int dimm;
+
+		/* Multi-bit error, we just dump out all the
+		 * dimm labels associated with this dimm group.
+		 */
+		for (dimm = 0; dimm < JB_NUM_DIMMS_PER_GROUP; dimm++) {
+			sprintf(buf, "%s ",
+				prop->dimm_labels[first_dimm + dimm]);
+			buf += strlen(buf);
+		}
+	}
+
+	return 0;
+}
+
+static u64 jbusmc_dimm_group_size(u64 base,
+				  const struct linux_prom64_registers *mem_regs,
+				  int num_mem_regs)
+{
+	u64 max = base + (8UL * 1024 * 1024 * 1024);
+	u64 max_seen = base;
+	int i;
+
+	for (i = 0; i < num_mem_regs; i++) {
+		const struct linux_prom64_registers *ent;
+		u64 this_base;
+		u64 this_end;
+
+		ent = &mem_regs[i];
+		this_base = ent->phys_addr;
+		this_end = this_base + ent->reg_size;
+		if (base < this_base || base >= this_end)
+			continue;
+		if (this_end > max)
+			this_end = max;
+		if (this_end > max_seen)
+			max_seen = this_end;
+	}
+
+	return max_seen - base;
+}
+
+static void jbusmc_construct_one_dimm_group(struct jbusmc *p,
+					    unsigned long index,
+					    const struct linux_prom64_registers *mem_regs,
+					    int num_mem_regs)
+{
+	struct jbusmc_dimm_group *dp = &p->dimm_groups[index];
+
+	dp->controller = p;
+	dp->index = index;
+
+	dp->base_addr  = (p->portid * (64UL * 1024 * 1024 * 1024));
+	dp->base_addr += (index * (8UL * 1024 * 1024 * 1024));
+	dp->size = jbusmc_dimm_group_size(dp->base_addr, mem_regs, num_mem_regs);
+}
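+
+/* Example of the base-address arithmetic above: portid 1, group 1
+ * decodes to 1 * 0x1000000000 + 1 * 0x200000000 = 0x1200000000.
+ */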
+
+static void jbusmc_construct_dimm_groups(struct jbusmc *p,
+					 const struct linux_prom64_registers *mem_regs,
+					 int num_mem_regs)
+{
+	if (p->mc_reg_1 & JB_MC_REG1_DIMM1_BANK0) {
+		jbusmc_construct_one_dimm_group(p, 0, mem_regs, num_mem_regs);
+		p->num_dimm_groups++;
+	}
+	if (p->mc_reg_1 & JB_MC_REG1_DIMM2_BANK2) {
+		jbusmc_construct_one_dimm_group(p, 1, mem_regs, num_mem_regs);
+		p->num_dimm_groups++;
+	}
+}
+
+static int jbusmc_probe(struct platform_device *op)
+{
+	const struct linux_prom64_registers *mem_regs;
+	struct device_node *mem_node;
+	int err, len, num_mem_regs;
+	struct jbusmc *p;
+	const u32 *prop;
+	const void *ml;
+
+	err = -ENODEV;
+	mem_node = of_find_node_by_path("/memory");
+	if (!mem_node) {
+		printk(KERN_ERR PFX "Cannot find /memory node.\n");
+		goto out;
+	}
+	mem_regs = of_get_property(mem_node, "reg", &len);
+	if (!mem_regs) {
+		printk(KERN_ERR PFX "Cannot get reg property of /memory node.\n");
+		goto out;
+	}
+	num_mem_regs = len / sizeof(*mem_regs);
+
+	err = -ENOMEM;
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		printk(KERN_ERR PFX "Cannot allocate struct jbusmc.\n");
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&p->list);
+
+	err = -ENODEV;
+	prop = of_get_property(op->dev.of_node, "portid", &len);
+	if (!prop || len != 4) {
+		printk(KERN_ERR PFX "Cannot find portid.\n");
+		goto out_free;
+	}
+
+	p->portid = *prop;
+
+	prop = of_get_property(op->dev.of_node, "memory-control-register-1", &len);
+	if (!prop || len != 8) {
+		printk(KERN_ERR PFX "Cannot get memory control register 1.\n");
+		goto out_free;
+	}
+
+	p->mc_reg_1 = ((u64)prop[0] << 32) | (u64) prop[1];
+
+	err = -ENOMEM;
+	p->regs = of_ioremap(&op->resource[0], 0, JBUSMC_REGS_SIZE, "jbusmc");
+	if (!p->regs) {
+		printk(KERN_ERR PFX "Cannot map jbusmc regs.\n");
+		goto out_free;
+	}
+
+	err = -ENODEV;
+	ml = of_get_property(op->dev.of_node, "memory-layout", &p->layout_len);
+	if (!ml) {
+		printk(KERN_ERR PFX "Cannot get memory layout property.\n");
+		goto out_iounmap;
+	}
+	if (p->layout_len > sizeof(p->layout)) {
+		printk(KERN_ERR PFX "Unexpected memory-layout size %d\n",
+		       p->layout_len);
+		goto out_iounmap;
+	}
+	memcpy(&p->layout, ml, p->layout_len);
+
+	jbusmc_construct_dimm_groups(p, mem_regs, num_mem_regs);
+
+	mc_list_add(&p->list);
+
+	printk(KERN_INFO PFX "UltraSPARC-IIIi memory controller at %s\n",
+	       op->dev.of_node->full_name);
+
+	dev_set_drvdata(&op->dev, p);
+
+	err = 0;
+
+out:
+	return err;
+
+out_iounmap:
+	of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+/* Does BANK decode PHYS_ADDR? */
+static int chmc_bank_match(struct chmc_bank_info *bp, unsigned long phys_addr)
+{
+	unsigned long upper_bits = (phys_addr & PA_UPPER_BITS) >> PA_UPPER_BITS_SHIFT;
+	unsigned long lower_bits = (phys_addr & PA_LOWER_BITS) >> PA_LOWER_BITS_SHIFT;
+
+	/* Bank must be enabled to match. */
+	if (bp->valid == 0)
+		return 0;
+
+	/* Would BANK match upper bits? */
+	upper_bits ^= bp->um;		/* What bits are different? */
+	upper_bits  = ~upper_bits;	/* Invert. */
+	upper_bits |= bp->uk;		/* What bits don't matter for matching? */
+	upper_bits  = ~upper_bits;	/* Invert. */
+
+	if (upper_bits)
+		return 0;
+
+	/* Would BANK match lower bits? */
+	lower_bits ^= bp->lm;		/* What bits are different? */
+	lower_bits  = ~lower_bits;	/* Invert. */
+	lower_bits |= bp->lk;		/* What bits don't matter for matching? */
+	lower_bits  = ~lower_bits;	/* Invert. */
+
+	if (lower_bits)
+		return 0;
+
+	/* I always knew you'd be the one. */
+	return 1;
+}
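+
+/* The xor/invert/or/invert sequences above are De Morgan expansions
+ * of ((bits ^ match) & ~dont_care): the bank matches iff the result
+ * is zero for both halves, i.e. every bit outside the don't-care
+ * mask equals its stored match value.
+ */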
+
+/* Given PHYS_ADDR, search memory controller banks for a match. */
+static struct chmc_bank_info *chmc_find_bank(unsigned long phys_addr)
+{
+	struct chmc *p;
+
+	list_for_each_entry(p, &mctrl_list, list) {
+		int bank_no;
+
+		for (bank_no = 0; bank_no < CHMCTRL_NBANKS; bank_no++) {
+			struct chmc_bank_info *bp;
+
+			bp = &p->logical_banks[bank_no];
+			if (chmc_bank_match(bp, phys_addr))
+				return bp;
+		}
+	}
+
+	return NULL;
+}
+
+/* This is the main purpose of this driver. */
+static int chmc_print_dimm(int syndrome_code,
+			   unsigned long phys_addr,
+			   char *buf, int buflen)
+{
+	struct chmc_bank_info *bp;
+	struct chmc_obp_mem_layout *prop;
+	int bank_in_controller, first_dimm;
+
+	bp = chmc_find_bank(phys_addr);
+	if (bp == NULL ||
+	    syndrome_code < SYNDROME_MIN ||
+	    syndrome_code > SYNDROME_MAX) {
+		buf[0] = '?';
+		buf[1] = '?';
+		buf[2] = '?';
+		buf[3] = '\0';
+		return 0;
+	}
+
+	prop = &bp->p->layout_prop;
+	bank_in_controller = bp->bank_id & (CHMCTRL_NBANKS - 1);
+	first_dimm  = (bank_in_controller & (CHMCTRL_NDGRPS - 1));
+	first_dimm *= CHMCTRL_NDIMMS;
+
+	if (syndrome_code != SYNDROME_MIN) {
+		char *dimm_str;
+		int pin;
+
+		get_pin_and_dimm_str(syndrome_code, phys_addr, &pin,
+				     &dimm_str, prop, first_dimm);
+		sprintf(buf, "%s, pin %3d", dimm_str, pin);
+	} else {
+		int dimm;
+
+		/* Multi-bit error, we just dump out all the
+		 * dimm labels associated with this bank.
+		 */
+		for (dimm = 0; dimm < CHMCTRL_NDIMMS; dimm++) {
+			sprintf(buf, "%s ",
+				prop->dimm_labels[first_dimm + dimm]);
+			buf += strlen(buf);
+		}
+	}
+	return 0;
+}
+
+/* Accessing the registers is slightly complicated.  To reach the
+ * memory controller on the same processor the code is executing on,
+ * you must use a special ASI load/store; otherwise you go through
+ * the global physical mapping.
+ */
+static u64 chmc_read_mcreg(struct chmc *p, unsigned long offset)
+{
+	unsigned long ret, this_cpu;
+
+	preempt_disable();
+
+	this_cpu = real_hard_smp_processor_id();
+
+	if (p->portid == this_cpu) {
+		__asm__ __volatile__("ldxa	[%1] %2, %0"
+				     : "=r" (ret)
+				     : "r" (offset), "i" (ASI_MCU_CTRL_REG));
+	} else {
+		__asm__ __volatile__("ldxa	[%1] %2, %0"
+				     : "=r" (ret)
+				     : "r" (p->regs + offset),
+				       "i" (ASI_PHYS_BYPASS_EC_E));
+	}
+
+	preempt_enable();
+
+	return ret;
+}
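+
+/* Note on the two ASIs above: ASI_MCU_CTRL_REG addresses the local
+ * controller by register offset alone, while ASI_PHYS_BYPASS_EC_E
+ * takes a full physical address, hence the p->regs + offset form in
+ * the remote case.
+ */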
+
+#if 0 /* currently unused */
+static void chmc_write_mcreg(struct chmc *p, unsigned long offset, u64 val)
+{
+	if (p->portid == smp_processor_id()) {
+		__asm__ __volatile__("stxa	%0, [%1] %2"
+				     : : "r" (val),
+				         "r" (offset), "i" (ASI_MCU_CTRL_REG));
+	} else {
+		__asm__ __volatile__("ldxa	%0, [%1] %2"
+				     : : "r" (val),
+				         "r" (p->regs + offset),
+				         "i" (ASI_PHYS_BYPASS_EC_E));
+	}
+}
+#endif
+
+static void chmc_interpret_one_decode_reg(struct chmc *p, int which_bank, u64 val)
+{
+	struct chmc_bank_info *bp = &p->logical_banks[which_bank];
+
+	bp->p = p;
+	bp->bank_id = (CHMCTRL_NBANKS * p->portid) + which_bank;
+	bp->raw_reg = val;
+	bp->valid = (val & MEM_DECODE_VALID) >> MEM_DECODE_VALID_SHIFT;
+	bp->uk = (val & MEM_DECODE_UK) >> MEM_DECODE_UK_SHIFT;
+	bp->um = (val & MEM_DECODE_UM) >> MEM_DECODE_UM_SHIFT;
+	bp->lk = (val & MEM_DECODE_LK) >> MEM_DECODE_LK_SHIFT;
+	bp->lm = (val & MEM_DECODE_LM) >> MEM_DECODE_LM_SHIFT;
+
+	bp->base  =  (bp->um);
+	bp->base &= ~(bp->uk);
+	bp->base <<= PA_UPPER_BITS_SHIFT;
+
+	switch(bp->lk) {
+	case 0xf:
+	default:
+		bp->interleave = 1;
+		break;
+
+	case 0xe:
+		bp->interleave = 2;
+		break;
+
+	case 0xc:
+		bp->interleave = 4;
+		break;
+
+	case 0x8:
+		bp->interleave = 8;
+		break;
+
+	case 0x0:
+		bp->interleave = 16;
+		break;
+	}
+
+	/* UK[10] is reserved, and UK[11] is not set for the SDRAM
+	 * bank size definition.
+	 */
+	bp->size = (((unsigned long)bp->uk &
+		     ((1UL << 10UL) - 1UL)) + 1UL) << PA_UPPER_BITS_SHIFT;
+	bp->size /= bp->interleave;
+}
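+
+/* Illustrative decode (assumed register values, not a real dump):
+ * lk = 0xe leaves one low address bit "don't care", so two banks
+ * alternate and interleave = 2; with uk = 0x003 the bank decodes
+ * (0x3 + 1) << PA_UPPER_BITS_SHIFT bytes of address space, which the
+ * size computation above then divides by the interleave factor.
+ */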
+
+static void chmc_fetch_decode_regs(struct chmc *p)
+{
+	if (p->layout_size == 0)
+		return;
+
+	chmc_interpret_one_decode_reg(p, 0,
+				      chmc_read_mcreg(p, CHMCTRL_DECODE1));
+	chmc_interpret_one_decode_reg(p, 1,
+				      chmc_read_mcreg(p, CHMCTRL_DECODE2));
+	chmc_interpret_one_decode_reg(p, 2,
+				      chmc_read_mcreg(p, CHMCTRL_DECODE3));
+	chmc_interpret_one_decode_reg(p, 3,
+				      chmc_read_mcreg(p, CHMCTRL_DECODE4));
+}
+
+static int chmc_probe(struct platform_device *op)
+{
+	struct device_node *dp = op->dev.of_node;
+	unsigned long ver;
+	const void *pval;
+	int len, portid;
+	struct chmc *p;
+	int err;
+
+	err = -ENODEV;
+	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+	if ((ver >> 32UL) == __JALAPENO_ID ||
+	    (ver >> 32UL) == __SERRANO_ID)
+		goto out;
+
+	portid = of_getintprop_default(dp, "portid", -1);
+	if (portid == -1)
+		goto out;
+
+	pval = of_get_property(dp, "memory-layout", &len);
+	if (pval && len > sizeof(p->layout_prop)) {
+		printk(KERN_ERR PFX "Unexpected memory-layout property "
+		       "size %d.\n", len);
+		goto out;
+	}
+
+	err = -ENOMEM;
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		printk(KERN_ERR PFX "Could not allocate struct chmc.\n");
+		goto out;
+	}
+
+	p->portid = portid;
+	p->layout_size = len;
+	if (!pval)
+		p->layout_size = 0;
+	else
+		memcpy(&p->layout_prop, pval, len);
+
+	p->regs = of_ioremap(&op->resource[0], 0, 0x48, "chmc");
+	if (!p->regs) {
+		printk(KERN_ERR PFX "Could not map registers.\n");
+		goto out_free;
+	}
+
+	if (p->layout_size != 0UL) {
+		p->timing_control1 = chmc_read_mcreg(p, CHMCTRL_TCTRL1);
+		p->timing_control2 = chmc_read_mcreg(p, CHMCTRL_TCTRL2);
+		p->timing_control3 = chmc_read_mcreg(p, CHMCTRL_TCTRL3);
+		p->timing_control4 = chmc_read_mcreg(p, CHMCTRL_TCTRL4);
+		p->memaddr_control = chmc_read_mcreg(p, CHMCTRL_MACTRL);
+	}
+
+	chmc_fetch_decode_regs(p);
+
+	mc_list_add(&p->list);
+
+	printk(KERN_INFO PFX "UltraSPARC-III memory controller at %s [%s]\n",
+	       dp->full_name,
+	       (p->layout_size ? "ACTIVE" : "INACTIVE"));
+
+	dev_set_drvdata(&op->dev, p);
+
+	err = 0;
+
+out:
+	return err;
+
+out_free:
+	kfree(p);
+	goto out;
+}
+
+static int us3mc_probe(struct platform_device *op)
+{
+	if (mc_type == MC_TYPE_SAFARI)
+		return chmc_probe(op);
+	else if (mc_type == MC_TYPE_JBUS)
+		return jbusmc_probe(op);
+	return -ENODEV;
+}
+
+static void chmc_destroy(struct platform_device *op, struct chmc *p)
+{
+	list_del(&p->list);
+	of_iounmap(&op->resource[0], p->regs, 0x48);
+	kfree(p);
+}
+
+static void jbusmc_destroy(struct platform_device *op, struct jbusmc *p)
+{
+	mc_list_del(&p->list);
+	of_iounmap(&op->resource[0], p->regs, JBUSMC_REGS_SIZE);
+	kfree(p);
+}
+
+static int us3mc_remove(struct platform_device *op)
+{
+	void *p = dev_get_drvdata(&op->dev);
+
+	if (p) {
+		if (mc_type == MC_TYPE_SAFARI)
+			chmc_destroy(op, p);
+		else if (mc_type == MC_TYPE_JBUS)
+			jbusmc_destroy(op, p);
+	}
+	return 0;
+}
+
+static const struct of_device_id us3mc_match[] = {
+	{
+		.name = "memory-controller",
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, us3mc_match);
+
+static struct platform_driver us3mc_driver = {
+	.driver = {
+		.name = "us3mc",
+		.of_match_table = us3mc_match,
+	},
+	.probe		= us3mc_probe,
+	.remove		= us3mc_remove,
+};
+
+static inline bool us3mc_platform(void)
+{
+	if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		return true;
+	return false;
+}
+
+static int __init us3mc_init(void)
+{
+	unsigned long ver;
+	int ret;
+
+	if (!us3mc_platform())
+		return -ENODEV;
+
+	__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+	if ((ver >> 32UL) == __JALAPENO_ID ||
+	    (ver >> 32UL) == __SERRANO_ID) {
+		mc_type = MC_TYPE_JBUS;
+		us3mc_dimm_printer = jbusmc_print_dimm;
+	} else {
+		mc_type = MC_TYPE_SAFARI;
+		us3mc_dimm_printer = chmc_print_dimm;
+	}
+
+	ret = register_dimm_printer(us3mc_dimm_printer);
+
+	if (!ret) {
+		ret = platform_driver_register(&us3mc_driver);
+		if (ret)
+			unregister_dimm_printer(us3mc_dimm_printer);
+	}
+	return ret;
+}
+
+static void __exit us3mc_cleanup(void)
+{
+	if (us3mc_platform()) {
+		unregister_dimm_printer(us3mc_dimm_printer);
+		platform_driver_unregister(&us3mc_driver);
+	}
+}
+
+module_init(us3mc_init);
+module_exit(us3mc_cleanup);
diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c
new file mode 100644
index 0000000..10eeb4f
--- /dev/null
+++ b/arch/sparc/kernel/compat_audit.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __32bit_syscall_numbers__
+#include <asm/unistd.h>
+#include "kernel.h"
+
+unsigned int sparc32_dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+unsigned int sparc32_chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+unsigned int sparc32_write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+unsigned int sparc32_read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+unsigned int sparc32_signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
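+/* Return codes follow the convention of the generic audit classifier:
+ * 2 = open, 3 = openat, 4 = socketcall, 5 = execve, and 1 marks any
+ * other 32-bit (compat) syscall.
+ */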
+int sparc32_classify_syscall(unsigned int syscall)
+{
+	switch(syscall) {
+	case __NR_open:
+		return 2;
+	case __NR_openat:
+		return 3;
+	case __NR_socketcall:
+		return 4;
+	case __NR_execve:
+		return 5;
+	default:
+		return 1;
+	}
+}
diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
new file mode 100644
index 0000000..4401dee
--- /dev/null
+++ b/arch/sparc/kernel/cpu.c
@@ -0,0 +1,556 @@
+// SPDX-License-Identifier: GPL-2.0
+/* cpu.c: Dinky routines to look for the kind of Sparc cpu
+ *        we are on.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/threads.h>
+
+#include <asm/spitfire.h>
+#include <asm/pgtable.h>
+#include <asm/oplib.h>
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/head.h>
+#include <asm/psr.h>
+#include <asm/mbus.h>
+#include <asm/cpudata.h>
+
+#include "kernel.h"
+#include "entry.h"
+
+DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
+EXPORT_PER_CPU_SYMBOL(__cpu_data);
+
+int ncpus_probed;
+unsigned int fsr_storage;
+
+struct cpu_info {
+	int psr_vers;
+	const char *name;
+	const char *pmu_name;
+};
+
+struct fpu_info {
+	int fp_vers;
+	const char *name;
+};
+
+#define NOCPU 8
+#define NOFPU 8
+
+struct manufacturer_info {
+	int psr_impl;
+	struct cpu_info cpu_info[NOCPU];
+	struct fpu_info fpu_info[NOFPU];
+};
+
+#define CPU(ver, _name) \
+{ .psr_vers = ver, .name = _name }
+
+#define CPU_PMU(ver, _name, _pmu_name)	\
+{ .psr_vers = ver, .name = _name, .pmu_name = _pmu_name }
+
+#define FPU(ver, _name) \
+{ .fp_vers = ver, .name = _name }
+
+static const struct manufacturer_info __initconst manufacturer_info[] = {
+{
+	0,
+	/* Sun4/100, 4/200, SLC */
+	.cpu_info = {
+		CPU(0, "Fujitsu  MB86900/1A or LSI L64831 SparcKIT-40"),
+		/* also known as STP1012PGA */
+		CPU(4,  "Fujitsu  MB86904"),
+		CPU(5, "Fujitsu TurboSparc MB86907"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(0, "Fujitsu MB86910 or Weitek WTL1164/5"),
+		FPU(1, "Fujitsu MB86911 or Weitek WTL1164/5 or LSI L64831"),
+		FPU(2, "LSI Logic L64802 or Texas Instruments ACT8847"),
+		/* SparcStation SLC, SparcStation1 */
+		FPU(3, "Weitek WTL3170/2"),
+		/* SPARCstation-5 */
+		FPU(4, "Lsi Logic/Meiko L64804 or compatible"),
+		FPU(-1, NULL)
+	}
+},{
+	1,
+	.cpu_info = {
+		/* SparcStation2, SparcServer 490 & 690 */
+		CPU(0, "LSI Logic Corporation - L64811"),
+		/* SparcStation2 */
+		CPU(1, "Cypress/ROSS CY7C601"),
+		/* Embedded controller */
+		CPU(3, "Cypress/ROSS CY7C611"),
+		/* Ross Technologies HyperSparc */
+		CPU(0xf, "ROSS HyperSparc RT620"),
+		CPU(0xe, "ROSS HyperSparc RT625 or RT626"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(0, "ROSS HyperSparc combined IU/FPU"),
+		FPU(1, "Lsi Logic L64814"),
+		FPU(2, "Texas Instruments TMS390-C602A"),
+		FPU(3, "Cypress CY7C602 FPU"),
+		FPU(-1, NULL)
+	}
+},{
+	2,
+	.cpu_info = {
+		/* ECL Implementation, CRAY S-MP Supercomputer... AIEEE! */
+		/* Someone please write the code to support this beast! ;) */
+		CPU(0, "Bipolar Integrated Technology - B5010"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(-1, NULL)
+	}
+},{
+	3,
+	.cpu_info = {
+		CPU(0, "LSI Logic Corporation - unknown-type"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(-1, NULL)
+	}
+},{
+	PSR_IMPL_TI,
+	.cpu_info = {
+		CPU(0, "Texas Instruments, Inc. - SuperSparc-(II)"),
+		/* SparcClassic -- also known as STP1010TAB-50 */
+		CPU(1, "Texas Instruments, Inc. - MicroSparc"),
+		CPU(2, "Texas Instruments, Inc. - MicroSparc II"),
+		CPU(3, "Texas Instruments, Inc. - SuperSparc 51"),
+		CPU(4, "Texas Instruments, Inc. - SuperSparc 61"),
+		CPU(5, "Texas Instruments, Inc. - unknown"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		/* SuperSparc 50 module */
+		FPU(0, "SuperSparc on-chip FPU"),
+		/* SparcClassic */
+		FPU(4, "TI MicroSparc on chip FPU"),
+		FPU(-1, NULL)
+	}
+},{
+	5,
+	.cpu_info = {
+		CPU(0, "Matsushita - MN10501"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(0, "Matsushita MN10501"),
+		FPU(-1, NULL)
+	}
+},{
+	6,
+	.cpu_info = {
+		CPU(0, "Philips Corporation - unknown"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(-1, NULL)
+	}
+},{
+	7,
+	.cpu_info = {
+		CPU(0, "Harvest VLSI Design Center, Inc. - unknown"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(-1, NULL)
+	}
+},{
+	8,
+	.cpu_info = {
+		CPU(0, "Systems and Processes Engineering Corporation (SPEC)"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(-1, NULL)
+	}
+},{
+	9,
+	.cpu_info = {
+		/* Gallium arsenide 200MHz, BOOOOGOOOOMIPS!!! */
+		CPU(0, "Fujitsu or Weitek Power-UP"),
+		CPU(1, "Fujitsu or Weitek Power-UP"),
+		CPU(2, "Fujitsu or Weitek Power-UP"),
+		CPU(3, "Fujitsu or Weitek Power-UP"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(3, "Fujitsu or Weitek on-chip FPU"),
+		FPU(-1, NULL)
+	}
+},{
+	PSR_IMPL_LEON,		/* Aeroflex Gaisler */
+	.cpu_info = {
+		CPU(3, "LEON"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(2, "GRFPU"),
+		FPU(3, "GRFPU-Lite"),
+		FPU(-1, NULL)
+	}
+},{
+	0x17,
+	.cpu_info = {
+		CPU_PMU(0x10, "TI UltraSparc I   (SpitFire)", "ultra12"),
+		CPU_PMU(0x11, "TI UltraSparc II  (BlackBird)", "ultra12"),
+		CPU_PMU(0x12, "TI UltraSparc IIi (Sabre)", "ultra12"),
+		CPU_PMU(0x13, "TI UltraSparc IIe (Hummingbird)", "ultra12"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(0x10, "UltraSparc I integrated FPU"),
+		FPU(0x11, "UltraSparc II integrated FPU"),
+		FPU(0x12, "UltraSparc IIi integrated FPU"),
+		FPU(0x13, "UltraSparc IIe integrated FPU"),
+		FPU(-1, NULL)
+	}
+},{
+	0x22,
+	.cpu_info = {
+		CPU_PMU(0x10, "TI UltraSparc I   (SpitFire)", "ultra12"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(0x10, "UltraSparc I integrated FPU"),
+		FPU(-1, NULL)
+	}
+},{
+	0x3e,
+	.cpu_info = {
+		CPU_PMU(0x14, "TI UltraSparc III (Cheetah)", "ultra3"),
+		CPU_PMU(0x15, "TI UltraSparc III+ (Cheetah+)", "ultra3+"),
+		CPU_PMU(0x16, "TI UltraSparc IIIi (Jalapeno)", "ultra3i"),
+		CPU_PMU(0x18, "TI UltraSparc IV (Jaguar)", "ultra3+"),
+		CPU_PMU(0x19, "TI UltraSparc IV+ (Panther)", "ultra4+"),
+		CPU_PMU(0x22, "TI UltraSparc IIIi+ (Serrano)", "ultra3i"),
+		CPU(-1, NULL)
+	},
+	.fpu_info = {
+		FPU(0x14, "UltraSparc III integrated FPU"),
+		FPU(0x15, "UltraSparc III+ integrated FPU"),
+		FPU(0x16, "UltraSparc IIIi integrated FPU"),
+		FPU(0x18, "UltraSparc IV integrated FPU"),
+		FPU(0x19, "UltraSparc IV+ integrated FPU"),
+		FPU(0x22, "UltraSparc IIIi+ integrated FPU"),
+		FPU(-1, NULL)
+	}
+}};
+
+/* In order to get the fpu type correct, you need to take the IDPROM's
+ * machine type value into consideration too.  I will fix this.
+ */
+
+static const char *sparc_cpu_type;
+static const char *sparc_fpu_type;
+const char *sparc_pmu_type;
+
+
+static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+{
+	const struct manufacturer_info *manuf;
+	int i;
+
+	sparc_cpu_type = NULL;
+	sparc_fpu_type = NULL;
+	sparc_pmu_type = NULL;
+	manuf = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(manufacturer_info); i++)
+	{
+		if (psr_impl == manufacturer_info[i].psr_impl) {
+			manuf = &manufacturer_info[i];
+			break;
+		}
+	}
+	if (manuf != NULL)
+	{
+		const struct cpu_info *cpu;
+		const struct fpu_info *fpu;
+
+		cpu = &manuf->cpu_info[0];
+		while (cpu->psr_vers != -1)
+		{
+			if (cpu->psr_vers == psr_vers) {
+				sparc_cpu_type = cpu->name;
+				sparc_pmu_type = cpu->pmu_name;
+				sparc_fpu_type = "No FPU";
+				break;
+			}
+			cpu++;
+		}
+		fpu =  &manuf->fpu_info[0];
+		while (fpu->fp_vers != -1)
+		{
+			if (fpu->fp_vers == fpu_vers) {
+				sparc_fpu_type = fpu->name;
+				break;
+			}
+			fpu++;
+		}
+	}
+	if (sparc_cpu_type == NULL)
+	{
+		printk(KERN_ERR "CPU: Unknown chip, impl[0x%x] vers[0x%x]\n",
+		       psr_impl, psr_vers);
+		sparc_cpu_type = "Unknown CPU";
+	}
+	if (sparc_fpu_type == NULL)
+	{
+		printk(KERN_ERR "FPU: Unknown chip, impl[0x%x] vers[0x%x]\n",
+		       psr_impl, fpu_vers);
+		sparc_fpu_type = "Unknown FPU";
+	}
+	if (sparc_pmu_type == NULL)
+		sparc_pmu_type = "Unknown PMU";
+}
+
+#ifdef CONFIG_SPARC32
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+	seq_printf(m,
+		   "cpu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "promlib\t\t: Version %d Revision %d\n"
+		   "prom\t\t: %d.%d\n"
+		   "type\t\t: %s\n"
+		   "ncpus probed\t: %d\n"
+		   "ncpus active\t: %d\n"
+#ifndef CONFIG_SMP
+		   "CPU0Bogo\t: %lu.%02lu\n"
+		   "CPU0ClkTck\t: %ld\n"
+#endif
+		   ,
+		   sparc_cpu_type,
+		   sparc_fpu_type,
+		   romvec->pv_romvers,
+		   prom_rev,
+		   romvec->pv_printrev >> 16,
+		   romvec->pv_printrev & 0xffff,
+		   &cputypval[0],
+		   ncpus_probed,
+		   num_online_cpus()
+#ifndef CONFIG_SMP
+		   , cpu_data(0).udelay_val/(500000/HZ),
+		   (cpu_data(0).udelay_val/(5000/HZ)) % 100,
+		   cpu_data(0).clock_tick
+#endif
+		);
+
+#ifdef CONFIG_SMP
+	smp_bogo(m);
+#endif
+	mmu_info(m);
+#ifdef CONFIG_SMP
+	smp_info(m);
+#endif
+	return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
+
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+	seq_printf(m,
+		   "cpu\t\t: %s\n"
+		   "fpu\t\t: %s\n"
+		   "pmu\t\t: %s\n"
+		   "prom\t\t: %s\n"
+		   "type\t\t: %s\n"
+		   "ncpus probed\t: %d\n"
+		   "ncpus active\t: %d\n"
+		   "D$ parity tl1\t: %u\n"
+		   "I$ parity tl1\t: %u\n"
+#ifndef CONFIG_SMP
+		   "Cpu0ClkTck\t: %016lx\n"
+#endif
+		   ,
+		   sparc_cpu_type,
+		   sparc_fpu_type,
+		   sparc_pmu_type,
+		   prom_version,
+		   ((tlb_type == hypervisor) ?
+		    "sun4v" :
+		    "sun4u"),
+		   ncpus_probed,
+		   num_online_cpus(),
+		   dcache_parity_tl1_occurred,
+		   icache_parity_tl1_occurred
+#ifndef CONFIG_SMP
+		   , cpu_data(0).clock_tick
+#endif
+		);
+	cpucap_info(m);
+#ifdef CONFIG_SMP
+	smp_bogo(m);
+#endif
+	mmu_info(m);
+#ifdef CONFIG_SMP
+	smp_info(m);
+#endif
+	return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+	/* The pointer we are returning is arbitrary,
+	 * it just has to be non-NULL and not IS_ERR
+	 * in the success case.
+	 */
+	return *pos == 0 ? &c_start : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+	.start	= c_start,
+	.next	= c_next,
+	.stop	= c_stop,
+	.show	= show_cpuinfo,
+};
+
+#ifdef CONFIG_SPARC32
+static int __init cpu_type_probe(void)
+{
+	int psr_impl, psr_vers, fpu_vers;
+	int psr;
+
+	psr_impl = ((get_psr() >> PSR_IMPL_SHIFT) & PSR_IMPL_SHIFTED_MASK);
+	psr_vers = ((get_psr() >> PSR_VERS_SHIFT) & PSR_VERS_SHIFTED_MASK);
+
+	psr = get_psr();
+	put_psr(psr | PSR_EF);
+
+	if (psr_impl == PSR_IMPL_LEON)
+		fpu_vers = get_psr() & PSR_EF ? ((get_fsr() >> 17) & 0x7) : 7;
+	else
+		fpu_vers = ((get_fsr() >> 17) & 0x7);
+
+	put_psr(psr);
+
+	set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+
+	return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+static void __init sun4v_cpu_probe(void)
+{
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_NIAGARA1:
+		sparc_cpu_type = "UltraSparc T1 (Niagara)";
+		sparc_fpu_type = "UltraSparc T1 integrated FPU";
+		sparc_pmu_type = "niagara";
+		break;
+
+	case SUN4V_CHIP_NIAGARA2:
+		sparc_cpu_type = "UltraSparc T2 (Niagara2)";
+		sparc_fpu_type = "UltraSparc T2 integrated FPU";
+		sparc_pmu_type = "niagara2";
+		break;
+
+	case SUN4V_CHIP_NIAGARA3:
+		sparc_cpu_type = "UltraSparc T3 (Niagara3)";
+		sparc_fpu_type = "UltraSparc T3 integrated FPU";
+		sparc_pmu_type = "niagara3";
+		break;
+
+	case SUN4V_CHIP_NIAGARA4:
+		sparc_cpu_type = "UltraSparc T4 (Niagara4)";
+		sparc_fpu_type = "UltraSparc T4 integrated FPU";
+		sparc_pmu_type = "niagara4";
+		break;
+
+	case SUN4V_CHIP_NIAGARA5:
+		sparc_cpu_type = "UltraSparc T5 (Niagara5)";
+		sparc_fpu_type = "UltraSparc T5 integrated FPU";
+		sparc_pmu_type = "niagara5";
+		break;
+
+	case SUN4V_CHIP_SPARC_M6:
+		sparc_cpu_type = "SPARC-M6";
+		sparc_fpu_type = "SPARC-M6 integrated FPU";
+		sparc_pmu_type = "sparc-m6";
+		break;
+
+	case SUN4V_CHIP_SPARC_M7:
+		sparc_cpu_type = "SPARC-M7";
+		sparc_fpu_type = "SPARC-M7 integrated FPU";
+		sparc_pmu_type = "sparc-m7";
+		break;
+
+	case SUN4V_CHIP_SPARC_M8:
+		sparc_cpu_type = "SPARC-M8";
+		sparc_fpu_type = "SPARC-M8 integrated FPU";
+		sparc_pmu_type = "sparc-m8";
+		break;
+
+	case SUN4V_CHIP_SPARC_SN:
+		sparc_cpu_type = "SPARC-SN";
+		sparc_fpu_type = "SPARC-SN integrated FPU";
+		sparc_pmu_type = "sparc-sn";
+		break;
+
+	case SUN4V_CHIP_SPARC64X:
+		sparc_cpu_type = "SPARC64-X";
+		sparc_fpu_type = "SPARC64-X integrated FPU";
+		sparc_pmu_type = "sparc64-x";
+		break;
+
+	default:
+		printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
+		       prom_cpu_compatible);
+		sparc_cpu_type = "Unknown SUN4V CPU";
+		sparc_fpu_type = "Unknown SUN4V FPU";
+		sparc_pmu_type = "Unknown SUN4V PMU";
+		break;
+	}
+}
+
+static int __init cpu_type_probe(void)
+{
+	if (tlb_type == hypervisor) {
+		sun4v_cpu_probe();
+	} else {
+		unsigned long ver;
+		int manuf, impl;
+
+		__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+
+		manuf = ((ver >> 48) & 0xffff);
+		impl = ((ver >> 32) & 0xffff);
+		set_cpu_and_fpu(manuf, impl, impl);
+	}
+	return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+early_initcall(cpu_type_probe);
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
new file mode 100644
index 0000000..d1d5282
--- /dev/null
+++ b/arch/sparc/kernel/cpumap.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+/* cpumap.c: used for optimizing CPU assignment
+ *
+ * Copyright (C) 2009 Hong H. Pham <hong.pham@windriver.com>
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <asm/cpudata.h>
+#include "cpumap.h"
+
+
+enum {
+	CPUINFO_LVL_ROOT = 0,
+	CPUINFO_LVL_NODE,
+	CPUINFO_LVL_CORE,
+	CPUINFO_LVL_PROC,
+	CPUINFO_LVL_MAX,
+};
+
+enum {
+	ROVER_NO_OP              = 0,
+	/* Increment rover every time level is visited */
+	ROVER_INC_ON_VISIT       = 1 << 0,
+	/* Increment parent's rover every time rover wraps around */
+	ROVER_INC_PARENT_ON_LOOP = 1 << 1,
+};
+
+struct cpuinfo_node {
+	int id;
+	int level;
+	int num_cpus;    /* Number of CPUs in this hierarchy */
+	int parent_index;
+	int child_start; /* Array index of the first child node */
+	int child_end;   /* Array index of the last child node */
+	int rover;       /* Child node iterator */
+};
+
+struct cpuinfo_level {
+	int start_index; /* Index of first node of a level in a cpuinfo tree */
+	int end_index;   /* Index of last node of a level in a cpuinfo tree */
+	int num_nodes;   /* Number of nodes in a level in a cpuinfo tree */
+};
+
+struct cpuinfo_tree {
+	int total_nodes;
+
+	/* Offsets into nodes[] for each level of the tree */
+	struct cpuinfo_level level[CPUINFO_LVL_MAX];
+	struct cpuinfo_node  nodes[0];
+};
+
+
+static struct cpuinfo_tree *cpuinfo_tree;
+
+static u16 cpu_distribution_map[NR_CPUS];
+static DEFINE_SPINLOCK(cpu_map_lock);
+
+
+/* Niagara optimized cpuinfo tree traversal. */
+static const int niagara_iterate_method[] = {
+	[CPUINFO_LVL_ROOT] = ROVER_NO_OP,
+
+	/* Strands (or virtual CPUs) within a core may not run concurrently
+	 * on the Niagara, as instruction pipeline(s) are shared.  Distribute
+	 * work to strands in different cores first for better concurrency.
+	 * Go to next NUMA node when all cores are used.
+	 */
+	[CPUINFO_LVL_NODE] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
+
+	/* Strands are grouped together by proc_id in cpuinfo_sparc, i.e.
+	 * a proc_id represents an instruction pipeline.  Distribute work to
+	 * strands in different proc_id groups if the core has multiple
+	 * instruction pipelines (e.g. the Niagara 2/2+ has two).
+	 */
+	[CPUINFO_LVL_CORE] = ROVER_INC_ON_VISIT,
+
+	/* Pick the next strand in the proc_id group. */
+	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT,
+};
+
+/* Generic cpuinfo tree traversal.  Distribute work round robin across NUMA
+ * nodes.
+ */
+static const int generic_iterate_method[] = {
+	[CPUINFO_LVL_ROOT] = ROVER_INC_ON_VISIT,
+	[CPUINFO_LVL_NODE] = ROVER_NO_OP,
+	[CPUINFO_LVL_CORE] = ROVER_INC_PARENT_ON_LOOP,
+	[CPUINFO_LVL_PROC] = ROVER_INC_ON_VISIT|ROVER_INC_PARENT_ON_LOOP,
+};
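+
+/* Sketch of how iterate_cpu() below applies these flags (hypothetical
+ * two-core Niagara-style tree): the first pick walks root -> node ->
+ * core0 -> strand, and ROVER_INC_ON_VISIT at the node level advances
+ * the node's rover, so the next pick lands in core1 before a second
+ * strand of core0 is handed out.
+ */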
+
+
+static int cpuinfo_id(int cpu, int level)
+{
+	int id;
+
+	switch (level) {
+	case CPUINFO_LVL_ROOT:
+		id = 0;
+		break;
+	case CPUINFO_LVL_NODE:
+		id = cpu_to_node(cpu);
+		break;
+	case CPUINFO_LVL_CORE:
+		id = cpu_data(cpu).core_id;
+		break;
+	case CPUINFO_LVL_PROC:
+		id = cpu_data(cpu).proc_id;
+		break;
+	default:
+		id = -EINVAL;
+	}
+	return id;
+}
+
+/*
+ * Enumerate the CPU information in __cpu_data to determine the start index,
+ * end index, and number of nodes for each level in the cpuinfo tree.  The
+ * total number of cpuinfo nodes required to build the tree is returned.
+ */
+static int enumerate_cpuinfo_nodes(struct cpuinfo_level *tree_level)
+{
+	int prev_id[CPUINFO_LVL_MAX];
+	int i, n, num_nodes;
+
+	for (i = CPUINFO_LVL_ROOT; i < CPUINFO_LVL_MAX; i++) {
+		struct cpuinfo_level *lv = &tree_level[i];
+
+		prev_id[i] = -1;
+		lv->start_index = lv->end_index = lv->num_nodes = 0;
+	}
+
+	num_nodes = 1; /* Include the root node */
+
+	for (i = 0; i < num_possible_cpus(); i++) {
+		if (!cpu_online(i))
+			continue;
+
+		n = cpuinfo_id(i, CPUINFO_LVL_NODE);
+		if (n > prev_id[CPUINFO_LVL_NODE]) {
+			tree_level[CPUINFO_LVL_NODE].num_nodes++;
+			prev_id[CPUINFO_LVL_NODE] = n;
+			num_nodes++;
+		}
+		n = cpuinfo_id(i, CPUINFO_LVL_CORE);
+		if (n > prev_id[CPUINFO_LVL_CORE]) {
+			tree_level[CPUINFO_LVL_CORE].num_nodes++;
+			prev_id[CPUINFO_LVL_CORE] = n;
+			num_nodes++;
+		}
+		n = cpuinfo_id(i, CPUINFO_LVL_PROC);
+		if (n > prev_id[CPUINFO_LVL_PROC]) {
+			tree_level[CPUINFO_LVL_PROC].num_nodes++;
+			prev_id[CPUINFO_LVL_PROC] = n;
+			num_nodes++;
+		}
+	}
+
+	tree_level[CPUINFO_LVL_ROOT].num_nodes = 1;
+
+	n = tree_level[CPUINFO_LVL_NODE].num_nodes;
+	tree_level[CPUINFO_LVL_NODE].start_index = 1;
+	tree_level[CPUINFO_LVL_NODE].end_index   = n;
+
+	n++;
+	tree_level[CPUINFO_LVL_CORE].start_index = n;
+	n += tree_level[CPUINFO_LVL_CORE].num_nodes;
+	tree_level[CPUINFO_LVL_CORE].end_index   = n - 1;
+
+	tree_level[CPUINFO_LVL_PROC].start_index = n;
+	n += tree_level[CPUINFO_LVL_PROC].num_nodes;
+	tree_level[CPUINFO_LVL_PROC].end_index   = n - 1;
+
+	return num_nodes;
+}
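+
+/* Example (hypothetical machine, 2 NUMA nodes / 4 cores / 8 proc
+ * groups): nodes[0] is the root, nodes[1..2] the node level,
+ * nodes[3..6] the core level and nodes[7..14] the proc level, for the
+ * 15 total nodes this function returns.
+ */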
+
+/* Build a tree representation of the CPU hierarchy using the per CPU
+ * information in __cpu_data.  Entries in __cpu_data[0..NR_CPUS] are
+ * assumed to be sorted in ascending order based on node, core_id, and
+ * proc_id (in order of significance).
+ */
+static struct cpuinfo_tree *build_cpuinfo_tree(void)
+{
+	struct cpuinfo_tree *new_tree;
+	struct cpuinfo_node *node;
+	struct cpuinfo_level tmp_level[CPUINFO_LVL_MAX];
+	int num_cpus[CPUINFO_LVL_MAX];
+	int level_rover[CPUINFO_LVL_MAX];
+	int prev_id[CPUINFO_LVL_MAX];
+	int n, id, cpu, prev_cpu, last_cpu, level;
+
+	n = enumerate_cpuinfo_nodes(tmp_level);
+
+	new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
+	                   (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
+	if (!new_tree)
+		return NULL;
+
+	new_tree->total_nodes = n;
+	memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
+
+	prev_cpu = cpu = cpumask_first(cpu_online_mask);
+
+	/* Initialize all levels in the tree with the first CPU */
+	for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
+		n = new_tree->level[level].start_index;
+
+		level_rover[level] = n;
+		node = &new_tree->nodes[n];
+
+		id = cpuinfo_id(cpu, level);
+		if (unlikely(id < 0)) {
+			kfree(new_tree);
+			return NULL;
+		}
+		node->id = id;
+		node->level = level;
+		node->num_cpus = 1;
+
+		node->parent_index = (level > CPUINFO_LVL_ROOT)
+		    ? new_tree->level[level - 1].start_index : -1;
+
+		node->child_start = node->child_end = node->rover =
+		    (level == CPUINFO_LVL_PROC)
+		    ? cpu : new_tree->level[level + 1].start_index;
+
+		prev_id[level] = node->id;
+		num_cpus[level] = 1;
+	}
+
+	for (last_cpu = (num_possible_cpus() - 1); last_cpu >= 0; last_cpu--) {
+		if (cpu_online(last_cpu))
+			break;
+	}
+
+	while (++cpu <= last_cpu) {
+		if (!cpu_online(cpu))
+			continue;
+
+		for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT;
+		     level--) {
+			id = cpuinfo_id(cpu, level);
+			if (unlikely(id < 0)) {
+				kfree(new_tree);
+				return NULL;
+			}
+
+			if ((id != prev_id[level]) || (cpu == last_cpu)) {
+				prev_id[level] = id;
+				node = &new_tree->nodes[level_rover[level]];
+				node->num_cpus = num_cpus[level];
+				num_cpus[level] = 1;
+
+				if (cpu == last_cpu)
+					node->num_cpus++;
+
+				/* Connect tree node to parent */
+				if (level == CPUINFO_LVL_ROOT)
+					node->parent_index = -1;
+				else
+					node->parent_index =
+					    level_rover[level - 1];
+
+				if (level == CPUINFO_LVL_PROC) {
+					node->child_end =
+					    (cpu == last_cpu) ? cpu : prev_cpu;
+				} else {
+					node->child_end =
+					    level_rover[level + 1] - 1;
+				}
+
+				/* Initialize the next node in the same level */
+				n = ++level_rover[level];
+				if (n <= new_tree->level[level].end_index) {
+					node = &new_tree->nodes[n];
+					node->id = id;
+					node->level = level;
+
+					/* Connect node to child */
+					node->child_start = node->child_end =
+					node->rover =
+					    (level == CPUINFO_LVL_PROC)
+					    ? cpu : level_rover[level + 1];
+				}
+			} else
+				num_cpus[level]++;
+		}
+		prev_cpu = cpu;
+	}
+
+	return new_tree;
+}
+
+static void increment_rover(struct cpuinfo_tree *t, int node_index,
+                            int root_index, const int *rover_inc_table)
+{
+	struct cpuinfo_node *node = &t->nodes[node_index];
+	int top_level, level;
+
+	top_level = t->nodes[root_index].level;
+	for (level = node->level; level >= top_level; level--) {
+		node->rover++;
+		if (node->rover <= node->child_end)
+			return;
+
+		node->rover = node->child_start;
+		/* If parent's rover does not need to be adjusted, stop here. */
+		if ((level == top_level) ||
+		    !(rover_inc_table[level] & ROVER_INC_PARENT_ON_LOOP))
+			return;
+
+		node = &t->nodes[node->parent_index];
+	}
+}
+
+static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
+{
+	const int *rover_inc_table;
+	int level, new_index, index = root_index;
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_NIAGARA1:
+	case SUN4V_CHIP_NIAGARA2:
+	case SUN4V_CHIP_NIAGARA3:
+	case SUN4V_CHIP_NIAGARA4:
+	case SUN4V_CHIP_NIAGARA5:
+	case SUN4V_CHIP_SPARC_M6:
+	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
+	case SUN4V_CHIP_SPARC_SN:
+	case SUN4V_CHIP_SPARC64X:
+		rover_inc_table = niagara_iterate_method;
+		break;
+	default:
+		rover_inc_table = generic_iterate_method;
+	}
+
+	for (level = t->nodes[root_index].level; level < CPUINFO_LVL_MAX;
+	     level++) {
+		new_index = t->nodes[index].rover;
+		if (rover_inc_table[level] & ROVER_INC_ON_VISIT)
+			increment_rover(t, index, root_index, rover_inc_table);
+
+		index = new_index;
+	}
+	return index;
+}
+
+static void _cpu_map_rebuild(void)
+{
+	int i;
+
+	if (cpuinfo_tree) {
+		kfree(cpuinfo_tree);
+		cpuinfo_tree = NULL;
+	}
+
+	cpuinfo_tree = build_cpuinfo_tree();
+	if (!cpuinfo_tree)
+		return;
+
+	/* Build CPU distribution map that spans all online CPUs.  No need
+	 * to check if the CPU is online, as that is done when the cpuinfo
+	 * tree is being built.
+	 */
+	for (i = 0; i < cpuinfo_tree->nodes[0].num_cpus; i++)
+		cpu_distribution_map[i] = iterate_cpu(cpuinfo_tree, 0);
+}
+
+/* Fallback if the cpuinfo tree could not be built.  CPU mapping is linear
+ * round robin.
+ */
+static int simple_map_to_cpu(unsigned int index)
+{
+	int i, end, cpu_rover;
+
+	cpu_rover = 0;
+	end = index % num_online_cpus();
+	for (i = 0; i < num_possible_cpus(); i++) {
+		if (cpu_online(cpu_rover)) {
+			if (cpu_rover >= end)
+				return cpu_rover;
+
+			cpu_rover++;
+		}
+	}
+
+	/* Impossible, since num_online_cpus() <= num_possible_cpus() */
+	return cpumask_first(cpu_online_mask);
+}
+
+static int _map_to_cpu(unsigned int index)
+{
+	struct cpuinfo_node *root_node;
+
+	if (unlikely(!cpuinfo_tree)) {
+		_cpu_map_rebuild();
+		if (!cpuinfo_tree)
+			return simple_map_to_cpu(index);
+	}
+
+	root_node = &cpuinfo_tree->nodes[0];
+#ifdef CONFIG_HOTPLUG_CPU
+	if (unlikely(root_node->num_cpus != num_online_cpus())) {
+		_cpu_map_rebuild();
+		if (!cpuinfo_tree)
+			return simple_map_to_cpu(index);
+	}
+#endif
+	return cpu_distribution_map[index % root_node->num_cpus];
+}
+
+int map_to_cpu(unsigned int index)
+{
+	int mapped_cpu;
+	unsigned long flag;
+
+	spin_lock_irqsave(&cpu_map_lock, flag);
+	mapped_cpu = _map_to_cpu(index);
+
+#ifdef CONFIG_HOTPLUG_CPU
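+	/* The CPU picked from the distribution map may have gone offline
+	 * since the map was built; remap until we land on an online one.
+	 */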
+	while (unlikely(!cpu_online(mapped_cpu)))
+		mapped_cpu = _map_to_cpu(index);
+#endif
+	spin_unlock_irqrestore(&cpu_map_lock, flag);
+	return mapped_cpu;
+}
+EXPORT_SYMBOL(map_to_cpu);
+
+void cpu_map_rebuild(void)
+{
+	unsigned long flag;
+
+	spin_lock_irqsave(&cpu_map_lock, flag);
+	_cpu_map_rebuild();
+	spin_unlock_irqrestore(&cpu_map_lock, flag);
+}
diff --git a/arch/sparc/kernel/cpumap.h b/arch/sparc/kernel/cpumap.h
new file mode 100644
index 0000000..7d5b774
--- /dev/null
+++ b/arch/sparc/kernel/cpumap.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _CPUMAP_H
+#define _CPUMAP_H
+
+#ifdef CONFIG_SMP
+void cpu_map_rebuild(void);
+int map_to_cpu(unsigned int index);
+#define cpu_map_init() cpu_map_rebuild()
+#else
+#define cpu_map_init() do {} while (0)
+static inline int map_to_cpu(unsigned int index)
+{
+	return raw_smp_processor_id();
+}
+#endif
+
+#endif
diff --git a/arch/sparc/kernel/devices.c b/arch/sparc/kernel/devices.c
new file mode 100644
index 0000000..23b6e50
--- /dev/null
+++ b/arch/sparc/kernel/devices.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* devices.c: Initial scan of the prom device tree for important
+ *	      Sparc device nodes which we need to find.
+ *
+ * This is based on the sparc64 version, but sun4m doesn't always use
+ * the hardware MIDs, so be careful.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/kernel.h>
+#include <linux/threads.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <asm/page.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/cpudata.h>
+#include <asm/cpu_type.h>
+#include <asm/setup.h>
+
+#include "kernel.h"
+
+static char *cpu_mid_prop(void)
+{
+	if (sparc_cpu_model == sun4d)
+		return "cpu-id";
+	return "mid";
+}
+
+static int check_cpu_node(phandle nd, int *cur_inst,
+		int (*compare)(phandle, int, void *), void *compare_arg,
+		phandle *prom_node, int *mid)
+{
+	if (!compare(nd, *cur_inst, compare_arg)) {
+		if (prom_node)
+			*prom_node = nd;
+		if (mid) {
+			*mid = prom_getintdefault(nd, cpu_mid_prop(), 0);
+			if (sparc_cpu_model == sun4m)
+				*mid &= 3;
+		}
+		return 0;
+	}
+
+	(*cur_inst)++;
+
+	return -ENODEV;
+}
+
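+/* Walk every "cpu" node in the device tree, advancing cur_inst once
+ * per node, until the compare callback reports a match; the callbacks
+ * below match either by instance number or by mid.
+ */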
+static int __cpu_find_by(int (*compare)(phandle, int, void *),
+		void *compare_arg, phandle *prom_node, int *mid)
+{
+	struct device_node *dp;
+	int cur_inst;
+
+	cur_inst = 0;
+	for_each_node_by_type(dp, "cpu") {
+		int err = check_cpu_node(dp->phandle, &cur_inst,
+					 compare, compare_arg,
+					 prom_node, mid);
+		if (!err) {
+			of_node_put(dp);
+			return 0;
+		}
+	}
+
+	return -ENODEV;
+}
+
+static int cpu_instance_compare(phandle nd, int instance, void *_arg)
+{
+	int desired_instance = (int) _arg;
+
+	if (instance == desired_instance)
+		return 0;
+	return -ENODEV;
+}
+
+int cpu_find_by_instance(int instance, phandle *prom_node, int *mid)
+{
+	return __cpu_find_by(cpu_instance_compare, (void *)instance,
+			     prom_node, mid);
+}
+
+static int cpu_mid_compare(phandle nd, int instance, void *_arg)
+{
+	int desired_mid = (int) _arg;
+	int this_mid;
+
+	this_mid = prom_getintdefault(nd, cpu_mid_prop(), 0);
+	if (this_mid == desired_mid
+	    || (sparc_cpu_model == sun4m && (this_mid & 3) == desired_mid))
+		return 0;
+	return -ENODEV;
+}
+
+int cpu_find_by_mid(int mid, phandle *prom_node)
+{
+	return __cpu_find_by(cpu_mid_compare, (void *)mid,
+			     prom_node, NULL);
+}
+
+/* sun4m uses truncated mids since we base the cpuid on the ttable/irqset
+ * address (0-3).  This function returns the true hardware mid, which may
+ * have some other bits set.  On sun4d, hardware and software mids are
+ * the same.
+ */
+int cpu_get_hwmid(phandle prom_node)
+{
+	return prom_getintdefault(prom_node, cpu_mid_prop(), -ENODEV);
+}
+
+void __init device_scan(void)
+{
+	printk(KERN_NOTICE "Booting Linux...\n");
+
+#ifndef CONFIG_SMP
+	{
+		phandle cpu_node;
+		int err;
+		err = cpu_find_by_instance(0, &cpu_node, NULL);
+		if (err) {
+			/* Probably a sun4e, Sun is trying to trick us ;-) */
+			prom_printf("No cpu nodes, cannot continue\n");
+			prom_halt();
+		}
+		cpu_data(0).clock_tick = prom_getintdefault(cpu_node,
+							    "clock-frequency",
+							    0);
+	}
+#endif /* !CONFIG_SMP */
+
+	auxio_probe();
+	auxio_power_probe();
+}
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
new file mode 100644
index 0000000..f87265a
--- /dev/null
+++ b/arch/sparc/kernel/ds.c
@@ -0,0 +1,1274 @@
+/* ds.c: Domain Services driver for Logical Domains
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
+#include <linux/kthread.h>
+#include <linux/reboot.h>
+#include <linux/cpu.h>
+
+#include <asm/hypervisor.h>
+#include <asm/ldc.h>
+#include <asm/vio.h>
+#include <asm/mdesc.h>
+#include <asm/head.h>
+#include <asm/irq.h>
+
+#include "kernel.h"
+
+#define DRV_MODULE_NAME		"ds"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_MODULE_VERSION	"1.0"
+#define DRV_MODULE_RELDATE	"Jul 11, 2007"
+
+static char version[] =
+	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_DESCRIPTION("Sun LDOM domain services driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+struct ds_msg_tag {
+	__u32			type;
+#define DS_INIT_REQ		0x00
+#define DS_INIT_ACK		0x01
+#define DS_INIT_NACK		0x02
+#define DS_REG_REQ		0x03
+#define DS_REG_ACK		0x04
+#define DS_REG_NACK		0x05
+#define DS_UNREG_REQ		0x06
+#define DS_UNREG_ACK		0x07
+#define DS_UNREG_NACK		0x08
+#define DS_DATA			0x09
+#define DS_NACK			0x0a
+
+	__u32			len;
+};
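+
+/* Every message on the domain services channel begins with this tag;
+ * tag.len counts the bytes that follow the tag, which is why senders
+ * below compute sizeof(pkt) - sizeof(struct ds_msg_tag).
+ */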
+
+/* Result codes */
+#define DS_OK			0x00
+#define DS_REG_VER_NACK		0x01
+#define DS_REG_DUP		0x02
+#define DS_INV_HDL		0x03
+#define DS_TYPE_UNKNOWN		0x04
+
+struct ds_version {
+	__u16			major;
+	__u16			minor;
+};
+
+struct ds_ver_req {
+	struct ds_msg_tag	tag;
+	struct ds_version	ver;
+};
+
+struct ds_ver_ack {
+	struct ds_msg_tag	tag;
+	__u16			minor;
+};
+
+struct ds_ver_nack {
+	struct ds_msg_tag	tag;
+	__u16			major;
+};
+
+struct ds_reg_req {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+	__u16			major;
+	__u16			minor;
+	char			svc_id[0];
+};
+
+struct ds_reg_ack {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+	__u16			minor;
+};
+
+struct ds_reg_nack {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+	__u16			major;
+};
+
+struct ds_unreg_req {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+};
+
+struct ds_unreg_ack {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+};
+
+struct ds_unreg_nack {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+};
+
+struct ds_data {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+};
+
+struct ds_data_nack {
+	struct ds_msg_tag	tag;
+	__u64			handle;
+	__u64			result;
+};
+
+struct ds_info;
+struct ds_cap_state {
+	__u64			handle;
+
+	void			(*data)(struct ds_info *dp,
+					struct ds_cap_state *cp,
+					void *buf, int len);
+
+	const char		*service_id;
+
+	u8			state;
+#define CAP_STATE_UNKNOWN	0x00
+#define CAP_STATE_REG_SENT	0x01
+#define CAP_STATE_REGISTERED	0x02
+};
+
+static void md_update_data(struct ds_info *dp, struct ds_cap_state *cp,
+			   void *buf, int len);
+static void domain_shutdown_data(struct ds_info *dp,
+				 struct ds_cap_state *cp,
+				 void *buf, int len);
+static void domain_panic_data(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      void *buf, int len);
+#ifdef CONFIG_HOTPLUG_CPU
+static void dr_cpu_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len);
+#endif
+static void ds_pri_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len);
+static void ds_var_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len);
+
+static struct ds_cap_state ds_states_template[] = {
+	{
+		.service_id	= "md-update",
+		.data		= md_update_data,
+	},
+	{
+		.service_id	= "domain-shutdown",
+		.data		= domain_shutdown_data,
+	},
+	{
+		.service_id	= "domain-panic",
+		.data		= domain_panic_data,
+	},
+#ifdef CONFIG_HOTPLUG_CPU
+	{
+		.service_id	= "dr-cpu",
+		.data		= dr_cpu_data,
+	},
+#endif
+	{
+		.service_id	= "pri",
+		.data		= ds_pri_data,
+	},
+	{
+		.service_id	= "var-config",
+		.data		= ds_var_data,
+	},
+	{
+		.service_id	= "var-config-backup",
+		.data		= ds_var_data,
+	},
+};
+
+static DEFINE_SPINLOCK(ds_lock);
+
+struct ds_info {
+	struct ldc_channel	*lp;
+	u8			hs_state;
+#define DS_HS_START		0x01
+#define DS_HS_DONE		0x02
+
+	u64			id;
+
+	void			*rcv_buf;
+	int			rcv_buf_len;
+
+	struct ds_cap_state	*ds_states;
+	int			num_ds_states;
+
+	struct ds_info		*next;
+};
+
+static struct ds_info *ds_info_list;
+
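+/* Capability handles are built in register_services() as
+ * ((u64) index << 32) | counter, so the upper 32 bits of a handle
+ * select the ds_states[] slot directly.
+ */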
+static struct ds_cap_state *find_cap(struct ds_info *dp, u64 handle)
+{
+	unsigned int index = handle >> 32;
+
+	if (index >= dp->num_ds_states)
+		return NULL;
+	return &dp->ds_states[index];
+}
+
+static struct ds_cap_state *find_cap_by_string(struct ds_info *dp,
+					       const char *name)
+{
+	int i;
+
+	for (i = 0; i < dp->num_ds_states; i++) {
+		if (strcmp(dp->ds_states[i].service_id, name))
+			continue;
+
+		return &dp->ds_states[i];
+	}
+	return NULL;
+}
+
+static int __ds_send(struct ldc_channel *lp, void *data, int len)
+{
+	int err, limit = 1000;
+
+	err = -EINVAL;
+	while (limit-- > 0) {
+		err = ldc_write(lp, data, len);
+		if (!err || (err != -EAGAIN))
+			break;
+		udelay(1);
+	}
+
+	return err;
+}
+
+static int ds_send(struct ldc_channel *lp, void *data, int len)
+{
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&ds_lock, flags);
+	err = __ds_send(lp, data, len);
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	return err;
+}
+
+struct ds_md_update_req {
+	__u64				req_num;
+};
+
+struct ds_md_update_res {
+	__u64				req_num;
+	__u32				result;
+};
+
+static void md_update_data(struct ds_info *dp,
+			   struct ds_cap_state *cp,
+			   void *buf, int len)
+{
+	struct ldc_channel *lp = dp->lp;
+	struct ds_data *dpkt = buf;
+	struct ds_md_update_req *rp;
+	struct {
+		struct ds_data		data;
+		struct ds_md_update_res	res;
+	} pkt;
+
+	rp = (struct ds_md_update_req *) (dpkt + 1);
+
+	printk(KERN_INFO "ds-%llu: Machine description update.\n", dp->id);
+
+	mdesc_update();
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.data.tag.type = DS_DATA;
+	pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+	pkt.data.handle = cp->handle;
+	pkt.res.req_num = rp->req_num;
+	pkt.res.result = DS_OK;
+
+	ds_send(lp, &pkt, sizeof(pkt));
+}
+
+struct ds_shutdown_req {
+	__u64				req_num;
+	__u32				ms_delay;
+};
+
+struct ds_shutdown_res {
+	__u64				req_num;
+	__u32				result;
+	char				reason[1];
+};
+
+static void domain_shutdown_data(struct ds_info *dp,
+				 struct ds_cap_state *cp,
+				 void *buf, int len)
+{
+	struct ldc_channel *lp = dp->lp;
+	struct ds_data *dpkt = buf;
+	struct ds_shutdown_req *rp;
+	struct {
+		struct ds_data		data;
+		struct ds_shutdown_res	res;
+	} pkt;
+
+	rp = (struct ds_shutdown_req *) (dpkt + 1);
+
+	printk(KERN_ALERT "ds-%llu: Shutdown request from "
+	       "LDOM manager received.\n", dp->id);
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.data.tag.type = DS_DATA;
+	pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+	pkt.data.handle = cp->handle;
+	pkt.res.req_num = rp->req_num;
+	pkt.res.result = DS_OK;
+	pkt.res.reason[0] = 0;
+
+	ds_send(lp, &pkt, sizeof(pkt));
+
+	orderly_poweroff(true);
+}
+
+struct ds_panic_req {
+	__u64				req_num;
+};
+
+struct ds_panic_res {
+	__u64				req_num;
+	__u32				result;
+	char				reason[1];
+};
+
+static void domain_panic_data(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      void *buf, int len)
+{
+	struct ldc_channel *lp = dp->lp;
+	struct ds_data *dpkt = buf;
+	struct ds_panic_req *rp;
+	struct {
+		struct ds_data		data;
+		struct ds_panic_res	res;
+	} pkt;
+
+	rp = (struct ds_panic_req *) (dpkt + 1);
+
+	printk(KERN_ALERT "ds-%llu: Panic request from "
+	       "LDOM manager received.\n", dp->id);
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.data.tag.type = DS_DATA;
+	pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
+	pkt.data.handle = cp->handle;
+	pkt.res.req_num = rp->req_num;
+	pkt.res.result = DS_OK;
+	pkt.res.reason[0] = 0;
+
+	ds_send(lp, &pkt, sizeof(pkt));
+
+	panic("PANIC requested by LDOM manager.");
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+struct dr_cpu_tag {
+	__u64				req_num;
+	__u32				type;
+#define DR_CPU_CONFIGURE		0x43
+#define DR_CPU_UNCONFIGURE		0x55
+#define DR_CPU_FORCE_UNCONFIGURE	0x46
+#define DR_CPU_STATUS			0x53
+
+/* Responses */
+#define DR_CPU_OK			0x6f
+#define DR_CPU_ERROR			0x65
+
+	__u32				num_records;
+};
+
+struct dr_cpu_resp_entry {
+	__u32				cpu;
+	__u32				result;
+#define DR_CPU_RES_OK			0x00
+#define DR_CPU_RES_FAILURE		0x01
+#define DR_CPU_RES_BLOCKED		0x02
+#define DR_CPU_RES_CPU_NOT_RESPONDING	0x03
+#define DR_CPU_RES_NOT_IN_MD		0x04
+
+	__u32				stat;
+#define DR_CPU_STAT_NOT_PRESENT		0x00
+#define DR_CPU_STAT_UNCONFIGURED	0x01
+#define DR_CPU_STAT_CONFIGURED		0x02
+
+	__u32				str_off;
+};
+
+static void __dr_cpu_send_error(struct ds_info *dp,
+				struct ds_cap_state *cp,
+				struct ds_data *data)
+{
+	struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+	struct {
+		struct ds_data		data;
+		struct dr_cpu_tag	tag;
+	} pkt;
+	int msg_len;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.data.tag.type = DS_DATA;
+	pkt.data.handle = cp->handle;
+	pkt.tag.req_num = tag->req_num;
+	pkt.tag.type = DR_CPU_ERROR;
+	pkt.tag.num_records = 0;
+
+	msg_len = (sizeof(struct ds_data) +
+		   sizeof(struct dr_cpu_tag));
+
+	pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+	__ds_send(dp->lp, &pkt, msg_len);
+}
+
+static void dr_cpu_send_error(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      struct ds_data *data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ds_lock, flags);
+	__dr_cpu_send_error(dp, cp, data);
+	spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+#define CPU_SENTINEL	0xffffffff
+
+static void purge_dups(u32 *list, u32 num_ents)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_ents; i++) {
+		u32 cpu = list[i];
+		unsigned int j;
+
+		if (cpu == CPU_SENTINEL)
+			continue;
+
+		for (j = i + 1; j < num_ents; j++) {
+			if (list[j] == cpu)
+				list[j] = CPU_SENTINEL;
+		}
+	}
+}
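+
+/* Example: a request list of { 1, 2, 2, 3, 1 } becomes
+ * { 1, 2, CPU_SENTINEL, 3, CPU_SENTINEL }; dr_cpu_data() skips the
+ * sentinel entries when it builds the cpumask.
+ */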
+
+static int dr_cpu_size_response(int ncpus)
+{
+	return (sizeof(struct ds_data) +
+		sizeof(struct dr_cpu_tag) +
+		(sizeof(struct dr_cpu_resp_entry) * ncpus));
+}
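+
+/* A dr-cpu response is laid out as
+ *
+ *	struct ds_data | struct dr_cpu_tag | struct dr_cpu_resp_entry[ncpus]
+ *
+ * which is what dr_cpu_init_response() below fills in.
+ */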
+
+static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
+				 u64 handle, int resp_len, int ncpus,
+				 cpumask_t *mask, u32 default_stat)
+{
+	struct dr_cpu_resp_entry *ent;
+	struct dr_cpu_tag *tag;
+	int i, cpu;
+
+	tag = (struct dr_cpu_tag *) (resp + 1);
+	ent = (struct dr_cpu_resp_entry *) (tag + 1);
+
+	resp->tag.type = DS_DATA;
+	resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
+	resp->handle = handle;
+	tag->req_num = req_num;
+	tag->type = DR_CPU_OK;
+	tag->num_records = ncpus;
+
+	i = 0;
+	for_each_cpu(cpu, mask) {
+		ent[i].cpu = cpu;
+		ent[i].result = DR_CPU_RES_OK;
+		ent[i].stat = default_stat;
+		i++;
+	}
+	BUG_ON(i != ncpus);
+}
+
+static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
+			u32 res, u32 stat)
+{
+	struct dr_cpu_resp_entry *ent;
+	struct dr_cpu_tag *tag;
+	int i;
+
+	tag = (struct dr_cpu_tag *) (resp + 1);
+	ent = (struct dr_cpu_resp_entry *) (tag + 1);
+
+	for (i = 0; i < ncpus; i++) {
+		if (ent[i].cpu != cpu)
+			continue;
+		ent[i].result = res;
+		ent[i].stat = stat;
+		break;
+	}
+}
+
+static int dr_cpu_configure(struct ds_info *dp, struct ds_cap_state *cp,
+			    u64 req_num, cpumask_t *mask)
+{
+	struct ds_data *resp;
+	int resp_len, ncpus, cpu;
+	unsigned long flags;
+
+	ncpus = cpumask_weight(mask);
+	resp_len = dr_cpu_size_response(ncpus);
+	resp = kzalloc(resp_len, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	dr_cpu_init_response(resp, req_num, cp->handle,
+			     resp_len, ncpus, mask,
+			     DR_CPU_STAT_CONFIGURED);
+
+	mdesc_populate_present_mask(mask);
+	mdesc_fill_in_cpu_data(mask);
+
+	for_each_cpu(cpu, mask) {
+		int err;
+
+		printk(KERN_INFO "ds-%llu: Starting cpu %d...\n",
+		       dp->id, cpu);
+		err = cpu_up(cpu);
+		if (err) {
+			__u32 res = DR_CPU_RES_FAILURE;
+			__u32 stat = DR_CPU_STAT_UNCONFIGURED;
+
+			if (!cpu_present(cpu)) {
+				/* CPU not present in MD */
+				res = DR_CPU_RES_NOT_IN_MD;
+				stat = DR_CPU_STAT_NOT_PRESENT;
+			} else if (err == -ENODEV) {
+				/* CPU did not call in successfully */
+				res = DR_CPU_RES_CPU_NOT_RESPONDING;
+			}
+
+			printk(KERN_INFO "ds-%llu: CPU startup failed err=%d\n",
+			       dp->id, err);
+			dr_cpu_mark(resp, cpu, ncpus, res, stat);
+		}
+	}
+
+	spin_lock_irqsave(&ds_lock, flags);
+	__ds_send(dp->lp, resp, resp_len);
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	kfree(resp);
+
+	/* Redistribute IRQs, taking into account the new cpus.  */
+	fixup_irqs();
+
+	return 0;
+}
+
+static int dr_cpu_unconfigure(struct ds_info *dp,
+			      struct ds_cap_state *cp,
+			      u64 req_num,
+			      cpumask_t *mask)
+{
+	struct ds_data *resp;
+	int resp_len, ncpus, cpu;
+	unsigned long flags;
+
+	ncpus = cpumask_weight(mask);
+	resp_len = dr_cpu_size_response(ncpus);
+	resp = kzalloc(resp_len, GFP_KERNEL);
+	if (!resp)
+		return -ENOMEM;
+
+	dr_cpu_init_response(resp, req_num, cp->handle,
+			     resp_len, ncpus, mask,
+			     DR_CPU_STAT_UNCONFIGURED);
+
+	for_each_cpu(cpu, mask) {
+		int err;
+
+		printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n",
+		       dp->id, cpu);
+		err = cpu_down(cpu);
+		if (err)
+			dr_cpu_mark(resp, cpu, ncpus,
+				    DR_CPU_RES_FAILURE,
+				    DR_CPU_STAT_CONFIGURED);
+	}
+
+	spin_lock_irqsave(&ds_lock, flags);
+	__ds_send(dp->lp, resp, resp_len);
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	kfree(resp);
+
+	return 0;
+}
+
+static void dr_cpu_data(struct ds_info *dp, struct ds_cap_state *cp, void *buf,
+			int len)
+{
+	struct ds_data *data = buf;
+	struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+	u32 *cpu_list = (u32 *) (tag + 1);
+	u64 req_num = tag->req_num;
+	cpumask_t mask;
+	unsigned int i;
+	int err;
+
+	switch (tag->type) {
+	case DR_CPU_CONFIGURE:
+	case DR_CPU_UNCONFIGURE:
+	case DR_CPU_FORCE_UNCONFIGURE:
+		break;
+
+	default:
+		dr_cpu_send_error(dp, cp, data);
+		return;
+	}
+
+	purge_dups(cpu_list, tag->num_records);
+
+	cpumask_clear(&mask);
+	for (i = 0; i < tag->num_records; i++) {
+		if (cpu_list[i] == CPU_SENTINEL)
+			continue;
+
+		if (cpu_list[i] < nr_cpu_ids)
+			cpumask_set_cpu(cpu_list[i], &mask);
+	}
+
+	if (tag->type == DR_CPU_CONFIGURE)
+		err = dr_cpu_configure(dp, cp, req_num, &mask);
+	else
+		err = dr_cpu_unconfigure(dp, cp, req_num, &mask);
+
+	if (err)
+		dr_cpu_send_error(dp, cp, data);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+struct ds_pri_msg {
+	__u64				req_num;
+	__u64				type;
+#define DS_PRI_REQUEST			0x00
+#define DS_PRI_DATA			0x01
+#define DS_PRI_UPDATE			0x02
+};
+
+static void ds_pri_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len)
+{
+	struct ds_data *dpkt = buf;
+	struct ds_pri_msg *rp;
+
+	rp = (struct ds_pri_msg *) (dpkt + 1);
+
+	printk(KERN_INFO "ds-%llu: PRI REQ [%llx:%llx], len=%d\n",
+	       dp->id, rp->req_num, rp->type, len);
+}
+
+struct ds_var_hdr {
+	__u32				type;
+#define DS_VAR_SET_REQ			0x00
+#define DS_VAR_DELETE_REQ		0x01
+#define DS_VAR_SET_RESP			0x02
+#define DS_VAR_DELETE_RESP		0x03
+};
+
+struct ds_var_set_msg {
+	struct ds_var_hdr		hdr;
+	char				name_and_value[0];
+};
+
+struct ds_var_delete_msg {
+	struct ds_var_hdr		hdr;
+	char				name[0];
+};
+
+struct ds_var_resp {
+	struct ds_var_hdr		hdr;
+	__u32				result;
+#define DS_VAR_SUCCESS			0x00
+#define DS_VAR_NO_SPACE			0x01
+#define DS_VAR_INVALID_VAR		0x02
+#define DS_VAR_INVALID_VAL		0x03
+#define DS_VAR_NOT_PRESENT		0x04
+};
+
+static DEFINE_MUTEX(ds_var_mutex);
+static int ds_var_doorbell;
+static int ds_var_response;
+
+static void ds_var_data(struct ds_info *dp,
+			struct ds_cap_state *cp,
+			void *buf, int len)
+{
+	struct ds_data *dpkt = buf;
+	struct ds_var_resp *rp;
+
+	rp = (struct ds_var_resp *) (dpkt + 1);
+
+	if (rp->hdr.type != DS_VAR_SET_RESP &&
+	    rp->hdr.type != DS_VAR_DELETE_RESP)
+		return;
+
+	ds_var_response = rp->result;
+	wmb();
+	ds_var_doorbell = 1;
+}
+
+void ldom_set_var(const char *var, const char *value)
+{
+	struct ds_cap_state *cp;
+	struct ds_info *dp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ds_lock, flags);
+	cp = NULL;
+	for (dp = ds_info_list; dp; dp = dp->next) {
+		struct ds_cap_state *tmp;
+
+		tmp = find_cap_by_string(dp, "var-config");
+		if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+			cp = tmp;
+			break;
+		}
+	}
+	if (!cp) {
+		for (dp = ds_info_list; dp; dp = dp->next) {
+			struct ds_cap_state *tmp;
+
+			tmp = find_cap_by_string(dp, "var-config-backup");
+			if (tmp && tmp->state == CAP_STATE_REGISTERED) {
+				cp = tmp;
+				break;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	if (cp) {
+		union {
+			struct {
+				struct ds_data		data;
+				struct ds_var_set_msg	msg;
+			} header;
+			char			all[512];
+		} pkt;
+		char  *base, *p;
+		int msg_len, loops;
+
+		if (strlen(var) + strlen(value) + 2 >
+		    sizeof(pkt) - sizeof(pkt.header)) {
+			printk(KERN_ERR PFX
+				"contents length: %zu, which more than max: %lu,"
+				"so could not set (%s) variable to (%s).\n",
+				strlen(var) + strlen(value) + 2,
+				sizeof(pkt) - sizeof(pkt.header), var, value);
+			return;
+		}
+
+		memset(&pkt, 0, sizeof(pkt));
+		pkt.header.data.tag.type = DS_DATA;
+		pkt.header.data.handle = cp->handle;
+		pkt.header.msg.hdr.type = DS_VAR_SET_REQ;
+		base = p = &pkt.header.msg.name_and_value[0];
+		strcpy(p, var);
+		p += strlen(var) + 1;
+		strcpy(p, value);
+		p += strlen(value) + 1;
+
+		msg_len = (sizeof(struct ds_data) +
+			   sizeof(struct ds_var_set_msg) +
+			   (p - base));
+		msg_len = (msg_len + 3) & ~3;
+		pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
+
+		mutex_lock(&ds_var_mutex);
+
+		spin_lock_irqsave(&ds_lock, flags);
+		ds_var_doorbell = 0;
+		ds_var_response = -1;
+
+		__ds_send(dp->lp, &pkt, msg_len);
+		spin_unlock_irqrestore(&ds_lock, flags);
+
+		loops = 1000;
+		while (ds_var_doorbell == 0) {
+			if (loops-- < 0)
+				break;
+			barrier();
+			udelay(100);
+		}
+
+		mutex_unlock(&ds_var_mutex);
+
+		if (ds_var_doorbell == 0 ||
+		    ds_var_response != DS_VAR_SUCCESS)
+			printk(KERN_ERR "ds-%llu: var-config [%s:%s] "
+			       "failed, response(%d).\n",
+			       dp->id, var, value,
+			       ds_var_response);
+	} else {
+		printk(KERN_ERR PFX "var-config not registered so "
+		       "could not set (%s) variable to (%s).\n",
+		       var, value);
+	}
+}
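+
+/* Illustrative call (hypothetical variable, layout only):
+ *
+ *	ldom_set_var("auto-boot?", "true");
+ *
+ * packs "auto-boot?\0true\0" after the headers and rounds the total
+ * message length up to a 4-byte multiple before tag.len is filled in.
+ */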
+
+static char full_boot_str[256] __attribute__((aligned(32)));
+static int reboot_data_supported;
+
+void ldom_reboot(const char *boot_command)
+{
+	/* Don't bother with any of this if the boot_command
+	 * is empty.
+	 */
+	if (boot_command && strlen(boot_command)) {
+		unsigned long len;
+
+		snprintf(full_boot_str, sizeof(full_boot_str), "boot %s",
+			 boot_command);
+		len = strlen(full_boot_str);
+
+		if (reboot_data_supported) {
+			unsigned long ra = kimage_addr_to_ra(full_boot_str);
+			unsigned long hv_ret;
+
+			hv_ret = sun4v_reboot_data_set(ra, len);
+			if (hv_ret != HV_EOK)
+				pr_err("SUN4V: Unable to set reboot data "
+				       "hv_ret=%lu\n", hv_ret);
+		} else {
+			ldom_set_var("reboot-command", full_boot_str);
+		}
+	}
+	sun4v_mach_sir();
+}
+
+void ldom_power_off(void)
+{
+	sun4v_mach_exit(0);
+}
+
+static void ds_conn_reset(struct ds_info *dp)
+{
+	printk(KERN_ERR "ds-%llu: ds_conn_reset() from %pf\n",
+	       dp->id, __builtin_return_address(0));
+}
+
+static int register_services(struct ds_info *dp)
+{
+	struct ldc_channel *lp = dp->lp;
+	int i;
+
+	for (i = 0; i < dp->num_ds_states; i++) {
+		struct {
+			struct ds_reg_req req;
+			u8 id_buf[256];
+		} pbuf;
+		struct ds_cap_state *cp = &dp->ds_states[i];
+		int err, msg_len;
+		u64 new_count;
+
+		if (cp->state == CAP_STATE_REGISTERED)
+			continue;
+
+		new_count = sched_clock() & 0xffffffff;
+		cp->handle = ((u64) i << 32) | new_count;
+
+		msg_len = (sizeof(struct ds_reg_req) +
+			   strlen(cp->service_id));
+
+		memset(&pbuf, 0, sizeof(pbuf));
+		pbuf.req.tag.type = DS_REG_REQ;
+		pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag));
+		pbuf.req.handle = cp->handle;
+		pbuf.req.major = 1;
+		pbuf.req.minor = 0;
+		strcpy(pbuf.id_buf, cp->service_id);
+
+		err = __ds_send(lp, &pbuf, msg_len);
+		if (err > 0)
+			cp->state = CAP_STATE_REG_SENT;
+	}
+	return 0;
+}
+
+static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
+{
+	if (dp->hs_state == DS_HS_START) {
+		if (pkt->type != DS_INIT_ACK)
+			goto conn_reset;
+
+		dp->hs_state = DS_HS_DONE;
+
+		return register_services(dp);
+	}
+
+	if (dp->hs_state != DS_HS_DONE)
+		goto conn_reset;
+
+	if (pkt->type == DS_REG_ACK) {
+		struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt;
+		struct ds_cap_state *cp = find_cap(dp, ap->handle);
+
+		if (!cp) {
+			printk(KERN_ERR "ds-%llu: REG ACK for unknown "
+			       "handle %llx\n", dp->id, ap->handle);
+			return 0;
+		}
+		printk(KERN_INFO "ds-%llu: Registered %s service.\n",
+		       dp->id, cp->service_id);
+		cp->state = CAP_STATE_REGISTERED;
+	} else if (pkt->type == DS_REG_NACK) {
+		struct ds_reg_nack *np = (struct ds_reg_nack *) pkt;
+		struct ds_cap_state *cp = find_cap(dp, np->handle);
+
+		if (!cp) {
+			printk(KERN_ERR "ds-%llu: REG NACK for "
+			       "unknown handle %llx\n",
+			       dp->id, np->handle);
+			return 0;
+		}
+		cp->state = CAP_STATE_UNKNOWN;
+	}
+
+	return 0;
+
+conn_reset:
+	ds_conn_reset(dp);
+	return -ECONNRESET;
+}
+
+static void __send_ds_nack(struct ds_info *dp, u64 handle)
+{
+	struct ds_data_nack nack = {
+		.tag = {
+			.type = DS_NACK,
+			.len = (sizeof(struct ds_data_nack) -
+				sizeof(struct ds_msg_tag)),
+		},
+		.handle = handle,
+		.result = DS_INV_HDL,
+	};
+
+	__ds_send(dp->lp, &nack, sizeof(nack));
+}
+
+static LIST_HEAD(ds_work_list);
+static DECLARE_WAIT_QUEUE_HEAD(ds_wait);
+
+struct ds_queue_entry {
+	struct list_head		list;
+	struct ds_info			*dp;
+	int				req_len;
+	int				__pad;
+	u64				req[0];
+};
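+
+/* req[] is an old-style flexible array: ds_data() below allocates
+ * sizeof(struct ds_queue_entry) + len and copies the raw packet into
+ * req, so every queue entry carries its own variably-sized payload.
+ */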
+
+static void process_ds_work(void)
+{
+	struct ds_queue_entry *qp, *tmp;
+	unsigned long flags;
+	LIST_HEAD(todo);
+
+	spin_lock_irqsave(&ds_lock, flags);
+	list_splice_init(&ds_work_list, &todo);
+	spin_unlock_irqrestore(&ds_lock, flags);
+
+	list_for_each_entry_safe(qp, tmp, &todo, list) {
+		struct ds_data *dpkt = (struct ds_data *) qp->req;
+		struct ds_info *dp = qp->dp;
+		struct ds_cap_state *cp = find_cap(dp, dpkt->handle);
+		int req_len = qp->req_len;
+
+		if (!cp) {
+			printk(KERN_ERR "ds-%llu: Data for unknown "
+			       "handle %llu\n",
+			       dp->id, dpkt->handle);
+
+			spin_lock_irqsave(&ds_lock, flags);
+			__send_ds_nack(dp, dpkt->handle);
+			spin_unlock_irqrestore(&ds_lock, flags);
+		} else {
+			cp->data(dp, cp, dpkt, req_len);
+		}
+
+		list_del(&qp->list);
+		kfree(qp);
+	}
+}
+
+static int ds_thread(void *__unused)
+{
+	DEFINE_WAIT(wait);
+
+	while (1) {
+		prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE);
+		if (list_empty(&ds_work_list))
+			schedule();
+		finish_wait(&ds_wait, &wait);
+
+		if (kthread_should_stop())
+			break;
+
+		process_ds_work();
+	}
+
+	return 0;
+}
+
+static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
+{
+	struct ds_data *dpkt = (struct ds_data *) pkt;
+	struct ds_queue_entry *qp;
+
+	qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC);
+	if (!qp) {
+		__send_ds_nack(dp, dpkt->handle);
+	} else {
+		qp->dp = dp;
+		qp->req_len = len;	/* read back by process_ds_work() */
+		memcpy(&qp->req, pkt, len);
+		list_add_tail(&qp->list, &ds_work_list);
+		wake_up(&ds_wait);
+	}
+	return 0;
+}
+
+static void ds_up(struct ds_info *dp)
+{
+	struct ldc_channel *lp = dp->lp;
+	struct ds_ver_req req;
+	int err;
+
+	req.tag.type = DS_INIT_REQ;
+	req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag);
+	req.ver.major = 1;
+	req.ver.minor = 0;
+
+	err = __ds_send(lp, &req, sizeof(req));
+	if (err > 0)
+		dp->hs_state = DS_HS_START;
+}
+
+static void ds_reset(struct ds_info *dp)
+{
+	int i;
+
+	dp->hs_state = 0;
+
+	for (i = 0; i < dp->num_ds_states; i++) {
+		struct ds_cap_state *cp = &dp->ds_states[i];
+
+		cp->state = CAP_STATE_UNKNOWN;
+	}
+}
+
+static void ds_event(void *arg, int event)
+{
+	struct ds_info *dp = arg;
+	struct ldc_channel *lp = dp->lp;
+	unsigned long flags;
+	int err;
+
+	spin_lock_irqsave(&ds_lock, flags);
+
+	if (event == LDC_EVENT_UP) {
+		ds_up(dp);
+		spin_unlock_irqrestore(&ds_lock, flags);
+		return;
+	}
+
+	if (event == LDC_EVENT_RESET) {
+		ds_reset(dp);
+		spin_unlock_irqrestore(&ds_lock, flags);
+		return;
+	}
+
+	if (event != LDC_EVENT_DATA_READY) {
+		printk(KERN_WARNING "ds-%llu: Unexpected LDC event %d\n",
+		       dp->id, event);
+		spin_unlock_irqrestore(&ds_lock, flags);
+		return;
+	}
+
+	err = 0;
+	while (1) {
+		struct ds_msg_tag *tag;
+
+		err = ldc_read(lp, dp->rcv_buf, sizeof(*tag));
+
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				ds_conn_reset(dp);
+			break;
+		}
+		if (err == 0)
+			break;
+
+		tag = dp->rcv_buf;
+		err = ldc_read(lp, tag + 1, tag->len);
+
+		if (unlikely(err < 0)) {
+			if (err == -ECONNRESET)
+				ds_conn_reset(dp);
+			break;
+		}
+		if (err < tag->len)
+			break;
+
+		if (tag->type < DS_DATA)
+			err = ds_handshake(dp, dp->rcv_buf);
+		else
+			err = ds_data(dp, dp->rcv_buf,
+				      sizeof(*tag) + err);
+		if (err == -ECONNRESET)
+			break;
+	}
+
+	spin_unlock_irqrestore(&ds_lock, flags);
+}
+
+static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id)
+{
+	static int ds_version_printed;
+	struct ldc_channel_config ds_cfg = {
+		.event		= ds_event,
+		.mtu		= 4096,
+		.mode		= LDC_MODE_STREAM,
+	};
+	struct mdesc_handle *hp;
+	struct ldc_channel *lp;
+	struct ds_info *dp;
+	const u64 *val;
+	int err, i;
+
+	if (ds_version_printed++ == 0)
+		printk(KERN_INFO "%s", version);
+
+	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!dp)
+		goto out_err;
+
+	hp = mdesc_grab();
+	val = mdesc_get_property(hp, vdev->mp, "id", NULL);
+	if (val)
+		dp->id = *val;
+	mdesc_release(hp);
+
+	dp->rcv_buf = kzalloc(4096, GFP_KERNEL);
+	if (!dp->rcv_buf)
+		goto out_free_dp;
+
+	dp->rcv_buf_len = 4096;
+
+	dp->ds_states = kmemdup(ds_states_template,
+				sizeof(ds_states_template), GFP_KERNEL);
+	if (!dp->ds_states)
+		goto out_free_rcv_buf;
+
+	dp->num_ds_states = ARRAY_SIZE(ds_states_template);
+
+	for (i = 0; i < dp->num_ds_states; i++)
+		dp->ds_states[i].handle = ((u64)i << 32);
+
+	ds_cfg.tx_irq = vdev->tx_irq;
+	ds_cfg.rx_irq = vdev->rx_irq;
+
+	lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS");
+	if (IS_ERR(lp)) {
+		err = PTR_ERR(lp);
+		goto out_free_ds_states;
+	}
+	dp->lp = lp;
+
+	err = ldc_bind(lp);
+	if (err)
+		goto out_free_ldc;
+
+	spin_lock_irq(&ds_lock);
+	dp->next = ds_info_list;
+	ds_info_list = dp;
+	spin_unlock_irq(&ds_lock);
+
+	return err;
+
+out_free_ldc:
+	ldc_free(dp->lp);
+
+out_free_ds_states:
+	kfree(dp->ds_states);
+
+out_free_rcv_buf:
+	kfree(dp->rcv_buf);
+
+out_free_dp:
+	kfree(dp);
+
+out_err:
+	return err;
+}
+
+static int ds_remove(struct vio_dev *vdev)
+{
+	return 0;
+}
+
+static const struct vio_device_id ds_match[] = {
+	{
+		.type = "domain-services-port",
+	},
+	{},
+};
+
+static struct vio_driver ds_driver = {
+	.id_table	= ds_match,
+	.probe		= ds_probe,
+	.remove		= ds_remove,
+	.name		= "ds",
+};
+
+static int __init ds_init(void)
+{
+	unsigned long hv_ret, major, minor;
+
+	if (tlb_type == hypervisor) {
+		hv_ret = sun4v_get_version(HV_GRP_REBOOT_DATA, &major, &minor);
+		if (hv_ret == HV_EOK) {
+			pr_info("SUN4V: Reboot data supported (maj=%lu,min=%lu).\n",
+				major, minor);
+			reboot_data_supported = 1;
+		}
+	}
+	kthread_run(ds_thread, NULL, "kldomd");
+
+	return vio_register_driver(&ds_driver);
+}
+
+fs_initcall(ds_init);
diff --git a/arch/sparc/kernel/dtlb_miss.S b/arch/sparc/kernel/dtlb_miss.S
new file mode 100644
index 0000000..fb9c788
--- /dev/null
+++ b/arch/sparc/kernel/dtlb_miss.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* DTLB ** ICACHE line 1: Context 0 check and TSB load	*/
+	ldxa	[%g0] ASI_DMMU_TSB_8KB_PTR, %g1	! Get TSB 8K pointer
+	ldxa	[%g0] ASI_DMMU, %g6		! Get TAG TARGET
+	srlx	%g6, 48, %g5			! Get context
+	sllx	%g6, 22, %g6			! Zero out context
+	brz,pn	%g5, kvmap_dtlb			! Context 0 processing
+	 srlx	%g6, 22, %g6			! Delay slot
+	TSB_LOAD_QUAD(%g1, %g4)			! Load TSB entry
+	cmp	%g4, %g6			! Compare TAG
+
+/* DTLB ** ICACHE line 2: TSB compare and TLB load	*/
+	bne,pn	%xcc, tsb_miss_dtlb		! Miss
+	 mov	FAULT_CODE_DTLB, %g3
+	stxa	%g5, [%g0] ASI_DTLB_DATA_IN	! Load TLB
+	retry					! Trap done
+	nop
+	nop
+	nop
+	nop
+
+/* DTLB ** ICACHE line 3:				*/
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+/* DTLB ** ICACHE line 4: 				*/
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
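+
+/* Layout note: this file is included directly into the trap table,
+ * where the fast data-access MMU miss vector is allotted four
+ * consecutive trap slots (4 x 8 = 32 instructions); the nops above pad
+ * I-cache lines 3 and 4 so the next trap table entry keeps its fixed
+ * offset.
+ */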
diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S
new file mode 100644
index 0000000..9f94577
--- /dev/null
+++ b/arch/sparc/kernel/dtlb_prot.S
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * dtlb_prot.S: DTLB protection trap strategy.
+ *              This is included directly into the trap table.
+ *
+ * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
+ */
+
+/* Ways we can get here:
+ *
+ * [TL == 0] 1) User stores to readonly pages.
+ * [TL == 0] 2) Nucleus stores to user readonly pages.
+ * [TL >  0] 3) Nucleus stores to user readonly stack frame.
+ */
+
+/* PROT ** ICACHE line 1: User DTLB protection trap	*/
+	mov		TLB_SFSR, %g1
+	stxa		%g0, [%g1] ASI_DMMU		! Clear FaultValid bit
+	membar		#Sync				! Synchronize stores
+	rdpr		%pstate, %g5			! Move into alt-globals
+	wrpr		%g5, PSTATE_AG|PSTATE_MG, %pstate
+	rdpr		%tl, %g1			! Need a winfixup?
+	cmp		%g1, 1				! Trap level >1?
+	mov		TLB_TAG_ACCESS, %g4		! For reload of vaddr
+
+/* PROT ** ICACHE line 2: More real fault processing */
+	ldxa		[%g4] ASI_DMMU, %g5		! Put tagaccess in %g5
+	srlx		%g5, PAGE_SHIFT, %g5
+	sllx		%g5, PAGE_SHIFT, %g5		! Clear context ID bits
+	bgu,pn		%xcc, winfix_trampoline		! Yes, perform winfixup
+	 mov		FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
+	ba,pt		%xcc, sparc64_realfault_common	! Nope, normal fault
+	 nop
+	nop
+
+/* PROT ** ICACHE line 3: Unused...	*/
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+/* PROT ** ICACHE line 4: Unused...	*/
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
new file mode 100644
index 0000000..264b186
--- /dev/null
+++ b/arch/sparc/kernel/ebus.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* ebus.c: EBUS DMA library code.
+ *
+ * Copyright (C) 1997  Eddie C. Dost  (ecd@skynet.be)
+ * Copyright (C) 1999  David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include <asm/ebus_dma.h>
+#include <asm/io.h>
+
+#define EBDMA_CSR	0x00UL	/* Control/Status */
+#define EBDMA_ADDR	0x04UL	/* DMA Address */
+#define EBDMA_COUNT	0x08UL	/* DMA Count */
+
+#define EBDMA_CSR_INT_PEND	0x00000001
+#define EBDMA_CSR_ERR_PEND	0x00000002
+#define EBDMA_CSR_DRAIN		0x00000004
+#define EBDMA_CSR_INT_EN	0x00000010
+#define EBDMA_CSR_RESET		0x00000080
+#define EBDMA_CSR_WRITE		0x00000100
+#define EBDMA_CSR_EN_DMA	0x00000200
+#define EBDMA_CSR_CYC_PEND	0x00000400
+#define EBDMA_CSR_DIAG_RD_DONE	0x00000800
+#define EBDMA_CSR_DIAG_WR_DONE	0x00001000
+#define EBDMA_CSR_EN_CNT	0x00002000
+#define EBDMA_CSR_TC		0x00004000
+#define EBDMA_CSR_DIS_CSR_DRN	0x00010000
+#define EBDMA_CSR_BURST_SZ_MASK	0x000c0000
+#define EBDMA_CSR_BURST_SZ_1	0x00080000
+#define EBDMA_CSR_BURST_SZ_4	0x00000000
+#define EBDMA_CSR_BURST_SZ_8	0x00040000
+#define EBDMA_CSR_BURST_SZ_16	0x000c0000
+#define EBDMA_CSR_DIAG_EN	0x00100000
+#define EBDMA_CSR_DIS_ERR_PEND	0x00400000
+#define EBDMA_CSR_TCI_DIS	0x00800000
+#define EBDMA_CSR_EN_NEXT	0x01000000
+#define EBDMA_CSR_DMA_ON	0x02000000
+#define EBDMA_CSR_A_LOADED	0x04000000
+#define EBDMA_CSR_NA_LOADED	0x08000000
+#define EBDMA_CSR_DEV_ID_MASK	0xf0000000
+
+#define EBUS_DMA_RESET_TIMEOUT	10000
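+
+/* A minimal sketch of the intended driver-side call sequence; the
+ * my_* names are hypothetical and error handling is elided:
+ *
+ *	struct ebus_dma_info p = {
+ *		.regs     = my_regs,
+ *		.irq      = my_irq,
+ *		.flags    = EBUS_DMA_FLAG_USE_EBDMA_HANDLER,
+ *		.callback = my_callback,
+ *	};
+ *
+ *	spin_lock_init(&p.lock);
+ *	strcpy(p.name, "my_ebus_dma");
+ *	ebus_dma_register(&p);
+ *	ebus_dma_irq_enable(&p, 1);
+ *	ebus_dma_prepare(&p, 1);
+ *	ebus_dma_enable(&p, 1);
+ *	ebus_dma_request(&p, my_busaddr, my_len);
+ *
+ * Completion or errors are then reported through my_callback() with
+ * one of the EBUS_DMA_EVENT_* codes; the second argument of
+ * ebus_dma_prepare() is simply passed through to EBDMA_CSR_WRITE.
+ */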
+
+static void __ebus_dma_reset(struct ebus_dma_info *p, int no_drain)
+{
+	int i;
+	u32 val = 0;
+
+	writel(EBDMA_CSR_RESET, p->regs + EBDMA_CSR);
+	udelay(1);
+
+	if (no_drain)
+		return;
+
+	for (i = EBUS_DMA_RESET_TIMEOUT; i > 0; i--) {
+		val = readl(p->regs + EBDMA_CSR);
+
+		if (!(val & (EBDMA_CSR_DRAIN | EBDMA_CSR_CYC_PEND)))
+			break;
+		udelay(10);
+	}
+}
+
+static irqreturn_t ebus_dma_irq(int irq, void *dev_id)
+{
+	struct ebus_dma_info *p = dev_id;
+	unsigned long flags;
+	u32 csr = 0;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	writel(csr, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (csr & EBDMA_CSR_ERR_PEND) {
+		printk(KERN_CRIT "ebus_dma(%s): DMA error!\n", p->name);
+		p->callback(p, EBUS_DMA_EVENT_ERROR, p->client_cookie);
+		return IRQ_HANDLED;
+	} else if (csr & EBDMA_CSR_INT_PEND) {
+		p->callback(p,
+			    (csr & EBDMA_CSR_TC) ?
+			    EBUS_DMA_EVENT_DMA : EBUS_DMA_EVENT_DEVICE,
+			    p->client_cookie);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+int ebus_dma_register(struct ebus_dma_info *p)
+{
+	u32 csr;
+
+	if (!p->regs)
+		return -EINVAL;
+	if (p->flags & ~(EBUS_DMA_FLAG_USE_EBDMA_HANDLER |
+			 EBUS_DMA_FLAG_TCI_DISABLE))
+		return -EINVAL;
+	if ((p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) && !p->callback)
+		return -EINVAL;
+	if (!strlen(p->name))
+		return -EINVAL;
+
+	__ebus_dma_reset(p, 1);
+
+	csr = EBDMA_CSR_BURST_SZ_16 | EBDMA_CSR_EN_CNT;
+
+	if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
+		csr |= EBDMA_CSR_TCI_DIS;
+
+	writel(csr, p->regs + EBDMA_CSR);
+
+	return 0;
+}
+EXPORT_SYMBOL(ebus_dma_register);
+
+int ebus_dma_irq_enable(struct ebus_dma_info *p, int on)
+{
+	unsigned long flags;
+	u32 csr;
+
+	if (on) {
+		if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) {
+			if (request_irq(p->irq, ebus_dma_irq, IRQF_SHARED, p->name, p))
+				return -EBUSY;
+		}
+
+		spin_lock_irqsave(&p->lock, flags);
+		csr = readl(p->regs + EBDMA_CSR);
+		csr |= EBDMA_CSR_INT_EN;
+		writel(csr, p->regs + EBDMA_CSR);
+		spin_unlock_irqrestore(&p->lock, flags);
+	} else {
+		spin_lock_irqsave(&p->lock, flags);
+		csr = readl(p->regs + EBDMA_CSR);
+		csr &= ~EBDMA_CSR_INT_EN;
+		writel(csr, p->regs + EBDMA_CSR);
+		spin_unlock_irqrestore(&p->lock, flags);
+
+		if (p->flags & EBUS_DMA_FLAG_USE_EBDMA_HANDLER) {
+			free_irq(p->irq, p);
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ebus_dma_irq_enable);
+
+void ebus_dma_unregister(struct ebus_dma_info *p)
+{
+	unsigned long flags;
+	u32 csr;
+	int irq_on = 0;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	if (csr & EBDMA_CSR_INT_EN) {
+		csr &= ~EBDMA_CSR_INT_EN;
+		writel(csr, p->regs + EBDMA_CSR);
+		irq_on = 1;
+	}
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	if (irq_on)
+		free_irq(p->irq, p);
+}
+EXPORT_SYMBOL(ebus_dma_unregister);
+
+int ebus_dma_request(struct ebus_dma_info *p, dma_addr_t bus_addr, size_t len)
+{
+	unsigned long flags;
+	u32 csr;
+	int err;
+
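+	/* The transfer length apparently has to fit the 24-bit
+	 * EBDMA_COUNT register, hence the (1 << 24) ceiling below.
+	 */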
+	if (len >= (1 << 24))
+		return -EINVAL;
+
+	spin_lock_irqsave(&p->lock, flags);
+	csr = readl(p->regs + EBDMA_CSR);
+	err = -EINVAL;
+	if (!(csr & EBDMA_CSR_EN_DMA))
+		goto out;
+	err = -EBUSY;
+	if (csr & EBDMA_CSR_NA_LOADED)
+		goto out;
+
+	writel(len,      p->regs + EBDMA_COUNT);
+	writel(bus_addr, p->regs + EBDMA_ADDR);
+	err = 0;
+
+out:
+	spin_unlock_irqrestore(&p->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ebus_dma_request);
+
+void ebus_dma_prepare(struct ebus_dma_info *p, int write)
+{
+	unsigned long flags;
+	u32 csr;
+
+	spin_lock_irqsave(&p->lock, flags);
+	__ebus_dma_reset(p, 0);
+
+	csr = (EBDMA_CSR_INT_EN |
+	       EBDMA_CSR_EN_CNT |
+	       EBDMA_CSR_BURST_SZ_16 |
+	       EBDMA_CSR_EN_NEXT);
+
+	if (write)
+		csr |= EBDMA_CSR_WRITE;
+	if (p->flags & EBUS_DMA_FLAG_TCI_DISABLE)
+		csr |= EBDMA_CSR_TCI_DIS;
+
+	writel(csr, p->regs + EBDMA_CSR);
+
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+EXPORT_SYMBOL(ebus_dma_prepare);
+
+unsigned int ebus_dma_residue(struct ebus_dma_info *p)
+{
+	return readl(p->regs + EBDMA_COUNT);
+}
+EXPORT_SYMBOL(ebus_dma_residue);
+
+unsigned int ebus_dma_addr(struct ebus_dma_info *p)
+{
+	return readl(p->regs + EBDMA_ADDR);
+}
+EXPORT_SYMBOL(ebus_dma_addr);
+
+void ebus_dma_enable(struct ebus_dma_info *p, int on)
+{
+	unsigned long flags;
+	u32 orig_csr, csr;
+
+	spin_lock_irqsave(&p->lock, flags);
+	orig_csr = csr = readl(p->regs + EBDMA_CSR);
+	if (on)
+		csr |= EBDMA_CSR_EN_DMA;
+	else
+		csr &= ~EBDMA_CSR_EN_DMA;
+	if ((orig_csr & EBDMA_CSR_EN_DMA) !=
+	    (csr & EBDMA_CSR_EN_DMA))
+		writel(csr, p->regs + EBDMA_CSR);
+	spin_unlock_irqrestore(&p->lock, flags);
+}
+EXPORT_SYMBOL(ebus_dma_enable);
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
new file mode 100644
index 0000000..4d36969
--- /dev/null
+++ b/arch/sparc/kernel/entry.S
@@ -0,0 +1,1372 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* arch/sparc/kernel/entry.S:  Sparc trap low-level entry points.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 Eddie C. Dost   (ecd@skynet.be)
+ * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1996-1999 Jakub Jelinek   (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au)
+ */
+
+#include <linux/linkage.h>
+#include <linux/errno.h>
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/smp.h>
+#include <asm/contregs.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/psr.h>
+#include <asm/vaddrs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/winmacro.h>
+#include <asm/signal.h>
+#include <asm/obio.h>
+#include <asm/mxcc.h>
+#include <asm/thread_info.h>
+#include <asm/param.h>
+#include <asm/unistd.h>
+
+#include <asm/asmmacro.h>
+#include <asm/export.h>
+
+#define curptr      g6
+
+/* These are just handy. */
+#define _SV	save	%sp, -STACKFRAME_SZ, %sp
+#define _RS     restore 
+
+#define FLUSH_ALL_KERNEL_WINDOWS \
+	_SV; _SV; _SV; _SV; _SV; _SV; _SV; \
+	_RS; _RS; _RS; _RS; _RS; _RS; _RS;
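+
+/* Seven nested save/restore pairs walk once around the register file,
+ * taking a window-overflow trap for each dirty window; that is enough
+ * to cycle every window on machines with up to eight windows.
+ */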
+
+	.text
+
+#ifdef CONFIG_KGDB
+	.align	4
+	.globl		arch_kgdb_breakpoint
+	.type		arch_kgdb_breakpoint,#function
+arch_kgdb_breakpoint:
+	ta		0x7d
+	retl
+	 nop
+	.size		arch_kgdb_breakpoint,.-arch_kgdb_breakpoint
+#endif
+
+#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
+	.align	4
+	.globl	floppy_hardint
+floppy_hardint:
+	/*
+	 * This code cannot touch registers %l0 %l1 and %l2
+	 * because SAVE_ALL depends on their values. It depends
+	 * on %l3 also, but we regenerate it before a call.
+	 * Other registers are:
+	 * %l3 -- base address of fdc registers
+	 * %l4 -- pdma_vaddr
+	 * %l5 -- scratch for ld/st address
+	 * %l6 -- pdma_size
+	 * %l7 -- scratch [floppy byte, ld/st address, aux. data]
+	 */
+
+	/* Do we have work to do? */
+	sethi	%hi(doing_pdma), %l7
+	ld	[%l7 + %lo(doing_pdma)], %l7
+	cmp	%l7, 0
+	be	floppy_dosoftint
+	 nop
+
+	/* Load fdc register base */
+	sethi	%hi(fdc_status), %l3
+	ld	[%l3 + %lo(fdc_status)], %l3
+
+	/* Setup register addresses */
+	sethi	%hi(pdma_vaddr), %l5	! transfer buffer
+	ld	[%l5 + %lo(pdma_vaddr)], %l4
+	sethi	%hi(pdma_size), %l5	! bytes to go
+	ld	[%l5 + %lo(pdma_size)], %l6
+next_byte:
+  	ldub	[%l3], %l7
+
+	andcc	%l7, 0x80, %g0		! Does fifo still have data
+	bz	floppy_fifo_emptied	! fifo has been emptied...
+	 andcc	%l7, 0x20, %g0		! in non-dma mode still?
+	bz	floppy_overrun		! nope, overrun
+	 andcc	%l7, 0x40, %g0		! 0=write 1=read
+	bz	floppy_write
+	 sub	%l6, 0x1, %l6
+
+	/* Ok, actually read this byte */
+	ldub	[%l3 + 1], %l7
+	orcc	%g0, %l6, %g0
+	stb	%l7, [%l4]
+	bne	next_byte
+	 add	%l4, 0x1, %l4
+
+	b	floppy_tdone
+	 nop
+
+floppy_write:
+	/* Ok, actually write this byte */
+	ldub	[%l4], %l7
+	orcc	%g0, %l6, %g0
+	stb	%l7, [%l3 + 1]
+	bne	next_byte
+	 add	%l4, 0x1, %l4
+
+	/* fall through... */
+floppy_tdone:
+	sethi	%hi(pdma_vaddr), %l5
+	st	%l4, [%l5 + %lo(pdma_vaddr)]
+	sethi	%hi(pdma_size), %l5
+	st	%l6, [%l5 + %lo(pdma_size)]
+	/* Flip terminal count pin */
+	set	auxio_register, %l7
+	ld	[%l7], %l7
+
+	ldub	[%l7], %l5
+
+	or	%l5, 0xc2, %l5
+	stb	%l5, [%l7]
+	andn    %l5, 0x02, %l5
+
+2:
+	/* Kill some time so the bits set */
+	WRITE_PAUSE
+	WRITE_PAUSE
+
+	stb     %l5, [%l7]
+
+	/* Prevent recursion */
+	sethi	%hi(doing_pdma), %l7
+	b	floppy_dosoftint
+	 st	%g0, [%l7 + %lo(doing_pdma)]
+
+	/* We emptied the FIFO, but we haven't read everything
+	 * as of yet.  Store the current transfer address and
+	 * bytes left to read so we can continue when the next
+	 * fast IRQ comes in.
+	 */
+floppy_fifo_emptied:
+	sethi	%hi(pdma_vaddr), %l5
+	st	%l4, [%l5 + %lo(pdma_vaddr)]
+	sethi	%hi(pdma_size), %l7
+	st	%l6, [%l7 + %lo(pdma_size)]
+
+	/* Restore condition codes */
+	wr	%l0, 0x0, %psr
+	WRITE_PAUSE
+
+	jmp	%l1
+	rett	%l2
+
+floppy_overrun:
+	sethi	%hi(pdma_vaddr), %l5
+	st	%l4, [%l5 + %lo(pdma_vaddr)]
+	sethi	%hi(pdma_size), %l5
+	st	%l6, [%l5 + %lo(pdma_size)]
+	/* Prevent recursion */
+	sethi	%hi(doing_pdma), %l7
+	st	%g0, [%l7 + %lo(doing_pdma)]
+
+	/* fall through... */
+floppy_dosoftint:
+	rd	%wim, %l3
+	SAVE_ALL
+
+	/* Set all IRQs off. */
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+
+	mov	11, %o0			! floppy irq level (unused anyway)
+	mov	%g0, %o1		! devid is not used in fast interrupts
+	call	sparc_floppy_irq
+	 add	%sp, STACKFRAME_SZ, %o2	! struct pt_regs *regs
+
+	RESTORE_ALL
+	
+#endif /* (CONFIG_BLK_DEV_FD) */
+
+	/* Bad trap handler */
+	.globl	bad_trap_handler
+bad_trap_handler:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0	! pt_regs
+	call	do_hw_interrupt
+	 mov	%l7, %o1		! trap number
+
+	RESTORE_ALL
+	
+/* For now all IRQs not registered get sent here. handler_irq() will
+ * see if a routine is registered to handle this interrupt and if not
+ * it will say so on the console.
+ */
+
+	.align	4
+	.globl	real_irq_entry, patch_handler_irq
+real_irq_entry:
+	SAVE_ALL
+
+#ifdef CONFIG_SMP
+	.globl	patchme_maybe_smp_msg
+
+	cmp	%l7, 11
+patchme_maybe_smp_msg:
+	bgu	maybe_smp4m_msg
+	 nop
+#endif
+
+real_irq_continue:
+	or	%l0, PSR_PIL, %g2
+	wr	%g2, 0x0, %psr
+	WRITE_PAUSE
+	wr	%g2, PSR_ET, %psr
+	WRITE_PAUSE
+	mov	%l7, %o0		! irq level
+patch_handler_irq:
+	call	handler_irq
+	 add	%sp, STACKFRAME_SZ, %o1	! pt_regs ptr
+	or	%l0, PSR_PIL, %g2	! restore PIL after handler_irq
+	wr	%g2, PSR_ET, %psr	! keep ET up
+	WRITE_PAUSE
+
+	RESTORE_ALL
+
+#ifdef CONFIG_SMP
+	/* SMP per-cpu ticker interrupts are handled specially. */
+smp4m_ticker:
+	bne	real_irq_continue+4
+	 or	%l0, PSR_PIL, %g2
+	wr	%g2, 0x0, %psr
+	WRITE_PAUSE
+	wr	%g2, PSR_ET, %psr
+	WRITE_PAUSE
+	call	smp4m_percpu_timer_interrupt
+	 add	%sp, STACKFRAME_SZ, %o0
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+	RESTORE_ALL
+
+#define GET_PROCESSOR4M_ID(reg)	\
+	rd	%tbr, %reg;	\
+	srl	%reg, 12, %reg;	\
+	and	%reg, 3, %reg;
+
+	/* Here is where we check for possible SMP IPI passed to us
+	 * on some level other than 15, which is the NMI and only used
+	 * for cross calls.  That has a separate entry point below.
+	 *
+	 * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*.
+	 */
+maybe_smp4m_msg:
+	GET_PROCESSOR4M_ID(o3)
+	sethi	%hi(sun4m_irq_percpu), %l5
+	sll	%o3, 2, %o3
+	or	%l5, %lo(sun4m_irq_percpu), %o5
+	sethi	%hi(0x70000000), %o2	! Check all soft-IRQs
+	ld	[%o5 + %o3], %o1
+	ld	[%o1 + 0x00], %o3	! sun4m_irq_percpu[cpu]->pending
+	andcc	%o3, %o2, %g0
+	be,a	smp4m_ticker
+	 cmp	%l7, 14
+	/* Soft-IRQ IPI */
+	st	%o2, [%o1 + 0x04]	! sun4m_irq_percpu[cpu]->clear=0x70000000
+	WRITE_PAUSE
+	ld	[%o1 + 0x00], %g0	! sun4m_irq_percpu[cpu]->pending
+	WRITE_PAUSE
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+	srl	%o3, 28, %o2		! shift for simpler checks below
+maybe_smp4m_msg_check_single:
+	andcc	%o2, 0x1, %g0
+	beq,a	maybe_smp4m_msg_check_mask
+	 andcc	%o2, 0x2, %g0
+	call	smp_call_function_single_interrupt
+	 nop
+	andcc	%o2, 0x2, %g0
+maybe_smp4m_msg_check_mask:
+	beq,a	maybe_smp4m_msg_check_resched
+	 andcc	%o2, 0x4, %g0
+	call	smp_call_function_interrupt
+	 nop
+	andcc	%o2, 0x4, %g0
+maybe_smp4m_msg_check_resched:
+	/* rescheduling is done in RESTORE_ALL regardless, but incr stats */
+	beq,a	maybe_smp4m_msg_out
+	 nop
+	call	smp_resched_interrupt
+	 nop
+maybe_smp4m_msg_out:
+	RESTORE_ALL
+
+	.align	4
+	.globl	linux_trap_ipi15_sun4m
+linux_trap_ipi15_sun4m:
+	SAVE_ALL
+	sethi	%hi(0x80000000), %o2
+	GET_PROCESSOR4M_ID(o0)
+	sethi	%hi(sun4m_irq_percpu), %l5
+	or	%l5, %lo(sun4m_irq_percpu), %o5
+	sll	%o0, 2, %o0
+	ld	[%o5 + %o0], %o5
+	ld	[%o5 + 0x00], %o3	! sun4m_irq_percpu[cpu]->pending
+	andcc	%o3, %o2, %g0
+	be	sun4m_nmi_error		! Must be an NMI async memory error
+	 st	%o2, [%o5 + 0x04]	! sun4m_irq_percpu[cpu]->clear=0x80000000
+	WRITE_PAUSE
+	ld	[%o5 + 0x00], %g0	! sun4m_irq_percpu[cpu]->pending
+	WRITE_PAUSE
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+	call	smp4m_cross_call_irq
+	 nop
+	b	ret_trap_lockless_ipi
+	 clr	%l6
+
+	.globl	smp4d_ticker
+	/* SMP per-cpu ticker interrupts are handled specially. */
+smp4d_ticker:
+	SAVE_ALL
+	or	%l0, PSR_PIL, %g2
+	sethi	%hi(CC_ICLR), %o0
+	sethi	%hi(1 << 14), %o1
+	or	%o0, %lo(CC_ICLR), %o0
+	stha	%o1, [%o0] ASI_M_MXCC	/* Clear PIL 14 in MXCC's ICLR */
+	wr	%g2, 0x0, %psr
+	WRITE_PAUSE
+	wr	%g2, PSR_ET, %psr
+	WRITE_PAUSE
+	call	smp4d_percpu_timer_interrupt
+	 add	%sp, STACKFRAME_SZ, %o0
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+	RESTORE_ALL
+
+	.align	4
+	.globl	linux_trap_ipi15_sun4d
+linux_trap_ipi15_sun4d:
+	SAVE_ALL
+	sethi	%hi(CC_BASE), %o4
+	sethi	%hi(MXCC_ERR_ME|MXCC_ERR_PEW|MXCC_ERR_ASE|MXCC_ERR_PEE), %o2
+	or	%o4, (CC_EREG - CC_BASE), %o0
+	ldda	[%o0] ASI_M_MXCC, %o0
+	andcc	%o0, %o2, %g0
+	bne	1f
+	 sethi	%hi(BB_STAT2), %o2
+	lduba	[%o2] ASI_M_CTL, %o2
+	andcc	%o2, BB_STAT2_MASK, %g0
+	bne	2f
+	 or	%o4, (CC_ICLR - CC_BASE), %o0
+	sethi	%hi(1 << 15), %o1
+	stha	%o1, [%o0] ASI_M_MXCC	/* Clear PIL 15 in MXCC's ICLR */
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+	call	smp4d_cross_call_irq
+	 nop
+	b	ret_trap_lockless_ipi
+	 clr	%l6
+
+1:	/* MXCC error */
+2:	/* BB error */
+	/* Disable PIL 15 */
+	set	CC_IMSK, %l4
+	lduha	[%l4] ASI_M_MXCC, %l5
+	sethi	%hi(1 << 15), %l7
+	or	%l5, %l7, %l5
+	stha	%l5, [%l4] ASI_M_MXCC
+	/* FIXME */
+1:	b,a	1b
+
+	.globl	smpleon_ipi
+	.extern leon_ipi_interrupt
+	/* SMP per-cpu IPI interrupts are handled specially. */
+smpleon_ipi:
+        SAVE_ALL
+	or	%l0, PSR_PIL, %g2
+	wr	%g2, 0x0, %psr
+	WRITE_PAUSE
+	wr	%g2, PSR_ET, %psr
+	WRITE_PAUSE
+	call	leonsmp_ipi_interrupt
+	 add	%sp, STACKFRAME_SZ, %o1 ! pt_regs
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+	RESTORE_ALL
+
+	.align	4
+	.globl	linux_trap_ipi15_leon
+linux_trap_ipi15_leon:
+	SAVE_ALL
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+	call	leon_cross_call_irq
+	 nop
+	b	ret_trap_lockless_ipi
+	 clr	%l6
+
+#endif /* CONFIG_SMP */
+
+	/* This routine handles illegal instructions and privileged
+	 * instruction attempts from user code.
+	 */
+	.align	4
+	.globl	bad_instruction
+bad_instruction:
+	sethi	%hi(0xc1f80000), %l4
+	ld	[%l1], %l5
+	sethi	%hi(0x81d80000), %l7
+	and	%l5, %l4, %l5
+	cmp	%l5, %l7
+	be	1f
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	do_illegal_instruction
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+1:	/* unimplemented flush - just skip */
+	jmpl	%l2, %g0
+	 rett	%l2 + 4
+
+	.align	4
+	.globl	priv_instruction
+priv_instruction:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	do_priv_instruction
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles unaligned data accesses. */
+	.align	4
+	.globl	mna_handler
+mna_handler:
+	andcc	%l0, PSR_PS, %g0
+	be	mna_fromuser
+	 nop
+
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	ld	[%l1], %o1
+	call	kernel_unaligned_trap
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	RESTORE_ALL
+
+mna_fromuser:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	ld	[%l1], %o1
+	call	user_unaligned_trap
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	RESTORE_ALL
+
+	/* This routine handles floating point disabled traps. */
+	.align	4
+	.globl	fpd_trap_handler
+fpd_trap_handler:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	do_fpd_trap
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Floating Point Exceptions. */
+	.align	4
+	.globl	fpe_trap_handler
+fpe_trap_handler:
+	set	fpsave_magic, %l5
+	cmp	%l1, %l5
+	be	1f
+	 sethi	%hi(fpsave), %l5
+	or	%l5, %lo(fpsave), %l5
+	cmp	%l1, %l5
+	bne	2f
+	 sethi	%hi(fpsave_catch2), %l5
+	or	%l5, %lo(fpsave_catch2), %l5
+	wr	%l0, 0x0, %psr
+	WRITE_PAUSE
+	jmp	%l5
+	 rett	%l5 + 4
+1:	
+	sethi	%hi(fpsave_catch), %l5
+	or	%l5, %lo(fpsave_catch), %l5
+	wr	%l0, 0x0, %psr
+	WRITE_PAUSE
+	jmp	%l5
+	 rett	%l5 + 4
+
+2:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	do_fpe_trap
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Tag Overflow Exceptions. */
+	.align	4
+	.globl	do_tag_overflow
+do_tag_overflow:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	handle_tag_overflow
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Watchpoint Exceptions. */
+	.align	4
+	.globl	do_watchpoint
+do_watchpoint:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	handle_watchpoint
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Register Access Exceptions. */
+	.align	4
+	.globl	do_reg_access
+do_reg_access:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	handle_reg_access
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Co-Processor Disabled Exceptions. */
+	.align	4
+	.globl	do_cp_disabled
+do_cp_disabled:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	handle_cp_disabled
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Co-Processor Exceptions. */
+	.align	4
+	.globl	do_cp_exception
+do_cp_exception:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	handle_cp_exception
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	/* This routine handles Hardware Divide By Zero Exceptions. */
+	.align	4
+	.globl	do_hw_divzero
+do_hw_divzero:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr		! re-enable traps
+	WRITE_PAUSE
+
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	%l1, %o1
+	mov	%l2, %o2
+	call	handle_hw_divzero
+	 mov	%l0, %o3
+
+	RESTORE_ALL
+
+	.align	4
+	.globl	do_flush_windows
+do_flush_windows:
+	SAVE_ALL
+
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	andcc	%l0, PSR_PS, %g0
+	bne	dfw_kernel
+	 nop
+
+	call	flush_user_windows
+	 nop
+
+	/* Advance over the trap instruction. */
+	ld	[%sp + STACKFRAME_SZ + PT_NPC], %l1
+	add	%l1, 0x4, %l2
+	st	%l1, [%sp + STACKFRAME_SZ + PT_PC]
+	st	%l2, [%sp + STACKFRAME_SZ + PT_NPC]
+
+	RESTORE_ALL
+
+	.globl	flush_patch_one
+
+	/* We get these for debugging routines using __builtin_return_address() */
+dfw_kernel:
+flush_patch_one:
+	FLUSH_ALL_KERNEL_WINDOWS
+
+	/* Advance over the trap instruction. */
+	ld	[%sp + STACKFRAME_SZ + PT_NPC], %l1
+	add	%l1, 0x4, %l2
+	st	%l1, [%sp + STACKFRAME_SZ + PT_PC]
+	st	%l2, [%sp + STACKFRAME_SZ + PT_NPC]
+
+	RESTORE_ALL
+
+	/* The getcc software trap.  The user wants the condition codes from
+	 * the %psr in register %g1.
+	 */
+
+	.align	4
+	.globl	getcc_trap_handler
+getcc_trap_handler:
+	srl	%l0, 20, %g1	! give user
+	and	%g1, 0xf, %g1	! only ICC bits in %psr
+	jmp	%l2		! advance over trap instruction
+	rett	%l2 + 0x4	! like this...
+
+	/* The setcc software trap.  The user has condition codes in %g1
+	 * that it would like placed in the %psr.  Be careful not to flip
+	 * any unintentional bits!
+	 */
+
+	.align	4
+	.globl	setcc_trap_handler
+setcc_trap_handler:
+	sll	%g1, 0x14, %l4
+	set	PSR_ICC, %l5
+	andn	%l0, %l5, %l0	! clear ICC bits in %psr
+	and	%l4, %l5, %l4	! clear non-ICC bits in user value
+	or	%l4, %l0, %l4	! or them in... mix mix mix
+
+	wr	%l4, 0x0, %psr	! set new %psr
+	WRITE_PAUSE		! TI scumbags...
+
+	jmp	%l2		! advance over trap instruction
+	rett	%l2 + 0x4	! like this...
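+
+	/* Both of the above service the SPARC ABI software traps
+	 * (ta 0x20 for getcc, ta 0x21 for setcc, per the ABI software
+	 * trap assignments), exchanging the ICC bits with user space
+	 * in %g1.
+	 */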
+
+sun4m_nmi_error:
+	/* NMI async memory error handling. */
+	sethi	%hi(0x80000000), %l4
+	sethi	%hi(sun4m_irq_global), %o5
+	ld	[%o5 + %lo(sun4m_irq_global)], %l5
+	st	%l4, [%l5 + 0x0c]	! sun4m_irq_global->mask_set=0x80000000
+	WRITE_PAUSE
+	ld	[%l5 + 0x00], %g0	! sun4m_irq_global->pending
+	WRITE_PAUSE
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+	call	sun4m_nmi
+	 nop
+	st	%l4, [%l5 + 0x08]	! sun4m_irq_global->mask_clear=0x80000000
+	WRITE_PAUSE
+	ld	[%l5 + 0x00], %g0	! sun4m_irq_global->pending
+	WRITE_PAUSE
+	RESTORE_ALL
+
+#ifndef CONFIG_SMP
+	.align	4
+	.globl	linux_trap_ipi15_sun4m
+linux_trap_ipi15_sun4m:
+	SAVE_ALL
+
+	ba	sun4m_nmi_error
+	 nop
+#endif /* CONFIG_SMP */
+
+	.align	4
+	.globl	srmmu_fault
+srmmu_fault:
+	mov	0x400, %l5
+	mov	0x300, %l4
+
+LEON_PI(lda	[%l5] ASI_LEON_MMUREGS, %l6)	! read sfar first
+SUN_PI_(lda	[%l5] ASI_M_MMUREGS, %l6)	! read sfar first
+
+LEON_PI(lda	[%l4] ASI_LEON_MMUREGS, %l5)	! read sfsr last
+SUN_PI_(lda	[%l4] ASI_M_MMUREGS, %l5)	! read sfsr last
+
+	andn	%l6, 0xfff, %l6
+	srl	%l5, 6, %l5			! and encode all info into l7
+
+	and	%l5, 2, %l5
+	or	%l5, %l6, %l6
+
+	or	%l6, %l7, %l7			! l7 = [addr,write,txtfault]
+
+	SAVE_ALL
+
+	mov	%l7, %o1
+	mov	%l7, %o2
+	and	%o1, 1, %o1		! arg2 = text_faultp
+	mov	%l7, %o3
+	and	%o2, 2, %o2		! arg3 = writep
+	andn	%o3, 0xfff, %o3		! arg4 = faulting address
+
+	wr	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	call	do_sparc_fault
+	 add	%sp, STACKFRAME_SZ, %o0	! arg1 = pt_regs ptr
+
+	RESTORE_ALL
+
+	.align	4
+sunos_execv:
+	.globl	sunos_execv
+	b	sys_execve
+	 clr	%i2
+
+	.align	4
+	.globl	sys_sigstack
+sys_sigstack:
+	mov	%o7, %l5
+	mov	%fp, %o2
+	call	do_sys_sigstack
+	 mov	%l5, %o7
+
+	.align	4
+	.globl	sys_sigreturn
+sys_sigreturn:
+	call	do_sigreturn
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	ld	[%curptr + TI_FLAGS], %l5
+	andcc	%l5, _TIF_SYSCALL_TRACE, %g0
+	be	1f
+	 nop
+
+	call	syscall_trace
+	 mov	1, %o1
+
+1:
+	/* We don't want to muck with user registers like a
+	 * normal syscall, just return.
+	 */
+	RESTORE_ALL
+
+	.align	4
+	.globl	sys_rt_sigreturn
+sys_rt_sigreturn:
+	call	do_rt_sigreturn
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	ld	[%curptr + TI_FLAGS], %l5
+	andcc	%l5, _TIF_SYSCALL_TRACE, %g0
+	be	1f
+	 nop
+
+	add	%sp, STACKFRAME_SZ, %o0
+	call	syscall_trace
+	 mov	1, %o1
+
+1:
+	/* We are returning to a signal handler. */
+	RESTORE_ALL
+
+	/* Now that we have a real sys_clone, sys_fork() is
+	 * implemented in terms of it.  Our _real_ implementation
+	 * of SunOS vfork() will use sys_vfork().
+	 *
+	 * XXX These three should be consolidated into mostly shared
+	 * XXX code just like on sparc64... -DaveM
+	 */
+	.align	4
+	.globl	sys_fork, flush_patch_two
+sys_fork:
+	mov	%o7, %l5
+flush_patch_two:
+	FLUSH_ALL_KERNEL_WINDOWS;
+	ld	[%curptr + TI_TASK], %o4
+	rd	%psr, %g4
+	WRITE_PAUSE
+	mov	SIGCHLD, %o0			! arg0:	clone flags
+	rd	%wim, %g5
+	WRITE_PAUSE
+	mov	%fp, %o1			! arg1:	usp
+	std	%g4, [%o4 + AOFF_task_thread + AOFF_thread_fork_kpsr]
+	add	%sp, STACKFRAME_SZ, %o2		! arg2:	pt_regs ptr
+	mov	0, %o3
+	call	sparc_do_fork
+	 mov	%l5, %o7
+
+	/* Whee, kernel threads! */
+	.globl	sys_clone, flush_patch_three
+sys_clone:
+	mov	%o7, %l5
+flush_patch_three:
+	FLUSH_ALL_KERNEL_WINDOWS;
+	ld	[%curptr + TI_TASK], %o4
+	rd	%psr, %g4
+	WRITE_PAUSE
+
+	/* arg0,1: flags,usp  -- loaded already */
+	cmp	%o1, 0x0			! Is new_usp NULL?
+	rd	%wim, %g5
+	WRITE_PAUSE
+	be,a	1f
+	 mov	%fp, %o1			! yes, use callers usp
+	andn	%o1, 7, %o1			! no, align to 8 bytes
+1:
+	std	%g4, [%o4 + AOFF_task_thread + AOFF_thread_fork_kpsr]
+	add	%sp, STACKFRAME_SZ, %o2		! arg2:	pt_regs ptr
+	mov	0, %o3
+	call	sparc_do_fork
+	 mov	%l5, %o7
+
+	/* Whee, real vfork! */
+	.globl	sys_vfork, flush_patch_four
+sys_vfork:
+flush_patch_four:
+	FLUSH_ALL_KERNEL_WINDOWS;
+	ld	[%curptr + TI_TASK], %o4
+	rd	%psr, %g4
+	WRITE_PAUSE
+	rd	%wim, %g5
+	WRITE_PAUSE
+	std	%g4, [%o4 + AOFF_task_thread + AOFF_thread_fork_kpsr]
+	sethi	%hi(0x4000 | 0x0100 | SIGCHLD), %o0
+	mov	%fp, %o1
+	or	%o0, %lo(0x4000 | 0x0100 | SIGCHLD), %o0
+	sethi	%hi(sparc_do_fork), %l1
+	mov	0, %o3
+	jmpl	%l1 + %lo(sparc_do_fork), %g0
+	 add	%sp, STACKFRAME_SZ, %o2
+
+        .align  4
+linux_sparc_ni_syscall:
+	sethi   %hi(sys_ni_syscall), %l7
+	b       do_syscall
+	 or     %l7, %lo(sys_ni_syscall), %l7
+
+linux_syscall_trace:
+	add	%sp, STACKFRAME_SZ, %o0
+	call	syscall_trace
+	 mov	0, %o1
+	cmp	%o0, 0
+	bne	3f
+	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %g1
+	sethi	%hi(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I0], %i0
+	or	%l7, %lo(sys_call_table), %l7
+	ld	[%sp + STACKFRAME_SZ + PT_I1], %i1
+	ld	[%sp + STACKFRAME_SZ + PT_I2], %i2
+	ld	[%sp + STACKFRAME_SZ + PT_I3], %i3
+	ld	[%sp + STACKFRAME_SZ + PT_I4], %i4
+	ld	[%sp + STACKFRAME_SZ + PT_I5], %i5
+	cmp	%g1, NR_syscalls
+	bgeu	3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
+	mov	%i0, %o0
+	ld	[%l7 + %l4], %l7
+	mov	%i1, %o1
+	mov	%i2, %o2
+	mov	%i3, %o3
+	b	2f
+	 mov	%i4, %o4
+
+	.globl	ret_from_fork
+ret_from_fork:
+	call	schedule_tail
+	 ld	[%g3 + TI_TASK], %o0
+	b	ret_sys_call
+	 ld	[%sp + STACKFRAME_SZ + PT_I0], %o0
+
+	.globl	ret_from_kernel_thread
+ret_from_kernel_thread:
+	call	schedule_tail
+	 ld	[%g3 + TI_TASK], %o0
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %l0
+	call	%l0
+	 ld	[%sp + STACKFRAME_SZ + PT_G2], %o0
+	rd	%psr, %l1
+	ld	[%sp + STACKFRAME_SZ + PT_PSR], %l0
+	andn	%l0, PSR_CWP, %l0
+	nop
+	and	%l1, PSR_CWP, %l1
+	or	%l0, %l1, %l0
+	st	%l0, [%sp + STACKFRAME_SZ + PT_PSR]
+	b	ret_sys_call
+	 mov	0, %o0
+
+	/* Linux native system calls enter here... */
+	.align	4
+	.globl	linux_sparc_syscall
+linux_sparc_syscall:
+	sethi	%hi(PSR_SYSCALL), %l4
+	or	%l0, %l4, %l0
+	/* Direct access to user regs, much faster. */
+	cmp	%g1, NR_syscalls
+	bgeu	linux_sparc_ni_syscall
+	 sll	%g1, 2, %l4
+	ld	[%l7 + %l4], %l7
+
+do_syscall:
+	SAVE_ALL_HEAD
+	 rd	%wim, %l3
+
+	wr	%l0, PSR_ET, %psr
+	mov	%i0, %o0
+	mov	%i1, %o1
+	mov	%i2, %o2
+
+	ld	[%curptr + TI_FLAGS], %l5
+	mov	%i3, %o3
+	andcc	%l5, _TIF_SYSCALL_TRACE, %g0
+	mov	%i4, %o4
+	bne	linux_syscall_trace
+	 mov	%i0, %l5
+2:
+	call	%l7
+	 mov	%i5, %o5
+
+3:
+	st	%o0, [%sp + STACKFRAME_SZ + PT_I0]
+
+ret_sys_call:
+	ld	[%curptr + TI_FLAGS], %l6
+	cmp	%o0, -ERESTART_RESTARTBLOCK
+	ld	[%sp + STACKFRAME_SZ + PT_PSR], %g3
+	set	PSR_C, %g2
+	bgeu	1f
+	 andcc	%l6, _TIF_SYSCALL_TRACE, %g0
+
+	/* System call success, clear Carry condition code. */
+	andn	%g3, %g2, %g3
+	clr	%l6
+	st	%g3, [%sp + STACKFRAME_SZ + PT_PSR]	
+	bne	linux_syscall_trace2
+	 ld	[%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
+	add	%l1, 0x4, %l2			/* npc = npc+4 */
+	st	%l1, [%sp + STACKFRAME_SZ + PT_PC]
+	b	ret_trap_entry
+	 st	%l2, [%sp + STACKFRAME_SZ + PT_NPC]
+1:
+	/* System call failure, set Carry condition code.
+	 * Also, get abs(errno) to return to the process.
+	 */
+	sub	%g0, %o0, %o0
+	or	%g3, %g2, %g3
+	st	%o0, [%sp + STACKFRAME_SZ + PT_I0]
+	mov	1, %l6
+	st	%g3, [%sp + STACKFRAME_SZ + PT_PSR]
+	bne	linux_syscall_trace2
+	 ld	[%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
+	add	%l1, 0x4, %l2			/* npc = npc+4 */
+	st	%l1, [%sp + STACKFRAME_SZ + PT_PC]
+	b	ret_trap_entry
+	 st	%l2, [%sp + STACKFRAME_SZ + PT_NPC]
+
+linux_syscall_trace2:
+	add	%sp, STACKFRAME_SZ, %o0
+	mov	1, %o1
+	call	syscall_trace
+	 add	%l1, 0x4, %l2			/* npc = npc+4 */
+	st	%l1, [%sp + STACKFRAME_SZ + PT_PC]
+	b	ret_trap_entry
+	 st	%l2, [%sp + STACKFRAME_SZ + PT_NPC]
+
+
+/* Saving and restoring the FPU state is best done from lowlevel code.
+ *
+ * void fpsave(unsigned long *fpregs, unsigned long *fsr,
+ *             void *fpqueue, unsigned long *fpqdepth)
+ */
+
+	.globl	fpsave
+fpsave:
+	st	%fsr, [%o1]	! this can trap on us if fpu is in bogon state
+	ld	[%o1], %g1
+	set	0x2000, %g4
+	andcc	%g1, %g4, %g0
+	be	2f
+	 mov	0, %g2
+
+	/* We have an fpqueue to save. */
+1:
+	std	%fq, [%o2]
+fpsave_magic:
+	st	%fsr, [%o1]
+	ld	[%o1], %g3
+	andcc	%g3, %g4, %g0
+	add	%g2, 1, %g2
+	bne	1b
+	 add	%o2, 8, %o2
+
+2:
+	st	%g2, [%o3]
+
+	std	%f0, [%o0 + 0x00]
+	std	%f2, [%o0 + 0x08]
+	std	%f4, [%o0 + 0x10]
+	std	%f6, [%o0 + 0x18]
+	std	%f8, [%o0 + 0x20]
+	std	%f10, [%o0 + 0x28]
+	std	%f12, [%o0 + 0x30]
+	std	%f14, [%o0 + 0x38]
+	std	%f16, [%o0 + 0x40]
+	std	%f18, [%o0 + 0x48]
+	std	%f20, [%o0 + 0x50]
+	std	%f22, [%o0 + 0x58]
+	std	%f24, [%o0 + 0x60]
+	std	%f26, [%o0 + 0x68]
+	std	%f28, [%o0 + 0x70]
+	retl
+	 std	%f30, [%o0 + 0x78]
+
+	/* Thanks to Theo de Raadt and the authors of the Sprite/NetBSD/OpenBSD
+	 * code for pointing out this possible deadlock: while we save state
+	 * above we could trap on the %fsr store, so our low level fpu trap
+	 * code has to know how to deal with this.
+	 */
+fpsave_catch:
+	b	fpsave_magic + 4
+	 st	%fsr, [%o1]
+
+fpsave_catch2:
+	b	fpsave + 4
+	 st	%fsr, [%o1]
+
+	/* void fpload(unsigned long *fpregs, unsigned long *fsr); */
+
+	.globl	fpload
+fpload:
+	ldd	[%o0 + 0x00], %f0
+	ldd	[%o0 + 0x08], %f2
+	ldd	[%o0 + 0x10], %f4
+	ldd	[%o0 + 0x18], %f6
+	ldd	[%o0 + 0x20], %f8
+	ldd	[%o0 + 0x28], %f10
+	ldd	[%o0 + 0x30], %f12
+	ldd	[%o0 + 0x38], %f14
+	ldd	[%o0 + 0x40], %f16
+	ldd	[%o0 + 0x48], %f18
+	ldd	[%o0 + 0x50], %f20
+	ldd	[%o0 + 0x58], %f22
+	ldd	[%o0 + 0x60], %f24
+	ldd	[%o0 + 0x68], %f26
+	ldd	[%o0 + 0x70], %f28
+	ldd	[%o0 + 0x78], %f30
+	ld	[%o1], %fsr
+	retl
+	 nop
+
+	/* __ndelay and __udelay take two arguments:
+	 * 0 - nsecs or usecs to delay
+	 * 1 - per_cpu udelay_val (loops per jiffy)
+	 *
+	 * Note that ndelay gives HZ times higher resolution but has a 10ms
+	 * limit.  udelay can handle up to 1s.
+	 */
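+	/* Worked example, assuming HZ == 100: the __ndelay multiplier
+	 * is 2**32 / (10**9 / 100) = 429.49..., rounded up to
+	 * 430 == 0x1ae; the __udelay one is 2**32 / 10**6 = 4294.97...,
+	 * rounded up to 4295 == 0x10c7.
+	 */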
+	.globl	__ndelay
+__ndelay:
+	save	%sp, -STACKFRAME_SZ, %sp
+	mov	%i0, %o0		! round multiplier up so large ns ok
+	mov	0x1ae, %o1		! 2**32 / (1 000 000 000 / HZ)
+	umul	%o0, %o1, %o0
+	rd	%y, %o1
+	mov	%i1, %o1		! udelay_val
+	umul	%o0, %o1, %o0
+	rd	%y, %o1
+	ba	delay_continue
+	 mov	%o1, %o0		! >>32 later for better resolution
+
+	.globl	__udelay
+__udelay:
+	save	%sp, -STACKFRAME_SZ, %sp
+	mov	%i0, %o0
+	sethi	%hi(0x10c7), %o1	! round multiplier up so large us ok
+	or	%o1, %lo(0x10c7), %o1	! 2**32 / 1 000 000
+	umul	%o0, %o1, %o0
+	rd	%y, %o1
+	mov	%i1, %o1		! udelay_val
+	umul	%o0, %o1, %o0
+	rd	%y, %o1
+	sethi	%hi(0x028f4b62), %l0	! Add in rounding constant * 2**32,
+	or	%g0, %lo(0x028f4b62), %l0
+	addcc	%o0, %l0, %o0		! 2**32 * 0.009 999
+	bcs,a	3f
+	 add	%o1, 0x01, %o1
+3:
+	mov	HZ, %o0			! >>32 earlier for wider range
+	umul	%o0, %o1, %o0
+	rd	%y, %o1
+
+delay_continue:
+	cmp	%o0, 0x0
+1:
+	bne	1b
+	 subcc	%o0, 1, %o0
+	
+	ret
+	restore
+EXPORT_SYMBOL(__udelay)
+EXPORT_SYMBOL(__ndelay)
+
+	/* Handle a software breakpoint */
+	/* We have to inform parent that child has stopped */
+	.align 4
+	.globl breakpoint_trap
+breakpoint_trap:
+	rd	%wim,%l3
+	SAVE_ALL
+	wr 	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	st	%i0, [%sp + STACKFRAME_SZ + PT_G0] ! for restarting syscalls
+	call	sparc_breakpoint
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	RESTORE_ALL
+
+#ifdef CONFIG_KGDB
+	ENTRY(kgdb_trap_low)
+	rd	%wim,%l3
+	SAVE_ALL
+	wr 	%l0, PSR_ET, %psr
+	WRITE_PAUSE
+
+	mov	%l7, %o0		! trap_level
+	call	kgdb_trap
+	 add	%sp, STACKFRAME_SZ, %o1	! struct pt_regs *regs
+
+	RESTORE_ALL
+	ENDPROC(kgdb_trap_low)
+#endif
+
+	.align	4
+	.globl	flush_patch_exception
+flush_patch_exception:
+	FLUSH_ALL_KERNEL_WINDOWS;
+	ldd	[%o0], %o6
+	jmpl	%o7 + 0xc, %g0			! see asm-sparc/processor.h
+	 mov	1, %g1				! signal EFAULT condition
+
+	.align	4
+	.globl	kill_user_windows, kuw_patch1_7win
+	.globl	kuw_patch1
+kuw_patch1_7win:	sll	%o3, 6, %o3
+
+	/* No matter how much overhead this routine has in the worst
+	 * case scenario, it is several times better than taking the
+	 * traps with the old method of just doing flush_user_windows().
+	 */
+kill_user_windows:
+	ld	[%g6 + TI_UWINMASK], %o0	! get current umask
+	orcc	%g0, %o0, %g0			! if no bits set, we are done
+	be	3f				! nothing to do
+	 rd	%psr, %o5			! must clear interrupts
+	or	%o5, PSR_PIL, %o4		! or else that could change
+	wr	%o4, 0x0, %psr			! the uwinmask state
+	WRITE_PAUSE				! burn them cycles
+1:
+	ld	[%g6 + TI_UWINMASK], %o0	! get consistent state
+	orcc	%g0, %o0, %g0			! did an interrupt come in?
+	be	4f				! yep, we are done
+	 rd	%wim, %o3			! get current wim
+	srl	%o3, 1, %o4			! simulate a save
+kuw_patch1:
+	sll	%o3, 7, %o3			! compute next wim
+	or	%o4, %o3, %o3			! result
+	andncc	%o0, %o3, %o0			! clean this bit in umask
+	bne	kuw_patch1			! not done yet
+	 srl	%o3, 1, %o4			! begin another save simulation
+	wr	%o3, 0x0, %wim			! set the new wim
+	st	%g0, [%g6 + TI_UWINMASK]	! clear uwinmask
+4:
+	wr	%o5, 0x0, %psr			! re-enable interrupts
+	WRITE_PAUSE				! burn baby burn
+3:
+	retl					! return
+	 st	%g0, [%g6 + TI_W_SAVED]		! no windows saved
+
+	.align	4
+	.globl	restore_current
+restore_current:
+	LOAD_CURRENT(g6, o0)
+	retl
+	 nop
+
+#ifdef CONFIG_PCIC_PCI
+#include <asm/pcic.h>
+
+	.align	4
+	.globl	linux_trap_ipi15_pcic
+linux_trap_ipi15_pcic:
+	rd	%wim, %l3
+	SAVE_ALL
+
+	/*
+	 * First deactivate NMI
+	 * or we cannot drop ET, cannot get window spill traps.
+	 * The busy loop is necessary because the PIO error
+	 * sometimes does not go away quickly and we trap again.
+	 */
+	sethi	%hi(pcic_regs), %o1
+	ld	[%o1 + %lo(pcic_regs)], %o2
+
+	! Get pending status for printouts later.
+	ld	[%o2 + PCI_SYS_INT_PENDING], %o0
+
+	mov	PCI_SYS_INT_PENDING_CLEAR_ALL, %o1
+	stb	%o1, [%o2 + PCI_SYS_INT_PENDING_CLEAR]
+1:
+	ld	[%o2 + PCI_SYS_INT_PENDING], %o1
+	andcc	%o1, ((PCI_SYS_INT_PENDING_PIO|PCI_SYS_INT_PENDING_PCI)>>24), %g0
+	bne	1b
+	 nop
+
+	or	%l0, PSR_PIL, %l4
+	wr	%l4, 0x0, %psr
+	WRITE_PAUSE
+	wr	%l4, PSR_ET, %psr
+	WRITE_PAUSE
+
+	call	pcic_nmi
+	 add	%sp, STACKFRAME_SZ, %o1	! struct pt_regs *regs
+	RESTORE_ALL
+
+	.globl	pcic_nmi_trap_patch
+pcic_nmi_trap_patch:
+	sethi	%hi(linux_trap_ipi15_pcic), %l3
+	jmpl	%l3 + %lo(linux_trap_ipi15_pcic), %g0
+	 rd	%psr, %l0
+	.word	0
+
+#endif /* CONFIG_PCIC_PCI */
+
+	.globl	flushw_all
+flushw_all:
+	save	%sp, -0x40, %sp
+	save	%sp, -0x40, %sp
+	save	%sp, -0x40, %sp
+	save	%sp, -0x40, %sp
+	save	%sp, -0x40, %sp
+	save	%sp, -0x40, %sp
+	save	%sp, -0x40, %sp
+	restore
+	restore
+	restore
+	restore
+	restore
+	restore
+	ret
+	 restore
+
+#ifdef CONFIG_SMP
+ENTRY(hard_smp_processor_id)
+661:	rd		%tbr, %g1
+	srl		%g1, 12, %o0
+	and		%o0, 3, %o0
+	.section	.cpuid_patch, "ax"
+	/* Instruction location. */
+	.word		661b
+	/* SUN4D implementation. */
+	lda		[%g0] ASI_M_VIKING_TMP1, %o0
+	nop
+	nop
+	/* LEON implementation. */
+	rd		%asr17, %o0
+	srl		%o0, 0x1c, %o0
+	nop
+	.previous
+	retl
+	 nop
+ENDPROC(hard_smp_processor_id)
+#endif
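+
+/* The 661: label and .cpuid_patch entry above implement boot-time
+ * instruction patching: the sparc32 setup code (per_cpu_patch() in
+ * setup_32.c) rewrites the three instructions at the recorded address
+ * with the sun4d or LEON variant, so the default %tbr-based sequence
+ * only runs on sun4m.
+ */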
+
+/* End of entry.S */
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
new file mode 100644
index 0000000..c746c0f
--- /dev/null
+++ b/arch/sparc/kernel/entry.h
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+
+/* irq */
+void handler_irq(int irq, struct pt_regs *regs);
+
+#ifdef CONFIG_SPARC32
+/* traps */
+void do_hw_interrupt(struct pt_regs *regs, unsigned long type);
+void do_illegal_instruction(struct pt_regs *regs, unsigned long pc,
+                            unsigned long npc, unsigned long psr);
+
+void do_priv_instruction(struct pt_regs *regs, unsigned long pc,
+                         unsigned long npc, unsigned long psr);
+void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc,
+                            unsigned long npc, unsigned long psr);
+void do_fpd_trap(struct pt_regs *regs, unsigned long pc,
+                 unsigned long npc, unsigned long psr);
+void do_fpe_trap(struct pt_regs *regs, unsigned long pc,
+                 unsigned long npc, unsigned long psr);
+void handle_tag_overflow(struct pt_regs *regs, unsigned long pc,
+                         unsigned long npc, unsigned long psr);
+void handle_watchpoint(struct pt_regs *regs, unsigned long pc,
+                       unsigned long npc, unsigned long psr);
+void handle_reg_access(struct pt_regs *regs, unsigned long pc,
+                       unsigned long npc, unsigned long psr);
+void handle_cp_disabled(struct pt_regs *regs, unsigned long pc,
+                        unsigned long npc, unsigned long psr);
+void handle_cp_exception(struct pt_regs *regs, unsigned long pc,
+                         unsigned long npc, unsigned long psr);
+
+/* entry.S */
+void fpsave(unsigned long *fpregs, unsigned long *fsr,
+            void *fpqueue, unsigned long *fpqdepth);
+void fpload(unsigned long *fpregs, unsigned long *fsr);
+
+#else /* CONFIG_SPARC32 */
+
+#include <asm/trap_block.h>
+
+struct popc_3insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[3];
+};
+extern struct popc_3insn_patch_entry __popc_3insn_patch,
+	__popc_3insn_patch_end;
+
+struct popc_6insn_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[6];
+};
+extern struct popc_6insn_patch_entry __popc_6insn_patch,
+	__popc_6insn_patch_end;
+
+struct pause_patch_entry {
+	unsigned int	addr;
+	unsigned int	insns[3];
+};
+extern struct pause_patch_entry __pause_3insn_patch,
+	__pause_3insn_patch_end;
+
+void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
+			     struct sun4v_1insn_patch_entry *);
+void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+			     struct sun4v_2insn_patch_entry *);
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *,
+			     struct sun4v_2insn_patch_entry *);
+extern unsigned int dcache_parity_tl1_occurred;
+extern unsigned int icache_parity_tl1_occurred;
+
+asmlinkage void sparc_breakpoint(struct pt_regs *regs);
+void timer_interrupt(int irq, struct pt_regs *regs);
+
+void do_notify_resume(struct pt_regs *regs,
+		      unsigned long orig_i0,
+		      unsigned long thread_info_flags);
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs);
+asmlinkage void syscall_trace_leave(struct pt_regs *regs);
+
+void bad_trap_tl1(struct pt_regs *regs, long lvl);
+
+void do_fpieee(struct pt_regs *regs);
+void do_fpother(struct pt_regs *regs);
+void do_tof(struct pt_regs *regs);
+void do_div0(struct pt_regs *regs);
+void do_illegal_instruction(struct pt_regs *regs);
+void mem_address_unaligned(struct pt_regs *regs,
+			   unsigned long sfar,
+			   unsigned long sfsr);
+void sun4v_do_mna(struct pt_regs *regs,
+		  unsigned long addr,
+		  unsigned long type_ctx);
+void do_privop(struct pt_regs *regs);
+void do_privact(struct pt_regs *regs);
+void do_cee(struct pt_regs *regs);
+void do_div0_tl1(struct pt_regs *regs);
+void do_fpieee_tl1(struct pt_regs *regs);
+void do_fpother_tl1(struct pt_regs *regs);
+void do_ill_tl1(struct pt_regs *regs);
+void do_irq_tl1(struct pt_regs *regs);
+void do_lddfmna_tl1(struct pt_regs *regs);
+void do_stdfmna_tl1(struct pt_regs *regs);
+void do_paw(struct pt_regs *regs);
+void do_paw_tl1(struct pt_regs *regs);
+void do_vaw(struct pt_regs *regs);
+void do_vaw_tl1(struct pt_regs *regs);
+void do_tof_tl1(struct pt_regs *regs);
+void do_getpsr(struct pt_regs *regs);
+
+void spitfire_insn_access_exception(struct pt_regs *regs,
+				    unsigned long sfsr,
+				    unsigned long sfar);
+void spitfire_insn_access_exception_tl1(struct pt_regs *regs,
+				        unsigned long sfsr,
+				        unsigned long sfar);
+void spitfire_data_access_exception(struct pt_regs *regs,
+				    unsigned long sfsr,
+				    unsigned long sfar);
+void spitfire_data_access_exception_tl1(struct pt_regs *regs,
+				        unsigned long sfsr,
+				        unsigned long sfar);
+void spitfire_access_error(struct pt_regs *regs,
+			   unsigned long status_encoded,
+			   unsigned long afar);
+
+void cheetah_fecc_handler(struct pt_regs *regs,
+			  unsigned long afsr,
+			  unsigned long afar);
+void cheetah_cee_handler(struct pt_regs *regs,
+			 unsigned long afsr,
+			 unsigned long afar);
+void cheetah_deferred_handler(struct pt_regs *regs,
+			      unsigned long afsr,
+			      unsigned long afar);
+void cheetah_plus_parity_error(int type, struct pt_regs *regs);
+
+void sun4v_insn_access_exception(struct pt_regs *regs,
+				 unsigned long addr,
+				 unsigned long type_ctx);
+void sun4v_insn_access_exception_tl1(struct pt_regs *regs,
+				     unsigned long addr,
+				     unsigned long type_ctx);
+void sun4v_data_access_exception(struct pt_regs *regs,
+				 unsigned long addr,
+				 unsigned long type_ctx);
+void sun4v_data_access_exception_tl1(struct pt_regs *regs,
+				     unsigned long addr,
+				     unsigned long type_ctx);
+void sun4v_resum_error(struct pt_regs *regs,
+		       unsigned long offset);
+void sun4v_resum_overflow(struct pt_regs *regs);
+void sun4v_nonresum_error(struct pt_regs *regs,
+			  unsigned long offset);
+void sun4v_nonresum_overflow(struct pt_regs *regs);
+void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs,
+				      unsigned long addr,
+				      unsigned long context);
+
+extern unsigned long sun4v_err_itlb_vaddr;
+extern unsigned long sun4v_err_itlb_ctx;
+extern unsigned long sun4v_err_itlb_pte;
+extern unsigned long sun4v_err_itlb_error;
+
+void sun4v_itlb_error_report(struct pt_regs *regs, int tl);
+
+extern unsigned long sun4v_err_dtlb_vaddr;
+extern unsigned long sun4v_err_dtlb_ctx;
+extern unsigned long sun4v_err_dtlb_pte;
+extern unsigned long sun4v_err_dtlb_error;
+
+void sun4v_dtlb_error_report(struct pt_regs *regs, int tl);
+void hypervisor_tlbop_error(unsigned long err,
+			    unsigned long op);
+void hypervisor_tlbop_error_xcall(unsigned long err,
+				  unsigned long op);
+
+/* WARNING: The error trap handlers in assembly know the precise
+ *	    layout of the following structure.
+ *
+ * C-level handlers in traps.c use this information to log the
+ * error and then determine how to recover (if possible).
+ */
+struct cheetah_err_info {
+/*0x00*/u64 afsr;
+/*0x08*/u64 afar;
+
+	/* D-cache state */
+/*0x10*/u64 dcache_data[4];	/* The actual data	*/
+/*0x30*/u64 dcache_index;	/* D-cache index	*/
+/*0x38*/u64 dcache_tag;		/* D-cache tag/valid	*/
+/*0x40*/u64 dcache_utag;	/* D-cache microtag	*/
+/*0x48*/u64 dcache_stag;	/* D-cache snooptag	*/
+
+	/* I-cache state */
+/*0x50*/u64 icache_data[8];	/* The actual insns + predecode	*/
+/*0x90*/u64 icache_index;	/* I-cache index	*/
+/*0x98*/u64 icache_tag;		/* I-cache phys tag	*/
+/*0xa0*/u64 icache_utag;	/* I-cache microtag	*/
+/*0xa8*/u64 icache_stag;	/* I-cache snooptag	*/
+/*0xb0*/u64 icache_upper;	/* I-cache upper-tag	*/
+/*0xb8*/u64 icache_lower;	/* I-cache lower-tag	*/
+
+	/* E-cache state */
+/*0xc0*/u64 ecache_data[4];	/* 32 bytes from staging registers */
+/*0xe0*/u64 ecache_index;	/* E-cache index	*/
+/*0xe8*/u64 ecache_tag;		/* E-cache tag/state	*/
+
+/*0xf0*/u64 __pad[32 - 30];
+};
+#define CHAFSR_INVALID		((u64)-1L)
+
+/* This is allocated at boot time based upon the largest hardware
+ * cpu ID in the system.  We allocate two entries per cpu, one for
+ * TL==0 logging and one for TL >= 1 logging.
+ */
+extern struct cheetah_err_info *cheetah_error_log;
+
+/* UPA nodes send interrupt packet to UltraSparc with first data reg
+ * value low 5 (7 on Starfire) bits holding the IRQ identifier being
+ * delivered.  We must translate this into a non-vector IRQ so we can
+ * set the softint on this cpu.
+ *
+ * To make processing these packets efficient and race free we use
+ * an array of irq buckets below.  The interrupt vector handler in
+ * entry.S feeds incoming packets into per-cpu pil-indexed lists.
+ *
+ * If you make changes to ino_bucket, please update the hand-coded assembler
+ * of the vectored interrupt trap handler(s) in entry.S and sun4v_ivec.S.
+ */
+struct ino_bucket {
+/*0x00*/unsigned long __irq_chain_pa;
+
+	/* Interrupt number assigned to this INO.  */
+/*0x08*/unsigned int __irq;
+/*0x0c*/unsigned int __pad;
+};
+
+extern struct ino_bucket *ivector_table;
+extern unsigned long ivector_table_pa;
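
In C, mapping an incoming INO to its Linux IRQ through this table is
essentially the sketch below (a virtual-address view with a hypothetical
helper name; the low-level handlers do the equivalent loads through
ivector_table_pa using physical addresses):

	static unsigned int ino_to_irq(unsigned long ino)
	{
		struct ino_bucket *bucket = &ivector_table[ino];

		return bucket->__irq;
	}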
+
+void init_irqwork_curcpu(void);
+void sun4v_register_mondo_queues(int this_cpu);
+
+#endif /* CONFIG_SPARC32 */
+#endif /* _ENTRY_H */
diff --git a/arch/sparc/kernel/etrap_32.S b/arch/sparc/kernel/etrap_32.S
new file mode 100644
index 0000000..9f243f9
--- /dev/null
+++ b/arch/sparc/kernel/etrap_32.S
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * etrap.S: Sparc trap window preparation for entry into the
+ *          Linux kernel.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/contregs.h>
+#include <asm/page.h>
+#include <asm/psr.h>
+#include <asm/ptrace.h>
+#include <asm/winmacro.h>
+#include <asm/asmmacro.h>
+#include <asm/thread_info.h>
+
+/* Registers to not touch at all. */
+#define t_psr        l0 /* Set by caller */
+#define t_pc         l1 /* Set by caller */
+#define t_npc        l2 /* Set by caller */
+#define t_wim        l3 /* Set by caller */
+#define t_twinmask   l4 /* Set at beginning of this entry routine. */
+#define t_kstack     l5 /* Set right before pt_regs frame is built */
+#define t_retpc      l6 /* If you change this, change winmacro.h header file */
+#define t_systable   l7 /* Never touch this, could be the syscall table ptr. */
+#define curptr       g6 /* Set after pt_regs frame is built */
+
+	.text
+	.align 4
+
+	/* SEVEN WINDOW PATCH INSTRUCTIONS */
+	.globl	tsetup_7win_patch1, tsetup_7win_patch2
+	.globl	tsetup_7win_patch3, tsetup_7win_patch4
+	.globl	tsetup_7win_patch5, tsetup_7win_patch6
+tsetup_7win_patch1:	sll	%t_wim, 0x6, %t_wim
+tsetup_7win_patch2:	and	%g2, 0x7f, %g2
+tsetup_7win_patch3:	and	%g2, 0x7f, %g2
+tsetup_7win_patch4:	and	%g1, 0x7f, %g1
+tsetup_7win_patch5:	sll	%t_wim, 0x6, %t_wim
+tsetup_7win_patch6:	and	%g2, 0x7f, %g2
+	/* END OF PATCH INSTRUCTIONS */
+
+	/* At trap time, interrupts and all generic traps do the
+	 * following:
+	 *
+	 * rd	%psr, %l0
+	 * b	some_handler
+	 * rd	%wim, %l3
+	 * nop
+	 *
+	 * Then, if 'some_handler' needs a trap frame (i.e. it has
+	 * to call C code and the trap cannot be handled in-window),
+	 * it does the SAVE_ALL macro in entry.S which does
+	 *
+	 * sethi	%hi(trap_setup), %l4
+	 * jmpl		%l4 + %lo(trap_setup), %l6
+	 * nop
+	 */
+
+	/* 2 3 4  window number
+	 * -----
+	 * O T S  mnemonic
+	 *
+	 * O == Current window before trap
+	 * T == Window entered when trap occurred
+	 * S == Window we will need to save if (1<<T) == %wim
+	 *
+	 * Before execution gets here, it must be guaranteed that
+	 * %l0 contains trap time %psr, %l1 and %l2 contain the
+	 * trap pc and npc, and %l3 contains the trap time %wim.
+	 */
+
+	.globl	trap_setup, tsetup_patch1, tsetup_patch2
+	.globl	tsetup_patch3, tsetup_patch4
+	.globl	tsetup_patch5, tsetup_patch6
+trap_setup:
+	/* Calculate mask of trap window.  See if from user
+	 * or kernel and branch conditionally.
+	 */
+	mov	1, %t_twinmask
+	andcc	%t_psr, PSR_PS, %g0		 ! fromsupv_p = (psr & PSR_PS)
+	be	trap_setup_from_user		 ! nope, from user mode
+	 sll	%t_twinmask, %t_psr, %t_twinmask ! t_twinmask = (1 << psr)
+
+	/* From kernel, allocate more kernel stack and
+	 * build a pt_regs trap frame.
+	 */
+	sub	%fp, (STACKFRAME_SZ + TRACEREG_SZ), %t_kstack
+	STORE_PT_ALL(t_kstack, t_psr, t_pc, t_npc, g2)
+
+	/* See if we are in the trap window. */
+	andcc	%t_twinmask, %t_wim, %g0
+	bne	trap_setup_kernel_spill		! in trap window, clean up
+	 nop
+
+	/* Trap from kernel with a window available.
+	 * Just do it...
+	 */
+	jmpl	%t_retpc + 0x8, %g0	! return to caller
+	 mov	%t_kstack, %sp		! jump onto new stack
+
+trap_setup_kernel_spill:
+	ld	[%curptr + TI_UWINMASK], %g1
+	orcc	%g0, %g1, %g0
+	bne	trap_setup_user_spill	! there are some user windows, yuck
+	/* Spill from kernel, but only kernel windows, adjust
+	 * %wim and go.
+	 */
+	 srl	%t_wim, 0x1, %g2	! begin computation of new %wim
+tsetup_patch1:
+	sll	%t_wim, 0x7, %t_wim	! patched on 7 window Sparcs
+	or	%t_wim, %g2, %g2
+tsetup_patch2:
+	and	%g2, 0xff, %g2		! patched on 7 window Sparcs
+
+	save	%g0, %g0, %g0
+
+	/* Set new %wim value */
+	wr	%g2, 0x0, %wim
+
+	/* Save the kernel window onto the corresponding stack. */
+	STORE_WINDOW(sp)
+
+	restore	%g0, %g0, %g0
+
+	jmpl	%t_retpc + 0x8, %g0	! return to caller
+	 mov	%t_kstack, %sp		! and onto new kernel stack
+
+#define STACK_OFFSET (THREAD_SIZE - TRACEREG_SZ - STACKFRAME_SZ)
+
+trap_setup_from_user:
+	/* We can't use %curptr yet. */
+	LOAD_CURRENT(t_kstack, t_twinmask)
+
+	sethi	%hi(STACK_OFFSET), %t_twinmask
+	or	%t_twinmask, %lo(STACK_OFFSET), %t_twinmask
+	add	%t_kstack, %t_twinmask, %t_kstack
+
+	mov	1, %t_twinmask
+	sll	%t_twinmask, %t_psr, %t_twinmask ! t_twinmask = (1 << psr)
+
+	/* Build pt_regs frame. */
+	STORE_PT_ALL(t_kstack, t_psr, t_pc, t_npc, g2)
+
+#if 0
+	/* If we're sure every task_struct is THREAD_SIZE aligned,
+	   we can speed this up. */
+	sethi	%hi(STACK_OFFSET), %curptr
+	or	%curptr, %lo(STACK_OFFSET), %curptr
+	sub	%t_kstack, %curptr, %curptr
+#else
+	sethi	%hi(~(THREAD_SIZE - 1)), %curptr
+	and	%t_kstack, %curptr, %curptr
+#endif
+
+	/* Clear current_thread_info->w_saved */
+	st	%g0, [%curptr + TI_W_SAVED]
+
+	/* See if we are in the trap window. */
+	andcc	%t_twinmask, %t_wim, %g0
+	bne	trap_setup_user_spill		! yep we are
+	 orn	%g0, %t_twinmask, %g1		! negate trap win mask into %g1
+
+	/* Trap from user, but not into the invalid window.
+	 * Calculate new umask.  The way this works is,
+	 * any window from the %wim at trap time until
+	 * the window right before the one we are in now,
+	 * is a user window.  A diagram:
+	 *
+	 *      7 6 5 4 3 2 1 0    window number
+	 *      ---------------
+	 *        I     L T        mnemonic
+	 *
+	 * Window 'I' is the invalid window in our example,
+	 * window 'L' is the window the user was in when
+	 * the trap occurred, window T is the trap window
+	 * we are in now.  Therefore, windows 5, 4 and 3
+	 * are user windows.  The following sequence
+	 * computes the user winmask to represent this.
+	 */
+	subcc	%t_wim, %t_twinmask, %g2
+	bneg,a	1f
+	 sub	%g2, 0x1, %g2
+1:
+	andn	%g2, %t_twinmask, %g2
+tsetup_patch3:
+	and	%g2, 0xff, %g2			! patched on 7win Sparcs
+	st	%g2, [%curptr + TI_UWINMASK]	! store new umask
+
+	jmpl	%t_retpc + 0x8, %g0		! return to caller
+	 mov	%t_kstack, %sp			! and onto kernel stack
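
In C terms, the user window mask computed above is roughly the following
(a sketch with a hypothetical function name; the 0xff mask becomes 0x7f on
7-window parts via tsetup_patch3):

	unsigned int compute_uwinmask(unsigned int wim, unsigned int twinmask)
	{
		unsigned int m = wim - twinmask;

		if ((int)m < 0)		/* borrow: wrapped below window 0 */
			m -= 1;
		m &= ~twinmask;		/* trap window is not a user window */
		return m & 0xff;	/* wim=0x40, twinmask=0x04 -> 0x38: wins 5,4,3 */
	}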
+
+trap_setup_user_spill:
+	/* A spill occurred from either kernel or user mode
+	 * and there exist some user windows to deal with.
+	 * A mask of the currently valid user windows
+	 * is in %g1 upon entry to here.
+	 */
+
+tsetup_patch4:
+	and	%g1, 0xff, %g1		! patched on 7win Sparcs, mask
+	srl	%t_wim, 0x1, %g2	! compute new %wim
+tsetup_patch5:
+	sll	%t_wim, 0x7, %t_wim	! patched on 7win Sparcs
+	or	%t_wim, %g2, %g2	! %g2 is new %wim
+tsetup_patch6:
+	and	%g2, 0xff, %g2		! patched on 7win Sparcs
+	andn	%g1, %g2, %g1		! clear this bit in %g1
+	st	%g1, [%curptr + TI_UWINMASK]
+
+	save	%g0, %g0, %g0
+
+	wr	%g2, 0x0, %wim
+
+	/* Call MMU-architecture dependent stack checking
+	 * routine.
+	 */
+	b	tsetup_srmmu_stackchk
+	 andcc	%sp, 0x7, %g0
+
+	/* Architecture specific stack checking routines.  When either
+	 * of these routines is called, the globals are free to use,
+	 * as they have been safely stashed on the new kernel stack.
+	 * Thus the definition below, for simplicity.
+	 */
+#define glob_tmp     g1
+
+	.globl	tsetup_srmmu_stackchk
+tsetup_srmmu_stackchk:
+	/* Check the result of the caller's andcc %sp, 0x7, %g0 */
+	bne	trap_setup_user_stack_is_bolixed
+	 sethi   %hi(PAGE_OFFSET), %glob_tmp
+
+	cmp	%glob_tmp, %sp
+	bleu,a	1f
+LEON_PI( lda	[%g0] ASI_LEON_MMUREGS, %glob_tmp)	! read MMU control
+SUN_PI_( lda	[%g0] ASI_M_MMUREGS, %glob_tmp)		! read MMU control
+
+trap_setup_user_stack_is_bolixed:
+	/* From user/kernel into invalid window w/bad user
+	 * stack. Save bad user stack, and return to caller.
+	 */
+	SAVE_BOLIXED_USER_STACK(curptr, g3)
+	restore	%g0, %g0, %g0
+
+	jmpl	%t_retpc + 0x8, %g0
+	 mov	%t_kstack, %sp
+
+1:
+	/* Clear the fault status and turn on the no_fault bit. */
+	or	%glob_tmp, 0x2, %glob_tmp		! or in no_fault bit
+LEON_PI(sta	%glob_tmp, [%g0] ASI_LEON_MMUREGS)		! set it
+SUN_PI_(sta	%glob_tmp, [%g0] ASI_M_MMUREGS)		! set it
+
+	/* Dump the registers and cross fingers. */
+	STORE_WINDOW(sp)
+
+	/* Clear the no_fault bit and check the status. */
+	andn	%glob_tmp, 0x2, %glob_tmp
+LEON_PI(sta	%glob_tmp, [%g0] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%glob_tmp, [%g0] ASI_M_MMUREGS)
+
+	mov	AC_M_SFAR, %glob_tmp
+LEON_PI(lda	[%glob_tmp] ASI_LEON_MMUREGS, %g0)
+SUN_PI_(lda	[%glob_tmp] ASI_M_MMUREGS, %g0)
+
+	mov	AC_M_SFSR, %glob_tmp
+LEON_PI(lda	[%glob_tmp] ASI_LEON_MMUREGS, %glob_tmp)! save away status of winstore
+SUN_PI_(lda	[%glob_tmp] ASI_M_MMUREGS, %glob_tmp)	! save away status of winstore
+
+	andcc	%glob_tmp, 0x2, %g0			! did we fault?
+	bne	trap_setup_user_stack_is_bolixed	! failure
+	 nop
+
+	restore %g0, %g0, %g0
+
+	jmpl	%t_retpc + 0x8, %g0
+	 mov	%t_kstack, %sp
+
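The check above relies on the SRMMU no-fault trick: with the no_fault bit
set, a store to a bad user stack latches an error in the SFSR instead of
trapping, so the code can probe the stack by attempting the window dump and
then inspecting the fault status. A C-level sketch of the sequence
(read_mmu_reg/write_mmu_reg/store_window_to are illustrative helper names,
not this file's API):

	static int probe_user_window_store(unsigned long *sp)
	{
		unsigned int ctrl = read_mmu_reg(0);	/* MMU control reg */

		write_mmu_reg(0, ctrl | 0x2);		/* no_fault: latch, don't trap */
		store_window_to(sp);			/* try the 16-register dump */
		write_mmu_reg(0, ctrl & ~0x2);
		(void)read_mmu_reg(AC_M_SFAR);		/* clear fault address */
		return read_mmu_reg(AC_M_SFSR) & 0x2;	/* nonzero => store faulted */
	}
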
diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S
new file mode 100644
index 0000000..08cc41f
--- /dev/null
+++ b/arch/sparc/kernel/etrap_64.S
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * etrap.S: Preparing for entry into the kernel on Sparc V9.
+ *
+ * Copyright (C) 1996, 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/page.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+
+#define		TASK_REGOFF		(THREAD_SIZE-TRACEREG_SZ-STACKFRAME_SZ)
+#define		ETRAP_PSTATE1		(PSTATE_TSO | PSTATE_PRIV)
+#define		ETRAP_PSTATE2		\
+		(PSTATE_TSO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE)
+
+/*
+ * On entry, %g7 is return address - 0x4.
+ * %g4 and %g5 will be preserved in %l4 and %l5 respectively.
+ */
+
+		.text
+		.align	64
+		.globl	etrap_syscall, etrap, etrap_irq, etraptl1
+etrap:		rdpr	%pil, %g2
+etrap_irq:	clr	%g3
+etrap_syscall:	TRAP_LOAD_THREAD_REG(%g6, %g1)
+		rdpr	%tstate, %g1
+		or	%g1, %g3, %g1
+		sllx	%g2, 20, %g3
+		andcc	%g1, TSTATE_PRIV, %g0
+		or	%g1, %g3, %g1
+		bne,pn	%xcc, 1f
+		 sub	%sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
+661:		wrpr	%g0, 7, %cleanwin
+		.section .fast_win_ctrl_1insn_patch, "ax"
+		.word	661b
+		.word	0x85880000	! allclean
+		.previous
+
+		sethi	%hi(TASK_REGOFF), %g2
+		sethi	%hi(TSTATE_PEF), %g3
+		or	%g2, %lo(TASK_REGOFF), %g2
+		and	%g1, %g3, %g3
+		brnz,pn	%g3, 1f
+		 add	%g6, %g2, %g2
+		wr	%g0, 0, %fprs
+1:		rdpr	%tpc, %g3
+
+		stx	%g1, [%g2 + STACKFRAME_SZ + PT_V9_TSTATE]
+		rdpr	%tnpc, %g1
+		stx	%g3, [%g2 + STACKFRAME_SZ + PT_V9_TPC]
+		rd	%y, %g3
+		stx	%g1, [%g2 + STACKFRAME_SZ + PT_V9_TNPC]
+		rdpr	%tt, %g1
+		st	%g3, [%g2 + STACKFRAME_SZ + PT_V9_Y]
+		sethi	%hi(PT_REGS_MAGIC), %g3
+		or	%g3, %g1, %g1
+		st	%g1, [%g2 + STACKFRAME_SZ + PT_V9_MAGIC]
+
+		rdpr	%cansave, %g1
+		brnz,pt %g1, etrap_save
+		 nop
+
+		rdpr	%cwp, %g1
+		add	%g1, 2, %g1
+		wrpr	%g1, %cwp
+		be,pt	%xcc, etrap_user_spill
+		 mov	ASI_AIUP, %g3
+
+		rdpr	%otherwin, %g3
+		brz	%g3, etrap_kernel_spill
+		 mov	ASI_AIUS, %g3
+
+etrap_user_spill:
+
+		wr	%g3, 0x0, %asi
+		ldx	[%g6 + TI_FLAGS], %g3
+		and	%g3, _TIF_32BIT, %g3
+		brnz,pt	%g3, etrap_user_spill_32bit
+		 nop
+		ba,a,pt	%xcc, etrap_user_spill_64bit
+
+etrap_save:	save	%g2, -STACK_BIAS, %sp
+		mov	%g6, %l6
+
+		bne,pn	%xcc, 3f
+		 mov	PRIMARY_CONTEXT, %l4
+661:		rdpr	%canrestore, %g3
+		.section .fast_win_ctrl_1insn_patch, "ax"
+		.word	661b
+		nop
+		.previous
+
+		rdpr	%wstate, %g2
+661:		wrpr	%g0, 0, %canrestore
+		.section .fast_win_ctrl_1insn_patch, "ax"
+		.word	661b
+		nop
+		.previous
+		sll	%g2, 3, %g2
+
+		/* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR.  */
+		mov	1, %l5
+		sth	%l5, [%l6 + TI_SYS_NOERROR]
+
+661:		wrpr	%g3, 0, %otherwin
+		.section .fast_win_ctrl_1insn_patch, "ax"
+		.word	661b
+		.word	0x87880000	! otherw
+		.previous
+
+		wrpr	%g2, 0, %wstate
+		sethi	%hi(sparc64_kern_pri_context), %g2
+		ldx	[%g2 + %lo(sparc64_kern_pri_context)], %g3
+
+661:		stxa	%g3, [%l4] ASI_DMMU
+		.section .sun4v_1insn_patch, "ax"
+		.word	661b
+		stxa	%g3, [%l4] ASI_MMU
+		.previous
+
+		sethi	%hi(KERNBASE), %l4
+		flush	%l4
+		mov	ASI_AIUS, %l7
+2:		mov	%g4, %l4
+		mov	%g5, %l5
+		add	%g7, 4, %l2
+
+		/* Go to trap time globals so we can save them.  */
+661:		wrpr	%g0, ETRAP_PSTATE1, %pstate
+		.section .sun4v_1insn_patch, "ax"
+		.word	661b
+		SET_GL(0)
+		.previous
+
+		stx	%g1, [%sp + PTREGS_OFF + PT_V9_G1]
+		stx	%g2, [%sp + PTREGS_OFF + PT_V9_G2]
+		sllx	%l7, 24, %l7
+		stx	%g3, [%sp + PTREGS_OFF + PT_V9_G3]
+		rdpr	%cwp, %l0
+		stx	%g4, [%sp + PTREGS_OFF + PT_V9_G4]
+		stx	%g5, [%sp + PTREGS_OFF + PT_V9_G5]
+		stx	%g6, [%sp + PTREGS_OFF + PT_V9_G6]
+		stx	%g7, [%sp + PTREGS_OFF + PT_V9_G7]
+		or	%l7, %l0, %l7
+661:		sethi	%hi(TSTATE_TSO | TSTATE_PEF), %l0
+		/* If userspace is using ADI, it could potentially pass
+		 * a pointer with version tag embedded in it. To maintain
+		 * the ADI security, we must enable PSTATE.mcde. Userspace
+		 * would have already set TTE.mcd in an earlier call to
+		 * kernel and set the version tag for the address being
+		 * dereferenced. Setting PSTATE.mcde would ensure any
+		 * access to userspace data through a system call honors
+		 * ADI and does not allow a rogue app to bypass ADI by
+		 * using system calls. Setting PSTATE.mcde only affects
+		 * accesses to virtual addresses that have TTE.mcd set.
+		 * Set PMCDPER to ensure any exceptions caused by ADI
+		 * version tag mismatch are exposed before system call
+		 * returns to userspace. Setting PMCDPER affects only
+		 * writes to virtual addresses that have TTE.mcd set and
+		 * have a version tag set as well.
+		 */
+		.section .sun_m7_1insn_patch, "ax"
+		.word	661b
+		sethi	%hi(TSTATE_TSO | TSTATE_PEF | TSTATE_MCDE), %l0
+		.previous
+661:		nop
+		.section .sun_m7_1insn_patch, "ax"
+		.word	661b
+		.word 0xaf902001	/* wrpr %g0, 1, %pmcdper */
+		.previous
+		or	%l7, %l0, %l7
+		wrpr	%l2, %tnpc
+		wrpr	%l7, (TSTATE_PRIV | TSTATE_IE), %tstate
+		stx	%i0, [%sp + PTREGS_OFF + PT_V9_I0]
+		stx	%i1, [%sp + PTREGS_OFF + PT_V9_I1]
+		stx	%i2, [%sp + PTREGS_OFF + PT_V9_I2]
+		stx	%i3, [%sp + PTREGS_OFF + PT_V9_I3]
+		stx	%i4, [%sp + PTREGS_OFF + PT_V9_I4]
+		stx	%i5, [%sp + PTREGS_OFF + PT_V9_I5]
+		stx	%i6, [%sp + PTREGS_OFF + PT_V9_I6]
+		mov	%l6, %g6
+		stx	%i7, [%sp + PTREGS_OFF + PT_V9_I7]
+		LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %l1)
+		ldx	[%g6 + TI_TASK], %g4
+		done
+
+3:		mov	ASI_P, %l7
+		ldub	[%l6 + TI_FPDEPTH], %l5
+		add	%l6, TI_FPSAVED + 1, %l4
+		srl	%l5, 1, %l3
+		add	%l5, 2, %l5
+
+		/* Set TI_SYS_FPDEPTH to %l5 and clear TI_SYS_NOERROR.  */
+		sth	%l5, [%l6 + TI_SYS_NOERROR]
+		ba,pt	%xcc, 2b
+		 stb	%g0, [%l4 + %l3]
+		nop
+
+etraptl1:	/* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
+		 * We place this right after pt_regs on the trap stack.
+		 * The layout is:
+		 *	0x00	TL1's TSTATE
+		 *	0x08	TL1's TPC
+		 *	0x10	TL1's TNPC
+		 *	0x18	TL1's TT
+		 *	 ...
+		 *	0x78	TL4's TT
+		 *	0x80	TL
+		 */
+		TRAP_LOAD_THREAD_REG(%g6, %g1)
+		sub	%sp, ((4 * 8) * 4) + 8, %g2
+		rdpr	%tl, %g1
+
+		wrpr	%g0, 1, %tl
+		rdpr	%tstate, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x00]
+		rdpr	%tpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x08]
+		rdpr	%tnpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x10]
+		rdpr	%tt, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x18]
+
+		wrpr	%g0, 2, %tl
+		rdpr	%tstate, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x20]
+		rdpr	%tpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x28]
+		rdpr	%tnpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x30]
+		rdpr	%tt, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x38]
+
+		sethi	%hi(is_sun4v), %g3
+		lduw	[%g3 + %lo(is_sun4v)], %g3
+		brnz,pn	%g3, finish_tl1_capture
+		 nop
+
+		wrpr	%g0, 3, %tl
+		rdpr	%tstate, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x40]
+		rdpr	%tpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x48]
+		rdpr	%tnpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x50]
+		rdpr	%tt, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x58]
+
+		wrpr	%g0, 4, %tl
+		rdpr	%tstate, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x60]
+		rdpr	%tpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x68]
+		rdpr	%tnpc, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x70]
+		rdpr	%tt, %g3
+		stx	%g3, [%g2 + STACK_BIAS + 0x78]
+
+		stx	%g1, [%g2 + STACK_BIAS + 0x80]
+
+finish_tl1_capture:
+		wrpr	%g0, 1, %tl
+661:		nop
+		.section .sun4v_1insn_patch, "ax"
+		.word	661b
+		SET_GL(1)
+		.previous
+
+		rdpr	%tstate, %g1
+		sub	%g2, STACKFRAME_SZ + TRACEREG_SZ - STACK_BIAS, %g2
+		ba,pt	%xcc, 1b
+		 andcc	%g1, TSTATE_PRIV, %g0
+
+#undef TASK_REGOFF
+#undef ETRAP_PSTATE1
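
A C view of the save area etraptl1 builds below pt_regs (mirroring the
stores at STACK_BIAS + 0x00..0x80 above; traps_64.c keeps an equivalent
structure for dumping these saves):

	struct tl1_traplog {
		struct {
			unsigned long tstate;	/* +0x00 within each entry */
			unsigned long tpc;	/* +0x08 */
			unsigned long tnpc;	/* +0x10 */
			unsigned long tt;	/* +0x18 */
		} trapstack[4];			/* TL1..TL4 at 0x00/0x20/0x40/0x60 */
		unsigned long tl;		/* 0x80: trap level at entry */
	};
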
diff --git a/arch/sparc/kernel/fpu_traps.S b/arch/sparc/kernel/fpu_traps.S
new file mode 100644
index 0000000..051659e
--- /dev/null
+++ b/arch/sparc/kernel/fpu_traps.S
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	/* This is trivial with the new code... */
+	.globl		do_fpdis
+	.type		do_fpdis,#function
+do_fpdis:
+	sethi		%hi(TSTATE_PEF), %g4
+	rdpr		%tstate, %g5
+	andcc		%g5, %g4, %g0
+	be,pt		%xcc, 1f
+	 nop
+	rd		%fprs, %g5
+	andcc		%g5, FPRS_FEF, %g0
+	be,pt		%xcc, 1f
+	 nop
+
+	/* Legal state when DCR_IFPOE is set in Cheetah %dcr. */
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	add		%g0, %g0, %g0
+	ba,a,pt		%xcc, rtrap
+
+1:	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	ldub		[%g6 + TI_FPSAVED], %g5
+	wr		%g0, FPRS_FEF, %fprs
+	andcc		%g5, FPRS_FEF, %g0
+	be,a,pt		%icc, 1f
+	 clr		%g7
+	ldx		[%g6 + TI_GSR], %g7
+1:	andcc		%g5, FPRS_DL, %g0
+	bne,pn		%icc, 2f
+	 fzero		%f0
+	andcc		%g5, FPRS_DU, %g0
+	bne,pn		%icc, 1f
+	 fzero		%f2
+	faddd		%f0, %f2, %f4
+	fmuld		%f0, %f2, %f6
+	faddd		%f0, %f2, %f8
+	fmuld		%f0, %f2, %f10
+	faddd		%f0, %f2, %f12
+	fmuld		%f0, %f2, %f14
+	faddd		%f0, %f2, %f16
+	fmuld		%f0, %f2, %f18
+	faddd		%f0, %f2, %f20
+	fmuld		%f0, %f2, %f22
+	faddd		%f0, %f2, %f24
+	fmuld		%f0, %f2, %f26
+	faddd		%f0, %f2, %f28
+	fmuld		%f0, %f2, %f30
+	faddd		%f0, %f2, %f32
+	fmuld		%f0, %f2, %f34
+	faddd		%f0, %f2, %f36
+	fmuld		%f0, %f2, %f38
+	faddd		%f0, %f2, %f40
+	fmuld		%f0, %f2, %f42
+	faddd		%f0, %f2, %f44
+	fmuld		%f0, %f2, %f46
+	faddd		%f0, %f2, %f48
+	fmuld		%f0, %f2, %f50
+	faddd		%f0, %f2, %f52
+	fmuld		%f0, %f2, %f54
+	faddd		%f0, %f2, %f56
+	fmuld		%f0, %f2, %f58
+	b,pt		%xcc, fpdis_exit2
+	 faddd		%f0, %f2, %f60
+1:	mov		SECONDARY_CONTEXT, %g3
+	add		%g6, TI_FPREGS + 0x80, %g1
+	faddd		%f0, %f2, %f4
+	fmuld		%f0, %f2, %f6
+
+661:	ldxa		[%g3] ASI_DMMU, %g5
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	ldxa		[%g3] ASI_MMU, %g5
+	.previous
+
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661:	stxa		%g2, [%g3] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g2, [%g3] ASI_MMU
+	.previous
+
+	membar		#Sync
+	add		%g6, TI_FPREGS + 0xc0, %g2
+	faddd		%f0, %f2, %f8
+	fmuld		%f0, %f2, %f10
+	membar		#Sync
+	ldda		[%g1] ASI_BLK_S, %f32
+	ldda		[%g2] ASI_BLK_S, %f48
+	membar		#Sync
+	faddd		%f0, %f2, %f12
+	fmuld		%f0, %f2, %f14
+	faddd		%f0, %f2, %f16
+	fmuld		%f0, %f2, %f18
+	faddd		%f0, %f2, %f20
+	fmuld		%f0, %f2, %f22
+	faddd		%f0, %f2, %f24
+	fmuld		%f0, %f2, %f26
+	faddd		%f0, %f2, %f28
+	fmuld		%f0, %f2, %f30
+	ba,a,pt		%xcc, fpdis_exit
+
+2:	andcc		%g5, FPRS_DU, %g0
+	bne,pt		%icc, 3f
+	 fzero		%f32
+	mov		SECONDARY_CONTEXT, %g3
+	fzero		%f34
+
+661:	ldxa		[%g3] ASI_DMMU, %g5
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	ldxa		[%g3] ASI_MMU, %g5
+	.previous
+
+	add		%g6, TI_FPREGS, %g1
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661:	stxa		%g2, [%g3] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g2, [%g3] ASI_MMU
+	.previous
+
+	membar		#Sync
+	add		%g6, TI_FPREGS + 0x40, %g2
+	faddd		%f32, %f34, %f36
+	fmuld		%f32, %f34, %f38
+	membar		#Sync
+	ldda		[%g1] ASI_BLK_S, %f0
+	ldda		[%g2] ASI_BLK_S, %f16
+	membar		#Sync
+	faddd		%f32, %f34, %f40
+	fmuld		%f32, %f34, %f42
+	faddd		%f32, %f34, %f44
+	fmuld		%f32, %f34, %f46
+	faddd		%f32, %f34, %f48
+	fmuld		%f32, %f34, %f50
+	faddd		%f32, %f34, %f52
+	fmuld		%f32, %f34, %f54
+	faddd		%f32, %f34, %f56
+	fmuld		%f32, %f34, %f58
+	faddd		%f32, %f34, %f60
+	fmuld		%f32, %f34, %f62
+	ba,a,pt		%xcc, fpdis_exit
+
+3:	mov		SECONDARY_CONTEXT, %g3
+	add		%g6, TI_FPREGS, %g1
+
+661:	ldxa		[%g3] ASI_DMMU, %g5
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	ldxa		[%g3] ASI_MMU, %g5
+	.previous
+
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661:	stxa		%g2, [%g3] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g2, [%g3] ASI_MMU
+	.previous
+
+	membar		#Sync
+	mov		0x40, %g2
+	membar		#Sync
+	ldda		[%g1] ASI_BLK_S, %f0
+	ldda		[%g1 + %g2] ASI_BLK_S, %f16
+	add		%g1, 0x80, %g1
+	ldda		[%g1] ASI_BLK_S, %f32
+	ldda		[%g1 + %g2] ASI_BLK_S, %f48
+	membar		#Sync
+fpdis_exit:
+
+661:	stxa		%g5, [%g3] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g5, [%g3] ASI_MMU
+	.previous
+
+	membar		#Sync
+fpdis_exit2:
+	wr		%g7, 0, %gsr
+	ldx		[%g6 + TI_XFSR], %fsr
+	rdpr		%tstate, %g3
+	or		%g3, %g4, %g3		! anal...
+	wrpr		%g3, %tstate
+	wr		%g0, FPRS_FEF, %fprs	! clean DU/DL bits
+	retry
+	.size		do_fpdis,.-do_fpdis
+
+	.align		32
+	.type		fp_other_bounce,#function
+fp_other_bounce:
+	call		do_fpother
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		fp_other_bounce,.-fp_other_bounce
+
+	.align		32
+	.globl		do_fpother_check_fitos
+	.type		do_fpother_check_fitos,#function
+do_fpother_check_fitos:
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	sethi		%hi(fp_other_bounce - 4), %g7
+	or		%g7, %lo(fp_other_bounce - 4), %g7
+
+	/* NOTE: Need to preserve %g7 until we fully commit
+	 *       to the fitos fixup.
+	 */
+	stx		%fsr, [%g6 + TI_XFSR]
+	rdpr		%tstate, %g3
+	andcc		%g3, TSTATE_PRIV, %g0
+	bne,pn		%xcc, do_fptrap_after_fsr
+	 nop
+	ldx		[%g6 + TI_XFSR], %g3
+	srlx		%g3, 14, %g1
+	and		%g1, 7, %g1
+	cmp		%g1, 2			! Unfinished FP-OP
+	bne,pn		%xcc, do_fptrap_after_fsr
+	 sethi		%hi(1 << 23), %g1	! Inexact
+	andcc		%g3, %g1, %g0
+	bne,pn		%xcc, do_fptrap_after_fsr
+	 rdpr		%tpc, %g1
+	lduwa		[%g1] ASI_AIUP, %g3	! This cannot ever fail
+#define FITOS_MASK	0xc1f83fe0
+#define FITOS_COMPARE	0x81a01880
+	sethi		%hi(FITOS_MASK), %g1
+	or		%g1, %lo(FITOS_MASK), %g1
+	and		%g3, %g1, %g1
+	sethi		%hi(FITOS_COMPARE), %g2
+	or		%g2, %lo(FITOS_COMPARE), %g2
+	cmp		%g1, %g2
+	bne,pn		%xcc, do_fptrap_after_fsr
+	 nop
+	std		%f62, [%g6 + TI_FPREGS + (62 * 4)]
+	sethi		%hi(fitos_table_1), %g1
+	and		%g3, 0x1f, %g2
+	or		%g1, %lo(fitos_table_1),  %g1
+	sllx		%g2, 2, %g2
+	jmpl		%g1 + %g2, %g0
+	 ba,pt		%xcc, fitos_emul_continue
+
+fitos_table_1:
+	fitod		%f0, %f62
+	fitod		%f1, %f62
+	fitod		%f2, %f62
+	fitod		%f3, %f62
+	fitod		%f4, %f62
+	fitod		%f5, %f62
+	fitod		%f6, %f62
+	fitod		%f7, %f62
+	fitod		%f8, %f62
+	fitod		%f9, %f62
+	fitod		%f10, %f62
+	fitod		%f11, %f62
+	fitod		%f12, %f62
+	fitod		%f13, %f62
+	fitod		%f14, %f62
+	fitod		%f15, %f62
+	fitod		%f16, %f62
+	fitod		%f17, %f62
+	fitod		%f18, %f62
+	fitod		%f19, %f62
+	fitod		%f20, %f62
+	fitod		%f21, %f62
+	fitod		%f22, %f62
+	fitod		%f23, %f62
+	fitod		%f24, %f62
+	fitod		%f25, %f62
+	fitod		%f26, %f62
+	fitod		%f27, %f62
+	fitod		%f28, %f62
+	fitod		%f29, %f62
+	fitod		%f30, %f62
+	fitod		%f31, %f62
+
+fitos_emul_continue:
+	sethi		%hi(fitos_table_2), %g1
+	srl		%g3, 25, %g2
+	or		%g1, %lo(fitos_table_2), %g1
+	and		%g2, 0x1f, %g2
+	sllx		%g2, 2, %g2
+	jmpl		%g1 + %g2, %g0
+	 ba,pt		%xcc, fitos_emul_fini
+
+fitos_table_2:
+	fdtos		%f62, %f0
+	fdtos		%f62, %f1
+	fdtos		%f62, %f2
+	fdtos		%f62, %f3
+	fdtos		%f62, %f4
+	fdtos		%f62, %f5
+	fdtos		%f62, %f6
+	fdtos		%f62, %f7
+	fdtos		%f62, %f8
+	fdtos		%f62, %f9
+	fdtos		%f62, %f10
+	fdtos		%f62, %f11
+	fdtos		%f62, %f12
+	fdtos		%f62, %f13
+	fdtos		%f62, %f14
+	fdtos		%f62, %f15
+	fdtos		%f62, %f16
+	fdtos		%f62, %f17
+	fdtos		%f62, %f18
+	fdtos		%f62, %f19
+	fdtos		%f62, %f20
+	fdtos		%f62, %f21
+	fdtos		%f62, %f22
+	fdtos		%f62, %f23
+	fdtos		%f62, %f24
+	fdtos		%f62, %f25
+	fdtos		%f62, %f26
+	fdtos		%f62, %f27
+	fdtos		%f62, %f28
+	fdtos		%f62, %f29
+	fdtos		%f62, %f30
+	fdtos		%f62, %f31
+
+fitos_emul_fini:
+	ldd		[%g6 + TI_FPREGS + (62 * 4)], %f62
+	done
+	.size		do_fpother_check_fitos,.-do_fpother_check_fitos
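
The fixup above decodes the trapping instruction: FITOS_MASK and
FITOS_COMPARE match the fitos FPop encoding, rs2 (bits 4:0) indexes
fitos_table_1 and rd (bits 29:25) indexes fitos_table_2. A decode sketch
in C (hypothetical helper, not part of this file):

	static int is_fitos(uint32_t insn, unsigned int *rs2, unsigned int *rd)
	{
		if ((insn & 0xc1f83fe0u) != 0x81a01880u)
			return 0;
		*rs2 = insn & 0x1f;		/* selects the fitod entry */
		*rd  = (insn >> 25) & 0x1f;	/* selects the fdtos entry */
		return 1;
	}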
+
+	.align		32
+	.globl		do_fptrap
+	.type		do_fptrap,#function
+do_fptrap:
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	stx		%fsr, [%g6 + TI_XFSR]
+do_fptrap_after_fsr:
+	ldub		[%g6 + TI_FPSAVED], %g3
+	rd		%fprs, %g1
+	or		%g3, %g1, %g3
+	stb		%g3, [%g6 + TI_FPSAVED]
+	rd		%gsr, %g3
+	stx		%g3, [%g6 + TI_GSR]
+	mov		SECONDARY_CONTEXT, %g3
+
+661:	ldxa		[%g3] ASI_DMMU, %g5
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	ldxa		[%g3] ASI_MMU, %g5
+	.previous
+
+	sethi		%hi(sparc64_kern_sec_context), %g2
+	ldx		[%g2 + %lo(sparc64_kern_sec_context)], %g2
+
+661:	stxa		%g2, [%g3] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g2, [%g3] ASI_MMU
+	.previous
+
+	membar		#Sync
+	add		%g6, TI_FPREGS, %g2
+	andcc		%g1, FPRS_DL, %g0
+	be,pn		%icc, 4f
+	 mov		0x40, %g3
+	stda		%f0, [%g2] ASI_BLK_S
+	stda		%f16, [%g2 + %g3] ASI_BLK_S
+	andcc		%g1, FPRS_DU, %g0
+	be,pn		%icc, 5f
+4:	add		%g2, 128, %g2
+	stda		%f32, [%g2] ASI_BLK_S
+	stda		%f48, [%g2 + %g3] ASI_BLK_S
+5:	mov		SECONDARY_CONTEXT, %g1
+	membar		#Sync
+
+661:	stxa		%g5, [%g1] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g5, [%g1] ASI_MMU
+	.previous
+
+	membar		#Sync
+	ba,pt		%xcc, etrap
+	 wr		%g0, 0, %fprs
+	.size		do_fptrap,.-do_fptrap
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
new file mode 100644
index 0000000..684b84c
--- /dev/null
+++ b/arch/sparc/kernel/ftrace.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <trace/syscall.h>
+
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static const u32 ftrace_nop = 0x01000000;
+
+static u32 ftrace_call_replace(unsigned long ip, unsigned long addr)
+{
+	u32 call;
+	s32 off;
+
+	off = ((s32)addr - (s32)ip);
+	call = 0x40000000 | ((u32)off >> 2);
+
+	return call;
+}
+
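ftrace_call_replace above builds a SPARC "call" instruction: opcode 01 in
bits 31:30 plus a 30-bit signed word displacement. The unsigned shift keeps
the two opcode bits clear even for backward calls. A small standalone
worked example (not part of this file):

	#include <assert.h>
	#include <stdint.h>

	static uint32_t call_insn(uint32_t ip, uint32_t addr)
	{
		return 0x40000000u | ((uint32_t)(addr - ip) >> 2);
	}

	int main(void)
	{
		/* forward: off = +0x1000, disp30 = 0x400 */
		assert(call_insn(0x40000000u, 0x40001000u) == 0x40000400u);
		/* backward: off = -0x1000; the disp30 field 0x3ffffc00
		 * sign-extends back to -0x1000 bytes at execution time */
		assert(call_insn(0x40001000u, 0x40000000u) == 0x7ffffc00u);
		return 0;
	}
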
+static int ftrace_modify_code(unsigned long ip, u32 old, u32 new)
+{
+	u32 replaced;
+	int faulted;
+
+	/* Atomically swap in the new instruction word with cas and flush
+	 * the I-cache line; the __ex_table fixup reports a fault on the
+	 * text page as faulted = 1 instead of oopsing.
+	 */
+	__asm__ __volatile__(
+	"1:	cas	[%[ip]], %[old], %[new]\n"
+	"	flush	%[ip]\n"
+	"	mov	0, %[faulted]\n"
+	"2:\n"
+	"	.section .fixup,#alloc,#execinstr\n"
+	"	.align	4\n"
+	"3:	sethi	%%hi(2b), %[faulted]\n"
+	"	jmpl	%[faulted] + %%lo(2b), %%g0\n"
+	"	 mov	1, %[faulted]\n"
+	"	.previous\n"
+	"	.section __ex_table,\"a\"\n"
+	"	.align	4\n"
+	"	.word	1b, 3b\n"
+	"	.previous\n"
+	: "=r" (replaced), [faulted] "=r" (faulted)
+	: [new] "0" (new), [old] "r" (old), [ip] "r" (ip)
+	: "memory");
+
+	if (replaced != old && replaced != new)
+		faulted = 2;
+
+	return faulted;
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	u32 old, new;
+
+	old = ftrace_call_replace(ip, addr);
+	new = ftrace_nop;
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+	u32 old, new;
+
+	old = ftrace_nop;
+	new = ftrace_call_replace(ip, addr);
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long ip = (unsigned long)(&ftrace_call);
+	u32 old, new;
+
+	old = *(u32 *) &ftrace_call;
+	new = ftrace_call_replace(ip, (unsigned long)func);
+	return ftrace_modify_code(ip, old, new);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	u32 old, new;
+
+	old = *(u32 *) &ftrace_graph_call;
+	new = ftrace_call_replace(ip, (unsigned long) &ftrace_graph_caller);
+	return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	unsigned long ip = (unsigned long)(&ftrace_graph_call);
+	u32 old, new;
+
+	old = *(u32 *) &ftrace_graph_call;
+	new = ftrace_call_replace(ip, (unsigned long) &ftrace_stub);
+
+	return ftrace_modify_code(ip, old, new);
+}
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info.
+ */
+unsigned long prepare_ftrace_return(unsigned long parent,
+				    unsigned long self_addr,
+				    unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long) &return_to_handler;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return parent + 8UL;
+
+	if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
+		return parent + 8UL;
+
+	return return_hooker;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/sparc/kernel/getsetcc.S b/arch/sparc/kernel/getsetcc.S
new file mode 100644
index 0000000..181e09f
--- /dev/null
+++ b/arch/sparc/kernel/getsetcc.S
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	.globl		getcc
+	.type		getcc,#function
+getcc:
+	ldx		[%o0 + PT_V9_TSTATE], %o1
+	srlx		%o1, 32, %o1
+	and		%o1, 0xf, %o1
+	retl
+	 stx		%o1, [%o0 + PT_V9_G1]
+	.size		getcc,.-getcc
+
+	.globl		setcc
+	.type		setcc,#function
+setcc:
+	ldx		[%o0 + PT_V9_TSTATE], %o1
+	ldx		[%o0 + PT_V9_G1], %o2
+	or		%g0, %ulo(TSTATE_ICC), %o3
+	sllx		%o3, 32, %o3
+	andn		%o1, %o3, %o1
+	sllx		%o2, 32, %o2
+	and		%o2, %o3, %o2
+	or		%o1, %o2, %o1
+	retl
+	 stx		%o1, [%o0 + PT_V9_TSTATE]
+	.size		setcc,.-setcc
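
getcc/setcc shuttle the integer condition codes between TSTATE (bits 35:32,
the TSTATE_ICC field) and the pt_regs G1 slot used by the trap emulation
code. The icc manipulation itself, as a C sketch:

	static unsigned long get_icc(struct pt_regs *regs)
	{
		return (regs->tstate >> 32) & 0xf;	/* TSTATE.icc */
	}

	static void set_icc(struct pt_regs *regs, unsigned long cc)
	{
		regs->tstate &= ~(0xfUL << 32);
		regs->tstate |= (cc & 0xf) << 32;
	}
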
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
new file mode 100644
index 0000000..e55f2c0
--- /dev/null
+++ b/arch/sparc/kernel/head_32.S
@@ -0,0 +1,812 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * head.S: The initial boot code for the Sparc port of Linux.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1995,1999 Pete Zaitcev   (zaitcev@yahoo.com)
+ * Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1997 Jakub Jelinek   (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 Michael A. Griffith (grif@acm.org)
+ *
+ * CompactPCI platform by Eric Brower, 1999.
+ */
+
+#include <linux/version.h>
+#include <linux/init.h>
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/contregs.h>
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/page.h>
+#include <asm/kdebug.h>
+#include <asm/winmacro.h>
+#include <asm/thread_info.h>	/* TI_UWINMASK */
+#include <asm/errno.h>
+#include <asm/pgtsrmmu.h>	/* SRMMU_PGDIR_SHIFT */
+#include <asm/export.h>
+
+	.data
+/* The following are used with the prom_vector node-ops to figure out
+ * the cpu-type
+ */
+	.align 4
+	.globl cputypval
+cputypval:
+	.asciz "sun4m"
+	.ascii "     "
+
+/* Tested on SS-5, SS-10 */
+	.align 4
+cputypvar:
+	.asciz "compatible"
+
+	.align 4
+
+notsup:
+	.asciz	"Sparc-Linux sun4/sun4c or MMU-less not supported\n\n"
+	.align 4
+
+sun4e_notsup:
+        .asciz  "Sparc-Linux sun4e support does not exist\n\n"
+	.align 4
+
+/* The trap-table - located in the __HEAD section */
+#include "ttable_32.S"
+
+	.align PAGE_SIZE
+
+/* This was the only reasonable way I could think of to properly align
+ * these page-table data structures.
+ */
+	.globl empty_zero_page
+empty_zero_page:	.skip PAGE_SIZE
+EXPORT_SYMBOL(empty_zero_page)
+
+	.global root_flags
+	.global ram_flags
+	.global root_dev
+	.global sparc_ramdisk_image
+	.global sparc_ramdisk_size
+
+/* This stuff has to be in sync with SILO and other potential boot loaders.
+ * Fields should be kept upward compatible and whenever any change is made,
+ * HdrS version should be incremented.
+ */
+	.ascii	"HdrS"
+	.word	LINUX_VERSION_CODE
+	.half	0x0203		/* HdrS version */
+root_flags:
+	.half	1
+root_dev:
+	.half	0
+ram_flags:
+	.half	0
+sparc_ramdisk_image:
+	.word	0
+sparc_ramdisk_size:
+	.word	0
+	.word	reboot_command
+	.word	0, 0, 0
+	.word	_end
+
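A boot loader reading this header sees the following packed layout starting
at the "HdrS" magic (a read-side sketch with <stdint.h> types, not a
structure the kernel defines here):

	struct sparc32_hdrs {
		char	 magic[4];	/* "HdrS" */
		uint32_t linux_version;	/* LINUX_VERSION_CODE */
		uint16_t hdrs_version;	/* 0x0203 */
		uint16_t root_flags;
		uint16_t root_dev;
		uint16_t ram_flags;
		uint32_t ramdisk_image;
		uint32_t ramdisk_size;
		uint32_t reboot_command;
		uint32_t reserved[3];
		uint32_t end;		/* address of _end */
	} __attribute__((packed));
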
+/* Cool, here we go. Pick up the romvec pointer in %o0 and stash it in
+ * %g7 and at prom_vector_p.  Also quickly check whether we are on
+ * a v0, v2, or v3 prom.
+ */
+gokernel:
+		/* Ok, it's nice to know, as early as possible, if we
+		 * are already mapped where we expect to be in virtual
+		 * memory.  The Solaris /boot elf format bootloader
+		 * will peek into our elf header and load us where
+		 * we want to be, otherwise we have to re-map.
+		 *
+		 * Some boot loaders don't place the jmp'rs address
+		 * in %o7, so we do a pc-relative call to a local
+		 * label, then see what %o7 has.
+		 */
+
+		mov	%o7, %g4		! Save %o7
+
+		/* Jump to it, and pray... */
+current_pc:
+		call	1f
+		 nop
+
+1:
+		mov	%o7, %g3
+
+		tst	%o0
+		be	no_sun4u_here
+		 mov	%g4, %o7		/* Previous %o7. */
+
+		mov	%o0, %l0		! stash away romvec
+		mov	%o0, %g7		! put it here too
+		mov	%o1, %l1		! stash away debug_vec too
+
+		/* Ok, let's check out our run time program counter. */
+		set	current_pc, %g5
+		cmp	%g3, %g5
+		be	already_mapped
+		 nop
+
+		/* %l6 will hold the offset we have to subtract
+		 * from absolute symbols in order to access areas
+		 * in our own image.  If already mapped this is
+		 * just plain zero, else it is KERNBASE.
+		 */
+		set	KERNBASE, %l6
+		b	copy_prom_lvl14
+		 nop
+
+already_mapped:
+		mov	0, %l6
+
+		/* Copy over the Prom's level 14 clock handler. */
+copy_prom_lvl14:
+#if 1
+		/* DJHR
+		 * preserve our linked/calculated instructions
+		 */
+		set	lvl14_save, %g1
+		set	t_irq14, %g3
+		sub	%g1, %l6, %g1		! translate to physical
+		sub	%g3, %l6, %g3		! translate to physical
+		ldd	[%g3], %g4
+		std	%g4, [%g1]
+		ldd	[%g3+8], %g4
+		std	%g4, [%g1+8]
+#endif
+		rd	%tbr, %g1
+		andn	%g1, 0xfff, %g1		! prom's trap table base
+		or	%g0, (0x1e<<4), %g2	! offset to lvl14 intr
+		or	%g1, %g2, %g2
+		set	t_irq14, %g3
+		sub	%g3, %l6, %g3
+		ldd	[%g2], %g4
+		std	%g4, [%g3]
+		ldd	[%g2 + 0x8], %g4
+		std	%g4, [%g3 + 0x8]	! Copy prom's handler
+
+/* DON'T TOUCH %l0 thru %l5 in these remapping routines,
+ * we need their values afterwards!
+ */
+
+		/* Now check whether we are already mapped, if we
+		 * are we can skip all this garbage coming up.
+		 */
+copy_prom_done:
+		cmp	%l6, 0
+		be	go_to_highmem		! this will be a nop then
+		 nop
+
+		/* Validate that we are in fact running on an
+		 * SRMMU based cpu.
+		 */
+		set	0x4000, %g6
+		cmp	%g7, %g6
+		bne	not_a_sun4
+		 nop
+
+halt_notsup:
+		ld	[%g7 + 0x68], %o1
+		set	notsup, %o0
+		sub	%o0, %l6, %o0
+		call	%o1
+		 nop
+		ba	halt_me
+		 nop
+
+not_a_sun4:
+		/* It looks like this is a machine we support.
+		 * Now find out what MMU we are dealing with
+		 * LEON - identified by the psr.impl field
+		 * Viking - identified by the psr.impl field
+		 * In all other cases a sun4m srmmu.
+		 * We check that the MMU is enabled in all cases.
+		 */
+
+		/* Check if this is a LEON CPU */
+		rd	%psr, %g3
+		srl	%g3, PSR_IMPL_SHIFT, %g3
+		and	%g3, PSR_IMPL_SHIFTED_MASK, %g3
+		cmp	%g3, PSR_IMPL_LEON
+		be	leon_remap		/* It is a LEON - jump */
+		 nop
+
+		/* Sanity-check, is MMU enabled */
+		lda	[%g0] ASI_M_MMUREGS, %g1
+		andcc	%g1, 1, %g0
+		be	halt_notsup
+		 nop
+
+		/* Check for a viking (TI) module. */
+		cmp	%g3, PSR_IMPL_TI
+		bne	srmmu_not_viking
+		 nop
+
+		/* Figure out what kind of viking we are on.
+		 * We need to know if we have to play with the
+		 * AC bit and disable traps or not.
+		 */
+
+		/* I've only seen MicroSparc's on SparcClassics with this
+		 * bit set.
+		 */
+		set	0x800, %g2
+		lda	[%g0] ASI_M_MMUREGS, %g3	! peek in the control reg
+		and	%g2, %g3, %g3
+		subcc	%g3, 0x0, %g0
+		bnz	srmmu_not_viking			! is in mbus mode
+		 nop
+
+		rd	%psr, %g3			! DO NOT TOUCH %g3
+		andn	%g3, PSR_ET, %g2
+		wr	%g2, 0x0, %psr
+		WRITE_PAUSE
+
+		/* Get context table pointer, then convert to
+		 * a physical address, which is 36 bits.
+		 */
+		set	AC_M_CTPR, %g4
+		lda	[%g4] ASI_M_MMUREGS, %g4
+		sll	%g4, 0x4, %g4			! We use this below
+							! DO NOT TOUCH %g4
+
+		/* Set the AC bit in the Viking's MMU control reg. */
+		lda	[%g0] ASI_M_MMUREGS, %g5	! DO NOT TOUCH %g5
+		set	0x8000, %g6			! AC bit mask
+		or	%g5, %g6, %g6			! Or it in...
+		sta	%g6, [%g0] ASI_M_MMUREGS	! Close your eyes...
+
+		/* Grrr, why does it seem like every other load/store
+		 * on the sun4m is in some ASI space...
+		 * Fine with me, let's get the pointer to the level 1
+		 * page table directory and fetch its entry.
+		 */
+		lda	[%g4] ASI_M_BYPASS, %o1		! This is a level 1 ptr
+		srl	%o1, 0x4, %o1			! Clear low 4 bits
+		sll	%o1, 0x8, %o1			! Make physical
+
+		/* Ok, pull in the PTD. */
+		lda	[%o1] ASI_M_BYPASS, %o2		! This is the 0x0 16MB pgd
+
+		/* Calculate to KERNBASE entry. */
+		add	%o1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %o3
+
+		/* Poke the entry into the calculated address. */
+		sta	%o2, [%o3] ASI_M_BYPASS
+
+		/* I don't get it Sun, if you engineered all these
+		 * boot loaders and the PROM (thank you for the debugging
+		 * features btw) why did you not have them load kernel
+		 * images up in high address space, since this is necessary
+		 * for ABI compliance anyways?  Does this low-mapping provide
+		 * enhanced interoperability?
+		 *
+		 * "The PROM is the computer."
+		 */
+
+		/* Ok, restore the MMU control register we saved in %g5 */
+		sta	%g5, [%g0] ASI_M_MMUREGS	! POW... ouch
+
+		/* Turn traps back on.  We saved it in %g3 earlier. */
+		wr	%g3, 0x0, %psr			! tick tock, tick tock
+
+		/* Now we burn precious CPU cycles due to bad engineering. */
+		WRITE_PAUSE
+
+		/* Wow, all that just to move a 32-bit value from one
+		 * place to another...  Jump to high memory.
+		 */
+		b	go_to_highmem
+		 nop
+
+srmmu_not_viking:
+		/* This works on viking's in Mbus mode and all
+		 * other MBUS modules.  It is virtually the same as
+		 * the above madness sans turning traps off and flipping
+		 * the AC bit.
+		 */
+		set	AC_M_CTPR, %g1
+		lda	[%g1] ASI_M_MMUREGS, %g1	! get ctx table ptr
+		sll	%g1, 0x4, %g1			! make physical addr
+		lda	[%g1] ASI_M_BYPASS, %g1		! ptr to level 1 pg_table
+		srl	%g1, 0x4, %g1
+		sll	%g1, 0x8, %g1			! make phys addr for l1 tbl
+
+		lda	[%g1] ASI_M_BYPASS, %g2		! get level1 entry for 0x0
+		add	%g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3
+		sta	%g2, [%g3] ASI_M_BYPASS		! place at KERNBASE entry
+		b	go_to_highmem
+		 nop					! wheee....
+
+
+leon_remap:
+		/* Sanity-check, is MMU enabled */
+		lda	[%g0] ASI_LEON_MMUREGS, %g1
+		andcc	%g1, 1, %g0
+		be	halt_notsup
+		 nop
+
+		/* Same code as in the srmmu_not_viking case,
+		 * with the LEON ASI for mmuregs
+		 */
+		set	AC_M_CTPR, %g1
+		lda	[%g1] ASI_LEON_MMUREGS, %g1	! get ctx table ptr
+		sll	%g1, 0x4, %g1			! make physical addr
+		lda	[%g1] ASI_M_BYPASS, %g1		! ptr to level 1 pg_table
+		srl	%g1, 0x4, %g1
+		sll	%g1, 0x8, %g1			! make phys addr for l1 tbl
+
+		lda	[%g1] ASI_M_BYPASS, %g2		! get level1 entry for 0x0
+		add	%g1, KERNBASE >> (SRMMU_PGDIR_SHIFT - 2), %g3
+		sta	%g2, [%g3] ASI_M_BYPASS		! place at KERNBASE entry
+		b	go_to_highmem
+		 nop					! wheee....
+
+/* Now do a non-relative jump so that PC is in high-memory */
+go_to_highmem:
+		set	execute_in_high_mem, %g1
+		jmpl	%g1, %g0
+		 nop
+
+/* The code above should be at the beginning, and we have to take care
+ * with short jumps, as branching to the .init.text section from .text
+ * is usually impossible. */
+		__INIT
+/* Acquire boot time privileged register values, this will help debugging.
+ * I figure out and store nwindows and nwindowsm1 later on.
+ */
+execute_in_high_mem:
+		mov	%l0, %o0		! put back romvec
+		mov	%l1, %o1		! and debug_vec
+
+		sethi	%hi(prom_vector_p), %g1
+		st	%o0, [%g1 + %lo(prom_vector_p)]
+
+		sethi	%hi(linux_dbvec), %g1
+		st	%o1, [%g1 + %lo(linux_dbvec)]
+
+		/* Get the machine type via the romvec
+		 * getprops node operation
+		 */
+		add	%g7, 0x1c, %l1
+		ld	[%l1], %l0
+		ld	[%l0], %l0
+		call	%l0
+		 or	%g0, %g0, %o0		! next_node(0) = first_node
+		or	%o0, %g0, %g6
+
+		sethi	%hi(cputypvar), %o1	! First node has cpu-arch
+		or	%o1, %lo(cputypvar), %o1
+		sethi	%hi(cputypval), %o2	! information, the string
+		or	%o2, %lo(cputypval), %o2
+		ld	[%l1], %l0		! 'compatible' tells
+		ld	[%l0 + 0xc], %l0	! that we want 'sun4x' where
+		call	%l0			! x is one of 'm', 'd' or 'e'.
+		 nop				! %o2 holds pointer
+						! to a buf where above string
+						! will get stored by the prom.
+
+
+		/* Check value of "compatible" property.
+		 * "value" => "model"
+		 * leon => sparc_leon
+		 * sun4m => sun4m
+		 * sun4s => sun4m
+		 * sun4d => sun4d
+		 * sun4e => "no_sun4e_here"
+		 * '*'   => "no_sun4u_here"
+		 * Check single letters only
+		 */
+
+		set	cputypval, %o2
+		/* If cputypval[0] == 'l' (lower case letter L) this is leon */
+		ldub	[%o2], %l1
+		cmp	%l1, 'l'
+		be	leon_init
+		 nop
+
+		/* Check cputypval[4] to find the sun model */
+		ldub	[%o2 + 0x4], %l1
+
+		cmp	%l1, 'm'
+		be	sun4m_init
+		 cmp	%l1, 's'
+		be	sun4m_init
+		 cmp	%l1, 'd'
+		be	sun4d_init
+		 cmp	%l1, 'e'
+		be	no_sun4e_here		! Could be a sun4e.
+		 nop
+		b	no_sun4u_here		! AIEEE, a V9 sun4u... Get our BIG BROTHER kernel :))
+		 nop
+
+leon_init:
+		/* LEON CPU - set boot_cpu_id */
+		sethi	%hi(boot_cpu_id), %g2	! boot-cpu index
+
+#ifdef CONFIG_SMP
+		ldub	[%g2 + %lo(boot_cpu_id)], %g1
+		cmp	%g1, 0xff		! unset means first CPU
+		bne	leon_smp_cpu_startup	! continue only with master
+		 nop
+#endif
+		/* Get CPU-ID from most significant 4-bit of ASR17 */
+		rd     %asr17, %g1
+		srl    %g1, 28, %g1
+
+		/* Update boot_cpu_id only on boot cpu */
+		stub	%g1, [%g2 + %lo(boot_cpu_id)]
+
+		ba continue_boot
+		 nop
+
+/* CPUID in bootbus can be found at PA 0xf0140000 */
+#define SUN4D_BOOTBUS_CPUID     0xf0140000
+
+sun4d_init:
+	/* Need to patch call to handler_irq */
+	set	patch_handler_irq, %g4
+	set	sun4d_handler_irq, %g5
+	sethi	%hi(0x40000000), %g3		! call
+	sub	%g5, %g4, %g5
+	srl	%g5, 2, %g5
+	or	%g5, %g3, %g5
+	st	%g5, [%g4]
+
+#ifdef CONFIG_SMP
+	/* Get our CPU id out of bootbus */
+	set     SUN4D_BOOTBUS_CPUID, %g3
+	lduba   [%g3] ASI_M_CTL, %g3
+	and     %g3, 0xf8, %g3
+	srl     %g3, 3, %g4
+	sta     %g4, [%g0] ASI_M_VIKING_TMP1
+	sethi	%hi(boot_cpu_id), %g5
+	stb	%g4, [%g5 + %lo(boot_cpu_id)]
+#endif
+
+	/* Fall through to sun4m_init */
+
+sun4m_init:
+/* Ok, the PROM could have done funny things and apple cider could still
+ * be sitting in the fault status/address registers.  Read them all to
+ * clear them so we don't get magic faults later on.
+ */
+/* This sucks, apparently this makes Vikings call prom panic, will fix later */
+2:
+		rd	%psr, %o1
+		srl	%o1, PSR_IMPL_SHIFT, %o1	! Get a type of the CPU
+
+		subcc	%o1, PSR_IMPL_TI, %g0		! TI: Viking or MicroSPARC
+		be	continue_boot
+		 nop
+
+		set	AC_M_SFSR, %o0
+		lda	[%o0] ASI_M_MMUREGS, %g0
+		set	AC_M_SFAR, %o0
+		lda	[%o0] ASI_M_MMUREGS, %g0
+
+		/* Fujitsu MicroSPARC-II has no asynchronous flavors of FARs */
+		subcc	%o1, 0, %g0
+		be	continue_boot
+		 nop
+
+		set	AC_M_AFSR, %o0
+		lda	[%o0] ASI_M_MMUREGS, %g0
+		set	AC_M_AFAR, %o0
+		lda	[%o0] ASI_M_MMUREGS, %g0
+		 nop
+
+
+continue_boot:
+
+/* Aieee, now set PC and nPC, enable traps, give ourselves a stack and it's
+ * show-time!
+ */
+		/* Turn on Supervisor, EnableFloating, and all the PIL bits.
+		 * Also puts us in register window zero with traps off.
+		 */
+		set	(PSR_PS | PSR_S | PSR_PIL | PSR_EF), %g2
+		wr	%g2, 0x0, %psr
+		WRITE_PAUSE
+
+		/* I want a kernel stack NOW! */
+		set	init_thread_union, %g1
+		set	(THREAD_SIZE - STACKFRAME_SZ), %g2
+		add	%g1, %g2, %sp
+		mov	0, %fp			/* And for good luck */
+
+		/* Zero out our BSS section. */
+		set	__bss_start , %o0	! First address of BSS
+		set	_end , %o1		! Last address of BSS
+		add	%o0, 0x1, %o0
+1:
+		stb	%g0, [%o0]
+		subcc	%o0, %o1, %g0
+		bl	1b
+		 add	%o0, 0x1, %o0
+
+		/* If boot_cpu_id has not been setup by machine specific
+		 * init-code above we default it to zero.
+		 */
+		sethi	%hi(boot_cpu_id), %g2
+		ldub	[%g2 + %lo(boot_cpu_id)], %g3
+		cmp	%g3, 0xff
+		bne	1f
+		 nop
+		mov	%g0, %g3
+		stub	%g3, [%g2 + %lo(boot_cpu_id)]
+
+1:		sll	%g3, 2, %g3
+
+		/* Initialize the uwinmask value for init task just in case.
+		 * But first make current_set[boot_cpu_id] point to something useful.
+		 */
+		set	init_thread_union, %g6
+		set	current_set, %g2
+#ifdef CONFIG_SMP
+		st	%g6, [%g2]
+		add	%g2, %g3, %g2
+#endif
+		st	%g6, [%g2]
+
+		st	%g0, [%g6 + TI_UWINMASK]
+
+/* Compute NWINDOWS and stash it away. Now uses %wim trick explained
+ * in the V8 manual. Ok, this method seems to work, Sparc is cool...
+ * No, it doesn't work, have to play the save/readCWP/restore trick.
+ */
+
+		wr	%g0, 0x0, %wim			! so we do not get a trap
+		WRITE_PAUSE
+
+		save
+
+		rd	%psr, %g3
+
+		restore
+
+		and	%g3, 0x1f, %g3
+		add	%g3, 0x1, %g3
+
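The probe works because the PSR loaded above selects window zero and %wim
is zero, so the save cannot trap and CWP simply wraps around; in C terms
(psr_after_save names the value read between the save and restore):

	nwindows = (psr_after_save & 0x1f) + 1;	/* save from CWP 0 wraps to NWINDOWS-1 */
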
+		mov	2, %g1
+		wr	%g1, 0x0, %wim			! make window 1 invalid
+		WRITE_PAUSE
+
+		cmp	%g3, 0x7
+		bne	2f
+		 nop
+
+		/* Adjust our window handling routines to
+		 * do things correctly on 7 window Sparcs.
+		 */
+
+#define		PATCH_INSN(src, dest) \
+		set	src, %g5; \
+		set	dest, %g2; \
+		ld	[%g5], %g4; \
+		st	%g4, [%g2];
+
+		/* Patch for window spills... */
+		PATCH_INSN(spnwin_patch1_7win, spnwin_patch1)
+		PATCH_INSN(spnwin_patch2_7win, spnwin_patch2)
+		PATCH_INSN(spnwin_patch3_7win, spnwin_patch3)
+
+		/* Patch for window fills... */
+		PATCH_INSN(fnwin_patch1_7win, fnwin_patch1)
+		PATCH_INSN(fnwin_patch2_7win, fnwin_patch2)
+
+		/* Patch for trap entry setup... */
+		PATCH_INSN(tsetup_7win_patch1, tsetup_patch1)
+		PATCH_INSN(tsetup_7win_patch2, tsetup_patch2)
+		PATCH_INSN(tsetup_7win_patch3, tsetup_patch3)
+		PATCH_INSN(tsetup_7win_patch4, tsetup_patch4)
+		PATCH_INSN(tsetup_7win_patch5, tsetup_patch5)
+		PATCH_INSN(tsetup_7win_patch6, tsetup_patch6)
+
+		/* Patch for returning from traps... */
+		PATCH_INSN(rtrap_7win_patch1, rtrap_patch1)
+		PATCH_INSN(rtrap_7win_patch2, rtrap_patch2)
+		PATCH_INSN(rtrap_7win_patch3, rtrap_patch3)
+		PATCH_INSN(rtrap_7win_patch4, rtrap_patch4)
+		PATCH_INSN(rtrap_7win_patch5, rtrap_patch5)
+
+		/* Patch for killing user windows from the register file. */
+		PATCH_INSN(kuw_patch1_7win, kuw_patch1)
+
+		/* Now patch the kernel window flush sequences.
+		 * This saves 2 traps on every switch and fork.
+		 */
+		set	0x01000000, %g4
+		set	flush_patch_one, %g5
+		st	%g4, [%g5 + 0x18]
+		st	%g4, [%g5 + 0x1c]
+		set	flush_patch_two, %g5
+		st	%g4, [%g5 + 0x18]
+		st	%g4, [%g5 + 0x1c]
+		set	flush_patch_three, %g5
+		st	%g4, [%g5 + 0x18]
+		st	%g4, [%g5 + 0x1c]
+		set	flush_patch_four, %g5
+		st	%g4, [%g5 + 0x18]
+		st	%g4, [%g5 + 0x1c]
+		set	flush_patch_exception, %g5
+		st	%g4, [%g5 + 0x18]
+		st	%g4, [%g5 + 0x1c]
+		set	flush_patch_switch, %g5
+		st	%g4, [%g5 + 0x18]
+		st	%g4, [%g5 + 0x1c]
+
+2:
+		sethi	%hi(nwindows), %g4
+		st	%g3, [%g4 + %lo(nwindows)]	! store final value
+		sub	%g3, 0x1, %g3
+		sethi	%hi(nwindowsm1), %g4
+		st	%g3, [%g4 + %lo(nwindowsm1)]
+
+		/* Here we go, start using Linux's trap table... */
+		set	trapbase, %g3
+		wr	%g3, 0x0, %tbr
+		WRITE_PAUSE
+
+		/* Finally, turn on traps so that we can call c-code. */
+		rd	%psr, %g3
+		wr	%g3, 0x0, %psr
+		WRITE_PAUSE
+
+		wr	%g3, PSR_ET, %psr
+		WRITE_PAUSE
+
+		/* Call sparc32_start_kernel(struct linux_romvec *rp) */
+		sethi	%hi(prom_vector_p), %g5
+		ld	[%g5 + %lo(prom_vector_p)], %o0
+		call	sparc32_start_kernel
+		 nop
+
+		/* We should not get here. */
+		call	halt_me
+		 nop
+
+no_sun4e_here:
+		ld	[%g7 + 0x68], %o1
+		set	sun4e_notsup, %o0
+		call	%o1
+		 nop
+		b	halt_me
+		 nop
+
+		__INITDATA
+
+sun4u_1:
+		.asciz "finddevice"
+		.align	4
+sun4u_2:
+		.asciz "/chosen"
+		.align	4
+sun4u_3:
+		.asciz "getprop"
+		.align	4
+sun4u_4:
+		.asciz "stdout"
+		.align	4
+sun4u_5:
+		.asciz "write"
+		.align	4
+sun4u_6:
+		.asciz  "\n\rOn sun4u you have to use sparc64 kernel\n\rand not a sparc32 version\n\r\n\r"
+sun4u_6e:
+		.align	4
+sun4u_7:
+		.asciz "exit"
+		.align	8
+sun4u_a1:
+		.word	0, sun4u_1, 0, 1, 0, 1, 0, sun4u_2, 0
+sun4u_r1:
+		.word	0
+sun4u_a2:
+		.word	0, sun4u_3, 0, 4, 0, 1, 0
+sun4u_i2:
+		.word	0, 0, sun4u_4, 0, sun4u_1, 0, 8, 0
+sun4u_r2:
+		.word	0
+sun4u_a3:
+		.word	0, sun4u_5, 0, 3, 0, 1, 0
+sun4u_i3:
+		.word	0, 0, sun4u_6, 0, sun4u_6e - sun4u_6 - 1, 0
+sun4u_r3:
+		.word	0
+sun4u_a4:
+		.word	0, sun4u_7, 0, 0, 0, 0
+sun4u_r4:
+
+		__INIT
+no_sun4u_here:
+		set	sun4u_a1, %o0
+		set	current_pc, %l2
+		cmp	%l2, %g3
+		be	1f
+		 mov	%o4, %l0
+		sub	%g3, %l2, %l6
+		add	%o0, %l6, %o0
+		mov	%o0, %l4
+		mov	sun4u_r4 - sun4u_a1, %l3
+		ld	[%l4], %l5
+2:
+		add	%l4, 4, %l4
+		cmp	%l5, %l2
+		add	%l5, %l6, %l5
+		bgeu,a	3f
+		 st	%l5, [%l4 - 4]
+3:
+		subcc	%l3, 4, %l3
+		bne	2b
+		 ld	[%l4], %l5
+1:
+		call	%l0
+		 mov	%o0, %l1
+
+		ld	[%l1 + (sun4u_r1 - sun4u_a1)], %o1
+		add	%l1, (sun4u_a2 - sun4u_a1), %o0
+		call	%l0
+		 st	%o1, [%o0 + (sun4u_i2 - sun4u_a2)]
+
+		ld	[%l1 + (sun4u_1 - sun4u_a1)], %o1
+		add	%l1, (sun4u_a3 - sun4u_a1), %o0
+		call	%l0
+		 st	%o1, [%o0 + (sun4u_i3 - sun4u_a3)]
+
+		call	%l0
+		 add	%l1, (sun4u_a4 - sun4u_a1), %o0
+
+		/* Not reached */
+halt_me:
+		ld	[%g7 + 0x74], %o0
+		call	%o0			! Get us out of here...
+		 nop				! Apparently Solaris is better.
+
+/* Ok, now we continue in the .data/.text sections */
+
+	.data
+	.align 4
+
+/*
+ * Fill up the prom vector, note in particular the kind first element,
+ * no joke. I don't need all of them in here as the entire prom vector
+ * gets initialized in c-code so all routines can use it.
+ */
+
+prom_vector_p:
+		.word 0
+
+/* We calculate the following at boot time, window fills/spills and trap entry
+ * code uses these to keep track of the register windows.
+ */
+
+	.align 4
+	.globl	nwindows
+	.globl	nwindowsm1
+nwindows:
+	.word	8
+nwindowsm1:
+	.word	7
+
+/* Boot time debugger vector value.  We need this later on. */
+
+	.align 4
+	.globl	linux_dbvec
+linux_dbvec:
+	.word	0
+	.word	0
+
+	.align 8
+
+	.globl	lvl14_save
+lvl14_save:
+	.word	0
+	.word	0
+	.word	0
+	.word	0
+	.word	t_irq14
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
new file mode 100644
index 0000000..540bfc9
--- /dev/null
+++ b/arch/sparc/kernel/head_64.S
@@ -0,0 +1,969 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* head.S: Initial boot code for the Sparc64 port of Linux.
+ *
+ * Copyright (C) 1996, 1997, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 David Sitsky (David.Sitsky@anu.edu.au)
+ * Copyright (C) 1997, 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ */
+
+#include <linux/version.h>
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/thread_info.h>
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/spitfire.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/errno.h>
+#include <asm/signal.h>
+#include <asm/processor.h>
+#include <asm/lsu.h>
+#include <asm/dcr.h>
+#include <asm/dcu.h>
+#include <asm/head.h>
+#include <asm/ttable.h>
+#include <asm/mmu.h>
+#include <asm/cpudata.h>
+#include <asm/pil.h>
+#include <asm/estate.h>
+#include <asm/sfafsr.h>
+#include <asm/unistd.h>
+#include <asm/export.h>
+
+/* This section, from _start to sparc64_boot_end, should fit into
+ * 0x0000000000404000 to 0x0000000000408000.
+ */
+	.text
+	.globl	start, _start, stext, _stext
+_start:
+start:
+_stext:
+stext:
+! 0x0000000000404000
+	b	sparc64_boot
+	 flushw					/* Flush register file.      */
+
+/* This stuff has to be in sync with SILO and other potential boot loaders.
+ * Fields should be kept upward compatible and whenever any change is made,
+ * HdrS version should be incremented.
+ */
+        .global root_flags, ram_flags, root_dev
+        .global sparc_ramdisk_image, sparc_ramdisk_size
+	.global sparc_ramdisk_image64
+
+        .ascii  "HdrS"
+        .word   LINUX_VERSION_CODE
+
+	/* History:
+	 *
+	 * 0x0300 : Supports being located at other than 0x4000
+	 * 0x0202 : Supports kernel params string
+	 * 0x0201 : Supports reboot_command
+	 */
+	.half   0x0301          /* HdrS version */
+
+root_flags:
+        .half   1
+root_dev:
+        .half   0
+ram_flags:
+        .half   0
+sparc_ramdisk_image:
+        .word   0
+sparc_ramdisk_size:
+        .word   0
+        .xword  reboot_command
+	.xword	bootstr_info
+sparc_ramdisk_image64:
+	.xword	0
+	.word	_end
+
+	/* PROM cif handler code address is in %o4.  */
+sparc64_boot:
+	mov	%o4, %l7
+
+	/* We need to remap the kernel.  Use position independent
+	 * code to remap us to KERNBASE.
+	 *
+	 * SILO can invoke us with 32-bit address masking enabled,
+	 * so make sure that's clear.
+	 */
+	rdpr	%pstate, %g1
+	andn	%g1, PSTATE_AM, %g1
+	wrpr	%g1, 0x0, %pstate
+	ba,a,pt	%xcc, 1f
+	 nop
+
+	.globl	prom_finddev_name, prom_chosen_path, prom_root_node
+	.globl	prom_getprop_name, prom_mmu_name, prom_peer_name
+	.globl	prom_callmethod_name, prom_translate_name, prom_root_compatible
+	.globl	prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
+	.globl	prom_boot_mapped_pc, prom_boot_mapping_mode
+	.globl	prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
+	.globl	prom_compatible_name, prom_cpu_path, prom_cpu_compatible
+	.globl	is_sun4v, sun4v_chip_type, prom_set_trap_table_name
+prom_peer_name:
+	.asciz	"peer"
+prom_compatible_name:
+	.asciz	"compatible"
+prom_finddev_name:
+	.asciz	"finddevice"
+prom_chosen_path:
+	.asciz	"/chosen"
+prom_cpu_path:
+	.asciz	"/cpu"
+prom_getprop_name:
+	.asciz	"getprop"
+prom_mmu_name:
+	.asciz	"mmu"
+prom_callmethod_name:
+	.asciz	"call-method"
+prom_translate_name:
+	.asciz	"translate"
+prom_map_name:
+	.asciz	"map"
+prom_unmap_name:
+	.asciz	"unmap"
+prom_set_trap_table_name:
+	.asciz	"SUNW,set-trap-table"
+prom_sun4v_name:
+	.asciz	"sun4v"
+prom_niagara_prefix:
+	.asciz	"SUNW,UltraSPARC-T"
+prom_sparc_prefix:
+	.asciz	"SPARC-"
+prom_sparc64x_prefix:
+	.asciz	"SPARC64-X"
+	.align	4
+prom_root_compatible:
+	.skip	64
+prom_cpu_compatible:
+	.skip	64
+prom_root_node:
+	.word	0
+EXPORT_SYMBOL(prom_root_node)
+prom_mmu_ihandle_cache:
+	.word	0
+prom_boot_mapped_pc:
+	.word	0
+prom_boot_mapping_mode:
+	.word	0
+	.align	8
+prom_boot_mapping_phys_high:
+	.xword	0
+prom_boot_mapping_phys_low:
+	.xword	0
+is_sun4v:
+	.word	0
+sun4v_chip_type:
+	.word	SUN4V_CHIP_INVALID
+EXPORT_SYMBOL(sun4v_chip_type)
+1:
+	rd	%pc, %l0
+
+	mov	(1b - prom_peer_name), %l1
+	sub	%l0, %l1, %l1
+	mov	0, %l2
+
+	/* prom_root_node = prom_peer(0) */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "peer"
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 1
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1, 0
+	stx	%g0, [%sp + 2047 + 128 + 0x20]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x20], %l4	! prom root node
+	mov	(1b - prom_root_node), %l1
+	sub	%l0, %l1, %l1
+	stw	%l4, [%l1]
+
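Each of these PROM calls goes through the IEEE 1275 client interface entry
point saved in %l7: the buffer passed in %o0 holds { service-name,
num-args, num-rets, args..., rets... }, one 64-bit cell each. The "peer"
call above, as a C sketch (cif_handler stands for the entry point the PROM
handed over in %o4):

	unsigned long cif_args[5];

	cif_args[0] = (unsigned long)"peer";	/* service */
	cif_args[1] = 1;			/* num_args */
	cif_args[2] = 1;			/* num_rets */
	cif_args[3] = 0;			/* arg1: node 0 */
	cif_args[4] = 0;			/* ret1: out */
	cif_handler(cif_args);
	prom_root_node = cif_args[4];		/* the root node handle */
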
+	mov	(1b - prom_getprop_name), %l1
+	mov	(1b - prom_compatible_name), %l2
+	mov	(1b - prom_root_compatible), %l5
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l5, %l5
+
+	/* prom_getproperty(prom_root_node, "compatible",
+	 *                  &prom_root_compatible, 64)
+	 */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "getprop"
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 4
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l4, [%sp + 2047 + 128 + 0x18]	! arg1, prom_root_node
+	stx	%l2, [%sp + 2047 + 128 + 0x20]	! arg2, "compatible"
+	stx	%l5, [%sp + 2047 + 128 + 0x28]	! arg3, &prom_root_compatible
+	mov	64, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4, size
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	mov	(1b - prom_finddev_name), %l1
+	mov	(1b - prom_chosen_path), %l2
+	mov	(1b - prom_boot_mapped_pc), %l3
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l3, %l3
+	stw	%l0, [%l3]
+	sub	%sp, (192 + 128), %sp
+
+	/* chosen_node = prom_finddevice("/chosen") */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "finddevice"
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 1
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1, "/chosen"
+	stx	%g0, [%sp + 2047 + 128 + 0x20]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x20], %l4	! chosen device node
+
+	mov	(1b - prom_getprop_name), %l1
+	mov	(1b - prom_mmu_name), %l2
+	mov	(1b - prom_mmu_ihandle_cache), %l5
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l5, %l5
+
+	/* prom_mmu_ihandle_cache = prom_getint(chosen_node, "mmu") */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "getprop"
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 4
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l4, [%sp + 2047 + 128 + 0x18]	! arg1, chosen_node
+	stx	%l2, [%sp + 2047 + 128 + 0x20]	! arg2, "mmu"
+	stx	%l5, [%sp + 2047 + 128 + 0x28]	! arg3, &prom_mmu_ihandle_cache
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4, sizeof(arg3)
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	mov	(1b - prom_callmethod_name), %l1
+	mov	(1b - prom_translate_name), %l2
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	lduw	[%l5], %l5			! prom_mmu_ihandle_cache
+
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "call-method"
+	mov	3, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 3
+	mov	5, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 5
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1: "translate"
+	stx	%l5, [%sp + 2047 + 128 + 0x20]	! arg2: prom_mmu_ihandle_cache
+	/* PAGE align */
+	srlx	%l0, 13, %l3
+	sllx	%l3, 13, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x28]	! arg3: vaddr, our PC
+	stx	%g0, [%sp + 2047 + 128 + 0x30]	! res1
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! res2
+	stx	%g0, [%sp + 2047 + 128 + 0x40]	! res3
+	stx	%g0, [%sp + 2047 + 128 + 0x48]	! res4
+	stx	%g0, [%sp + 2047 + 128 + 0x50]	! res5
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x40], %l1	! translation mode
+	mov	(1b - prom_boot_mapping_mode), %l4
+	sub	%l0, %l4, %l4
+	stw	%l1, [%l4]
+	mov	(1b - prom_boot_mapping_phys_high), %l4
+	sub	%l0, %l4, %l4
+	ldx	[%sp + 2047 + 128 + 0x48], %l2	! physaddr high
+	stx	%l2, [%l4 + 0x0]
+	ldx	[%sp + 2047 + 128 + 0x50], %l3	! physaddr low
+	/* 4MB align */
+	srlx	%l3, ILOG2_4MB, %l3
+	sllx	%l3, ILOG2_4MB, %l3
+	stx	%l3, [%l4 + 0x8]
+
+	/* Leave service as-is, "call-method" */
+	mov	7, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 7
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	mov	(1b - prom_map_name), %l3
+	sub	%l0, %l3, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x18]	! arg1: "map"
+	/* Leave arg2 as-is, prom_mmu_ihandle_cache */
+	mov	-1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x28]	! arg3: mode (-1 default)
+	/* 4MB align the kernel image size. */
+	set	(_end - KERNBASE), %l3
+	set	((4 * 1024 * 1024) - 1), %l4
+	add	%l3, %l4, %l3
+	andn	%l3, %l4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4: roundup(ksize, 4MB)
+	sethi	%hi(KERNBASE), %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x38]	! arg5: vaddr (KERNBASE)
+	stx	%g0, [%sp + 2047 + 128 + 0x40]	! arg6: empty
+	mov	(1b - prom_boot_mapping_phys_low), %l3
+	sub	%l0, %l3, %l3
+	ldx	[%l3], %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x48]	! arg7: phys addr
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	add	%sp, (192 + 128), %sp
+
+	sethi	%hi(prom_root_compatible), %g1
+	or	%g1, %lo(prom_root_compatible), %g1
+	sethi	%hi(prom_sun4v_name), %g7
+	or	%g7, %lo(prom_sun4v_name), %g7
+	mov	5, %g3
+90:	ldub	[%g7], %g2
+	ldub	[%g1], %g4
+	cmp	%g2, %g4
+	bne,pn	%icc, 80f
+	 add	%g7, 1, %g7
+	subcc	%g3, 1, %g3
+	bne,pt	%xcc, 90b
+	 add	%g1, 1, %g1
+
+	sethi	%hi(is_sun4v), %g1
+	or	%g1, %lo(is_sun4v), %g1
+	mov	1, %g7
+	stw	%g7, [%g1]
+
+	/* cpu_node = prom_finddevice("/cpu") */
+	mov	(1b - prom_finddev_name), %l1
+	mov	(1b - prom_cpu_path), %l2
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%sp, (192 + 128), %sp
+
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "finddevice"
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 1
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l2, [%sp + 2047 + 128 + 0x18]	! arg1, "/cpu"
+	stx	%g0, [%sp + 2047 + 128 + 0x20]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	ldx	[%sp + 2047 + 128 + 0x20], %l4	! cpu device node
+
+	mov	(1b - prom_getprop_name), %l1
+	mov	(1b - prom_compatible_name), %l2
+	mov	(1b - prom_cpu_compatible), %l5
+	sub	%l0, %l1, %l1
+	sub	%l0, %l2, %l2
+	sub	%l0, %l5, %l5
+
+	/* prom_getproperty(cpu_node, "compatible",
+	 *                  &prom_cpu_compatible, 64)
+	 */
+	stx	%l1, [%sp + 2047 + 128 + 0x00]	! service, "getprop"
+	mov	4, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x08]	! num_args, 4
+	mov	1, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x10]	! num_rets, 1
+	stx	%l4, [%sp + 2047 + 128 + 0x18]	! arg1, cpu_node
+	stx	%l2, [%sp + 2047 + 128 + 0x20]	! arg2, "compatible"
+	stx	%l5, [%sp + 2047 + 128 + 0x28]	! arg3, &prom_cpu_compatible
+	mov	64, %l3
+	stx	%l3, [%sp + 2047 + 128 + 0x30]	! arg4, size
+	stx	%g0, [%sp + 2047 + 128 + 0x38]	! ret1
+	call	%l7
+	 add	%sp, (2047 + 128), %o0		! argument array
+
+	add	%sp, (192 + 128), %sp
+
+	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	sethi	%hi(prom_niagara_prefix), %g7
+	or	%g7, %lo(prom_niagara_prefix), %g7
+	mov	17, %g3
+90:	ldub	[%g7], %g2
+	ldub	[%g1], %g4
+	cmp	%g2, %g4
+	bne,pn	%icc, 89f
+	 add	%g7, 1, %g7
+	subcc	%g3, 1, %g3
+	bne,pt	%xcc, 90b
+	 add	%g1, 1, %g1
+	ba,pt	%xcc, 91f
+	 nop
+
+89:	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	sethi	%hi(prom_sparc_prefix), %g7
+	or	%g7, %lo(prom_sparc_prefix), %g7
+	mov	6, %g3
+90:	ldub	[%g7], %g2
+	ldub	[%g1], %g4
+	cmp	%g2, %g4
+	bne,pn	%icc, 4f
+	 add	%g7, 1, %g7
+	subcc	%g3, 1, %g3
+	bne,pt	%xcc, 90b
+	 add	%g1, 1, %g1
+
+	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	ldub	[%g1 + 6], %g2
+	cmp	%g2, 'T'
+	be,pt	%xcc, 70f
+	 cmp	%g2, 'M'
+	be,pt	%xcc, 70f
+	 cmp	%g2, 'S'
+	bne,pn	%xcc, 49f
+	 nop
+
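+	/* At this point %g1 points at prom_cpu_compatible, which began
+	 * with "SPARC-"; byte 6 is the series letter and byte 7 the
+	 * model character tested below, e.g. "SPARC-M7" yields 'M'
+	 * then '7'.
+	 */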
+70:	ldub	[%g1 + 7], %g2
+	cmp	%g2, CPU_ID_NIAGARA3
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA3, %g4
+	cmp	%g2, CPU_ID_NIAGARA4
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA4, %g4
+	cmp	%g2, CPU_ID_NIAGARA5
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA5, %g4
+	cmp	%g2, CPU_ID_M6
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M6, %g4
+	cmp	%g2, CPU_ID_M7
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M7, %g4
+	cmp	%g2, CPU_ID_M8
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M8, %g4
+	cmp	%g2, CPU_ID_SONOMA1
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_SN, %g4
+	ba,pt	%xcc, 49f
+	 nop
+
+91:	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	ldub	[%g1 + 17], %g2
+	cmp	%g2, CPU_ID_NIAGARA1
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA1, %g4
+	cmp	%g2, CPU_ID_NIAGARA2
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_NIAGARA2, %g4
+
+4:
+	/* Athena */
+	sethi	%hi(prom_cpu_compatible), %g1
+	or	%g1, %lo(prom_cpu_compatible), %g1
+	sethi	%hi(prom_sparc64x_prefix), %g7
+	or	%g7, %lo(prom_sparc64x_prefix), %g7
+	mov	9, %g3
+41:	ldub	[%g7], %g2
+	ldub	[%g1], %g4
+	cmp	%g2, %g4
+	bne,pn	%icc, 49f
+	 add	%g7, 1, %g7
+	subcc	%g3, 1, %g3
+	bne,pt	%xcc, 41b
+	 add	%g1, 1, %g1
+	ba,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC64X, %g4
+
+49:
+	mov	SUN4V_CHIP_UNKNOWN, %g4
+5:	sethi	%hi(sun4v_chip_type), %g2
+	or	%g2, %lo(sun4v_chip_type), %g2
+	stw	%g4, [%g2]
+
+80:
+	BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
+	BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
+	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
+	ba,pt	%xcc, spitfire_boot
+	 nop
+
+cheetah_plus_boot:
+	/* Preserve OBP chosen DCU and DCR register settings.  */
+	ba,pt	%xcc, cheetah_generic_boot
+	 nop
+
+cheetah_boot:
+	mov	DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
+	wr	%g1, %asr18
+
+	sethi	%uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+	or	%g7, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g7
+	sllx	%g7, 32, %g7
+	or	%g7, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g7
+	stxa	%g7, [%g0] ASI_DCU_CONTROL_REG
+	membar	#Sync
+
+cheetah_generic_boot:
+	mov	TSB_EXTENSION_P, %g3
+	stxa	%g0, [%g3] ASI_DMMU
+	stxa	%g0, [%g3] ASI_IMMU
+	membar	#Sync
+
+	mov	TSB_EXTENSION_S, %g3
+	stxa	%g0, [%g3] ASI_DMMU
+	membar	#Sync
+
+	mov	TSB_EXTENSION_N, %g3
+	stxa	%g0, [%g3] ASI_DMMU
+	stxa	%g0, [%g3] ASI_IMMU
+	membar	#Sync
+
+	ba,a,pt	%xcc, jump_to_sun4u_init
+
+spitfire_boot:
+	/* Typically PROM has already enabled both MMU's and both on-chip
+	 * caches, but we do it here anyway just to be paranoid.
+	 */
+	mov	(LSU_CONTROL_IC|LSU_CONTROL_DC|LSU_CONTROL_IM|LSU_CONTROL_DM), %g1
+	stxa	%g1, [%g0] ASI_LSU_CONTROL
+	membar	#Sync
+
+jump_to_sun4u_init:
+	/*
+	 * Make sure we are in privileged mode, have address masking,
+	 * are using the ordinary globals, and have enabled floating
+	 * point.
+	 *
+	 * Again, typically PROM has left %pil at 13 or similar, and
+	 * (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE) in %pstate.
+	 */
+	wrpr	%g0, (PSTATE_PRIV|PSTATE_PEF|PSTATE_IE), %pstate
+	wr	%g0, 0, %fprs
+
+	set	sun4u_init, %g2
+	jmpl	%g2 + %g0, %g0
+	 nop
+
+	__REF
+sun4u_init:
+	BRANCH_IF_SUN4V(g1, sun4v_init)
+
+	/* Set ctx 0 */
+	mov		PRIMARY_CONTEXT, %g7
+	stxa		%g0, [%g7] ASI_DMMU
+	membar		#Sync
+
+	mov		SECONDARY_CONTEXT, %g7
+	stxa		%g0, [%g7] ASI_DMMU
+	membar	#Sync
+
+	ba,a,pt		%xcc, sun4u_continue
+
+sun4v_init:
+	/* Set ctx 0 */
+	mov		PRIMARY_CONTEXT, %g7
+	stxa		%g0, [%g7] ASI_MMU
+	membar		#Sync
+
+	mov		SECONDARY_CONTEXT, %g7
+	stxa		%g0, [%g7] ASI_MMU
+	membar		#Sync
+	ba,a,pt		%xcc, niagara_tlb_fixup
+
+sun4u_continue:
+	BRANCH_IF_ANY_CHEETAH(g1, g7, cheetah_tlb_fixup)
+
+	ba,a,pt	%xcc, spitfire_tlb_fixup
+
+niagara_tlb_fixup:
+	mov	3, %g2		/* Set TLB type to hypervisor. */
+	sethi	%hi(tlb_type), %g1
+	stw	%g2, [%g1 + %lo(tlb_type)]
+
+	/* Patch copy/clear ops.  */
+	sethi	%hi(sun4v_chip_type), %g1
+	lduw	[%g1 + %lo(sun4v_chip_type)], %g1
+	cmp	%g1, SUN4V_CHIP_NIAGARA1
+	be,pt	%xcc, niagara_patch
+	 cmp	%g1, SUN4V_CHIP_NIAGARA2
+	be,pt	%xcc, niagara2_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_NIAGARA3
+	be,pt	%xcc, niagara2_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_NIAGARA4
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_NIAGARA5
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M6
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M7
+	be,pt	%xcc, sparc_m7_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M8
+	be,pt	%xcc, sparc_m7_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_SN
+	be,pt	%xcc, niagara4_patch
+	 nop
+
+	call	generic_patch_copyops
+	 nop
+	call	generic_patch_bzero
+	 nop
+	call	generic_patch_pageops
+	 nop
+
+	ba,a,pt	%xcc, 80f
+	 nop
+
+sparc_m7_patch:
+	call	m7_patch_copyops
+	 nop
+	call	m7_patch_bzero
+	 nop
+	call	m7_patch_pageops
+	 nop
+
+	ba,a,pt	%xcc, 80f
+	 nop
+
+niagara4_patch:
+	call	niagara4_patch_copyops
+	 nop
+	call	niagara4_patch_bzero
+	 nop
+	call	niagara4_patch_pageops
+	 nop
+	call	niagara4_patch_fls
+	 nop
+
+	ba,a,pt	%xcc, 80f
+	 nop
+
+niagara2_patch:
+	call	niagara2_patch_copyops
+	 nop
+	call	niagara_patch_bzero
+	 nop
+	call	niagara_patch_pageops
+	 nop
+
+	ba,a,pt	%xcc, 80f
+	 nop
+
+niagara_patch:
+	call	niagara_patch_copyops
+	 nop
+	call	niagara_patch_bzero
+	 nop
+	call	niagara_patch_pageops
+	 nop
+
+80:
+	/* Patch TLB/cache ops.  */
+	call	hypervisor_patch_cachetlbops
+	 nop
+
+	ba,a,pt	%xcc, tlb_fixup_done
+
+cheetah_tlb_fixup:
+	mov	2, %g2		/* Set TLB type to cheetah+. */
+	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,1f)
+
+	mov	1, %g2		/* Set TLB type to cheetah. */
+
+1:	sethi	%hi(tlb_type), %g1
+	stw	%g2, [%g1 + %lo(tlb_type)]
+
+	/* Patch copy/page operations to cheetah optimized versions. */
+	call	cheetah_patch_copyops
+	 nop
+	call	cheetah_patch_copy_page
+	 nop
+	call	cheetah_patch_cachetlbops
+	 nop
+
+	ba,a,pt	%xcc, tlb_fixup_done
+
+spitfire_tlb_fixup:
+	/* Set TLB type to spitfire. */
+	mov	0, %g2
+	sethi	%hi(tlb_type), %g1
+	stw	%g2, [%g1 + %lo(tlb_type)]
+
+tlb_fixup_done:
+	sethi	%hi(init_thread_union), %g6
+	or	%g6, %lo(init_thread_union), %g6
+	ldx	[%g6 + TI_TASK], %g4
+
+	wr	%g0, ASI_P, %asi
+	mov	1, %g1
+	sllx	%g1, THREAD_SHIFT, %g1
+	sub	%g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+	add	%g6, %g1, %sp
+
+	/* Set per-cpu pointer initially to zero, this makes
+	 * the boot-cpu use the in-kernel-image per-cpu areas
+	 * before setup_per_cpu_area() is invoked.
+	 */
+	clr	%g5
+
+	wrpr	%g0, 0, %wstate
+	wrpr	%g0, 0x0, %tl
+
+	/* Clear the bss */
+	sethi	%hi(__bss_start), %o0
+	or	%o0, %lo(__bss_start), %o0
+	sethi	%hi(_end), %o1
+	or	%o1, %lo(_end), %o1
+	call	__bzero
+	 sub	%o1, %o0, %o1
+
+	call	prom_init
+	 mov	%l7, %o0			! OpenPROM cif handler
+
+	/* To create a one-register-window buffer between the kernel's
+	 * initial stack and the last stack frame we use from the firmware,
+	 * do the rest of the boot from a C helper function.
+	 */
+	call	start_early_boot
+	 nop
+	/* Not reached... */
+
+	.previous
+
+	/* This is meant to allow the sharing of this code between
+	 * boot processor invocation (via setup_tba() below) and
+	 * secondary processor startup (via trampoline.S).  The
+	 * former does use this code, the latter does not yet due
+	 * to some complexities.  That should be fixed up at some
+	 * point.
+	 *
+	 * There used to be enormous complexity wrt. transferring
+	 * over from the firmware's trap table to the Linux kernel's.
+	 * For example, there was a chicken & egg problem wrt. building
+	 * the OBP page tables, yet needing to be on the Linux kernel
+	 * trap table (to translate PAGE_OFFSET addresses) in order to
+	 * do that.
+	 *
+	 * We now handle OBP tlb misses differently, via linear lookups
+	 * into the prom_trans[] array.  So that specific problem no
+	 * longer exists.  Yet, unfortunately there are still some issues
+	 * preventing trampoline.S from using this code... ho hum.
+	 */
+	.globl	setup_trap_table
+setup_trap_table:
+	save	%sp, -192, %sp
+
+	/* Force interrupts to be disabled. */
+	rdpr	%pstate, %l0
+	andn	%l0, PSTATE_IE, %o1
+	wrpr	%o1, 0x0, %pstate
+	rdpr	%pil, %l1
+	wrpr	%g0, PIL_NORMAL_MAX, %pil
+
+	/* Make the firmware call to jump over to the Linux trap table.  */
+	sethi	%hi(is_sun4v), %o0
+	lduw	[%o0 + %lo(is_sun4v)], %o0
+	brz,pt	%o0, 1f
+	 nop
+
+	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+	add	%g2, TRAP_PER_CPU_FAULT_INFO, %g2
+	stxa	%g2, [%g0] ASI_SCRATCHPAD
+
+	/* Compute physical address:
+	 *
+	 * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+	 */
+	sethi	%hi(KERNBASE), %g3
+	sub	%g2, %g3, %g2
+	sethi	%hi(kern_base), %g3
+	ldx	[%g3 + %lo(kern_base)], %g3
+	add	%g2, %g3, %o1
+	sethi	%hi(sparc64_ttable_tl0), %o0
+
+	set	prom_set_trap_table_name, %g2
+	stx	%g2, [%sp + 2047 + 128 + 0x00]
+	mov	2, %g2
+	stx	%g2, [%sp + 2047 + 128 + 0x08]
+	mov	0, %g2
+	stx	%g2, [%sp + 2047 + 128 + 0x10]
+	stx	%o0, [%sp + 2047 + 128 + 0x18]
+	stx	%o1, [%sp + 2047 + 128 + 0x20]
+	sethi	%hi(p1275buf), %g2
+	or	%g2, %lo(p1275buf), %g2
+	ldx	[%g2 + 0x08], %o1
+	call	%o1
+	 add	%sp, (2047 + 128), %o0
+
+	ba,a,pt	%xcc, 2f
+
+1:	sethi	%hi(sparc64_ttable_tl0), %o0
+	set	prom_set_trap_table_name, %g2
+	stx	%g2, [%sp + 2047 + 128 + 0x00]
+	mov	1, %g2
+	stx	%g2, [%sp + 2047 + 128 + 0x08]
+	mov	0, %g2
+	stx	%g2, [%sp + 2047 + 128 + 0x10]
+	stx	%o0, [%sp + 2047 + 128 + 0x18]
+	sethi	%hi(p1275buf), %g2
+	or	%g2, %lo(p1275buf), %g2
+	ldx	[%g2 + 0x08], %o1
+	call	%o1
+	 add	%sp, (2047 + 128), %o0
+
+	/* Start using proper page size encodings in ctx register.  */
+2:	sethi	%hi(sparc64_kern_pri_context), %g3
+	ldx	[%g3 + %lo(sparc64_kern_pri_context)], %g2
+
+	mov		PRIMARY_CONTEXT, %g1
+
+661:	stxa		%g2, [%g1] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g2, [%g1] ASI_MMU
+	.previous
+
+	membar	#Sync
+
+	BRANCH_IF_SUN4V(o2, 1f)
+
+	/* Kill PROM timer */
+	sethi	%hi(0x80000000), %o2
+	sllx	%o2, 32, %o2
+	wr	%o2, 0, %tick_cmpr
+
+	BRANCH_IF_ANY_CHEETAH(o2, o3, 1f)
+
+	ba,a,pt	%xcc, 2f
+
+	/* Disable STICK_INT interrupts. */
+1:
+	sethi	%hi(0x80000000), %o2
+	sllx	%o2, 32, %o2
+	wr	%o2, %asr25
+
+2:
+	wrpr	%g0, %g0, %wstate
+
+	call	init_irqwork_curcpu
+	 nop
+
+	/* Now we can restore interrupt state. */
+	wrpr	%l0, 0, %pstate
+	wrpr	%l1, 0x0, %pil
+
+	ret
+	 restore
+
+	.globl	setup_tba
+setup_tba:
+	save	%sp, -192, %sp
+
+	/* The boot processor is the only cpu which invokes this
+	 * routine, the other cpus set things up via trampoline.S.
+	 * So save the OBP trap table address here.
+	 */
+	rdpr	%tba, %g7
+	sethi	%hi(prom_tba), %o1
+	or	%o1, %lo(prom_tba), %o1
+	stx	%g7, [%o1]
+
+	call	setup_trap_table
+	 nop
+
+	ret
+	 restore
+sparc64_boot_end:
+
+#include "etrap_64.S"
+#include "rtrap_64.S"
+#include "winfixup.S"
+#include "fpu_traps.S"
+#include "ivec.S"
+#include "getsetcc.S"
+#include "utrap.S"
+#include "spiterrs.S"
+#include "cherrs.S"
+#include "misctrap.S"
+#include "syscalls.S"
+#include "helpers.S"
+#include "sun4v_tlb_miss.S"
+#include "sun4v_mcd.S"
+#include "sun4v_ivec.S"
+#include "ktlb.S"
+#include "tsb.S"
+
+/*
+ * The following skip makes sure the trap table in ttable.S is aligned
+ * on a 32K boundary as required by the v9 specs for the TBA register.
+ *
+ * We align to a 32K boundary, then we have the 32K kernel TSB,
+ * the 64K kernel 4MB TSB, and then the 32K aligned trap table.
+ */
+1:
+	.skip	0x4000 + _start - 1b
+
+! 0x0000000000408000
+
+	.globl	swapper_tsb
+swapper_tsb:
+	.skip	(32 * 1024)
+
+	.globl	swapper_4m_tsb
+swapper_4m_tsb:
+	.skip	(64 * 1024)
+
+! 0x0000000000420000
+
+	/* Some care needs to be exercised if you try to move the
+	 * location of the trap table relative to other things.  For
+	 * one thing there are br* instructions in some of the
+	 * trap table entries which branch back to code in ktlb.S.
+	 * Those instructions can only handle a signed 16-bit
+	 * displacement.
+	 *
+	 * There is a binutils bug (bugzilla #4558) which causes
+	 * the relocation overflow checks for such instructions to
+	 * not be done correctly.  So binutils will not notice the
+	 * error and will instead write junk into the relocation and
+	 * you'll have an unbootable kernel.
+	 */
+#include "ttable_64.S"
+
+! 0x0000000000428000
+
+#include "hvcalls.S"
+#include "systbls_64.S"
+
+	.data
+	.align	8
+	.globl	prom_tba, tlb_type
+prom_tba:	.xword	0
+tlb_type:	.word	0	/* Must NOT end up in BSS */
+EXPORT_SYMBOL(tlb_type)
+	.section	".fixup",#alloc,#execinstr
+
+ENTRY(__retl_efault)
+	retl
+	 mov	-EFAULT, %o0
+ENDPROC(__retl_efault)
+
+ENTRY(__retl_o1)
+	retl
+	 mov	%o1, %o0
+ENDPROC(__retl_o1)
+
+ENTRY(__retl_o1_asi)
+	wr      %o5, 0x0, %asi
+	retl
+	 mov    %o1, %o0
+ENDPROC(__retl_o1_asi)
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
new file mode 100644
index 0000000..e4e5b83
--- /dev/null
+++ b/arch/sparc/kernel/helpers.S
@@ -0,0 +1,65 @@
+	.align	32
+	.globl	__flushw_user
+	.type	__flushw_user,#function
+__flushw_user:
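+	/* Flush user register windows to the stack by executing save
+	 * until %otherwin reaches zero (each save spills one user
+	 * window), counting the saves in %g2, then unwind with the
+	 * same number of restores.
+	 */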
+	rdpr	%otherwin, %g1
+	brz,pn	%g1, 2f
+	 clr	%g2
+1:	save	%sp, -128, %sp
+	rdpr	%otherwin, %g1
+	brnz,pt	%g1, 1b
+	 add	%g2, 1, %g2
+1:	sub	%g2, 1, %g2
+	brnz,pt	%g2, 1b
+	 restore %g0, %g0, %g0
+2:	retl
+	 nop
+	.size	__flushw_user,.-__flushw_user
+EXPORT_SYMBOL(__flushw_user)
+
+	/* Flush %fp and %i7 to the stack for all register
+	 * windows active inside of the cpu.  This allows
+	 * show_stack_trace() to avoid using an expensive
+	 * 'flushw'.
+	 */
+	.globl		stack_trace_flush
+	.type		stack_trace_flush,#function
+stack_trace_flush:
+	rdpr		%pstate, %o0
+	wrpr		%o0, PSTATE_IE, %pstate
+
+	rdpr		%cwp, %g1
+	rdpr		%canrestore, %g2
+	sub		%g1, 1, %g3
+
+1:	brz,pn		%g2, 2f
+	 sub		%g2, 1, %g2
+	wrpr		%g3, %cwp
+	stx		%fp, [%sp + STACK_BIAS + RW_V9_I6]
+	stx		%i7, [%sp + STACK_BIAS + RW_V9_I7]
+	ba,pt		%xcc, 1b
+	 sub		%g3, 1, %g3
+
+2:	wrpr		%g1, %cwp
+	wrpr		%o0, %pstate
+
+	retl
+	 nop
+	.size		stack_trace_flush,.-stack_trace_flush
+
+#ifdef CONFIG_SMP
+	.globl		hard_smp_processor_id
+	.type		hard_smp_processor_id,#function
+hard_smp_processor_id:
+#endif
+	.globl		real_hard_smp_processor_id
+	.type		real_hard_smp_processor_id,#function
+real_hard_smp_processor_id:
+	__GET_CPUID(%o0)
+	retl
+	 nop
+#ifdef CONFIG_SMP
+	.size		hard_smp_processor_id,.-hard_smp_processor_id
+#endif
+	.size		real_hard_smp_processor_id,.-real_hard_smp_processor_id
+EXPORT_SYMBOL_GPL(real_hard_smp_processor_id)
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
new file mode 100644
index 0000000..717ec7e
--- /dev/null
+++ b/arch/sparc/kernel/hvapi.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0
+/* hvapi.c: Hypervisor API management.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+#include <asm/hypervisor.h>
+#include <asm/oplib.h>
+
+/* If the hypervisor indicates that the API setting
+ * calls are unsupported (by returning HV_EBADTRAP or
+ * HV_ENOTSUPPORTED), we assume that API groups with the
+ * PRE_API flag set are major 1 minor 0.
+ */
+struct api_info {
+	unsigned long group;
+	unsigned long major;
+	unsigned long minor;
+	unsigned int refcnt;
+	unsigned int flags;
+#define FLAG_PRE_API		0x00000001
+};
+
+static struct api_info api_table[] = {
+	{ .group = HV_GRP_SUN4V,	.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_CORE,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_INTR,					},
+	{ .group = HV_GRP_SOFT_STATE,				},
+	{ .group = HV_GRP_TM,					},
+	{ .group = HV_GRP_PCI,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_LDOM,					},
+	{ .group = HV_GRP_SVC_CHAN,	.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_NCS,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_RNG,					},
+	{ .group = HV_GRP_PBOOT,				},
+	{ .group = HV_GRP_TPM,					},
+	{ .group = HV_GRP_SDIO,					},
+	{ .group = HV_GRP_SDIO_ERR,				},
+	{ .group = HV_GRP_REBOOT_DATA,				},
+	{ .group = HV_GRP_ATU,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_DAX,					},
+	{ .group = HV_GRP_NIAG_PERF,	.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_FIRE_PERF,				},
+	{ .group = HV_GRP_N2_CPU,				},
+	{ .group = HV_GRP_NIU,					},
+	{ .group = HV_GRP_VF_CPU,				},
+	{ .group = HV_GRP_KT_CPU,				},
+	{ .group = HV_GRP_VT_CPU,				},
+	{ .group = HV_GRP_T5_CPU,				},
+	{ .group = HV_GRP_DIAG,		.flags = FLAG_PRE_API	},
+	{ .group = HV_GRP_M7_PERF,				},
+};
+
+static DEFINE_SPINLOCK(hvapi_lock);
+
+static struct api_info *__get_info(unsigned long group)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(api_table); i++) {
+		if (api_table[i].group == group)
+			return &api_table[i];
+	}
+	return NULL;
+}
+
+static void __get_ref(struct api_info *p)
+{
+	p->refcnt++;
+}
+
+static void __put_ref(struct api_info *p)
+{
+	if (--p->refcnt == 0) {
+		unsigned long ignore;
+
+		sun4v_set_version(p->group, 0, 0, &ignore);
+		p->major = p->minor = 0;
+	}
+}
+
+/* Register a hypervisor API specification.  It indicates the
+ * API group and desired major+minor.
+ *
+ * If a registration for this group already exists, '0' (success)
+ * is returned if the request is compatible with it, and a negative
+ * error code otherwise.
+ *
+ * If no registration exists yet, an attempt is made to negotiate
+ * the requested API group/major/minor with the hypervisor, and
+ * errors are returned if that does not succeed.
+ */
+int sun4v_hvapi_register(unsigned long group, unsigned long major,
+			 unsigned long *minor)
+{
+	struct api_info *p;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&hvapi_lock, flags);
+	p = __get_info(group);
+	ret = -EINVAL;
+	if (p) {
+		if (p->refcnt) {
+			ret = -EINVAL;
+			if (p->major == major) {
+				*minor = p->minor;
+				ret = 0;
+			}
+		} else {
+			unsigned long actual_minor;
+			unsigned long hv_ret;
+
+			hv_ret = sun4v_set_version(group, major, *minor,
+						   &actual_minor);
+			ret = -EINVAL;
+			if (hv_ret == HV_EOK) {
+				*minor = actual_minor;
+				p->major = major;
+				p->minor = actual_minor;
+				ret = 0;
+			} else if (hv_ret == HV_EBADTRAP ||
+				   hv_ret == HV_ENOTSUPPORTED) {
+				if (p->flags & FLAG_PRE_API) {
+					if (major == 1) {
+						p->major = 1;
+						p->minor = 0;
+						*minor = 0;
+						ret = 0;
+					}
+				}
+			}
+		}
+
+		if (ret == 0)
+			__get_ref(p);
+	}
+	spin_unlock_irqrestore(&hvapi_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(sun4v_hvapi_register);
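+
+/* A minimal usage sketch (hypothetical caller; the requested
+ * version numbers are illustrative):
+ *
+ *	unsigned long minor = 1;
+ *
+ *	if (sun4v_hvapi_register(HV_GRP_INTR, 1, &minor))
+ *		return -ENODEV;
+ *	...
+ *	sun4v_hvapi_unregister(HV_GRP_INTR);
+ */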
+
+void sun4v_hvapi_unregister(unsigned long group)
+{
+	struct api_info *p;
+	unsigned long flags;
+
+	spin_lock_irqsave(&hvapi_lock, flags);
+	p = __get_info(group);
+	if (p)
+		__put_ref(p);
+	spin_unlock_irqrestore(&hvapi_lock, flags);
+}
+EXPORT_SYMBOL(sun4v_hvapi_unregister);
+
+int sun4v_hvapi_get(unsigned long group,
+		    unsigned long *major,
+		    unsigned long *minor)
+{
+	struct api_info *p;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&hvapi_lock, flags);
+	ret = -EINVAL;
+	p = __get_info(group);
+	if (p && p->refcnt) {
+		*major = p->major;
+		*minor = p->minor;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&hvapi_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(sun4v_hvapi_get);
+
+void __init sun4v_hvapi_init(void)
+{
+	unsigned long group, major, minor;
+
+	group = HV_GRP_SUN4V;
+	major = 1;
+	minor = 0;
+	if (sun4v_hvapi_register(group, major, &minor))
+		goto bad;
+
+	group = HV_GRP_CORE;
+	major = 1;
+	minor = 6;
+	if (sun4v_hvapi_register(group, major, &minor))
+		goto bad;
+
+	return;
+
+bad:
+	prom_printf("HVAPI: Cannot register API group "
+		    "%lx with major(%lu) minor(%lu)\n",
+		    group, major, minor);
+	prom_halt();
+}
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
new file mode 100644
index 0000000..2f865a4
--- /dev/null
+++ b/arch/sparc/kernel/hvcalls.S
@@ -0,0 +1,930 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	/* %o0: devhandle
+	 * %o1:	devino
+	 *
+	 * returns %o0: sysino
+	 */
+ENTRY(sun4v_devino_to_sysino)
+	mov	HV_FAST_INTR_DEVINO2SYSINO, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o1, %o0
+ENDPROC(sun4v_devino_to_sysino)
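+
+	/* All of the fast-trap wrappers in this file share one pattern:
+	 * the hypervisor function number is loaded into %o5, arguments
+	 * stay in %o0-%o4, and "ta HV_FAST_TRAP" enters the hypervisor,
+	 * which returns a status in %o0 and any results in %o1 onward.
+	 * Where a wrapper returns data through a pointer, the pointer
+	 * is first copied to a scratch register since the trap
+	 * overwrites the %o register it arrived in.
+	 */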
+
+	/* %o0: sysino
+	 *
+	 * returns %o0: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+	 */
+ENTRY(sun4v_intr_getenabled)
+	mov	HV_FAST_INTR_GETENABLED, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o1, %o0
+ENDPROC(sun4v_intr_getenabled)
+
+	/* %o0: sysino
+	 * %o1: intr_enabled (HV_INTR_{DISABLED,ENABLED})
+	 */
+ENTRY(sun4v_intr_setenabled)
+	mov	HV_FAST_INTR_SETENABLED, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_intr_setenabled)
+
+	/* %o0: sysino
+	 *
+	 * returns %o0: intr_state (HV_INTR_STATE_*)
+	 */
+ENTRY(sun4v_intr_getstate)
+	mov	HV_FAST_INTR_GETSTATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o1, %o0
+ENDPROC(sun4v_intr_getstate)
+
+	/* %o0: sysino
+	 * %o1: intr_state (HV_INTR_STATE_*)
+	 */
+ENTRY(sun4v_intr_setstate)
+	mov	HV_FAST_INTR_SETSTATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_intr_setstate)
+
+	/* %o0: sysino
+	 *
+	 * returns %o0: cpuid
+	 */
+ENTRY(sun4v_intr_gettarget)
+	mov	HV_FAST_INTR_GETTARGET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o1, %o0
+ENDPROC(sun4v_intr_gettarget)
+
+	/* %o0: sysino
+	 * %o1: cpuid
+	 */
+ENTRY(sun4v_intr_settarget)
+	mov	HV_FAST_INTR_SETTARGET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_intr_settarget)
+
+	/* %o0:	cpuid
+	 * %o1: pc
+	 * %o2:	rtba
+	 * %o3:	arg0
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_cpu_start)
+	mov	HV_FAST_CPU_START, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_cpu_start)
+
+	/* %o0:	cpuid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_cpu_stop)
+	mov	HV_FAST_CPU_STOP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_cpu_stop)
+
+	/* returns %o0:	status  */
+ENTRY(sun4v_cpu_yield)
+	mov	HV_FAST_CPU_YIELD, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_cpu_yield)
+
+	/* %o0: cpuid
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_cpu_poke)
+	mov     HV_FAST_CPU_POKE, %o5
+	ta      HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_cpu_poke)
+
+	/* %o0:	type
+	 * %o1:	queue paddr
+	 * %o2:	num queue entries
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_cpu_qconf)
+	mov	HV_FAST_CPU_QCONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_cpu_qconf)
+
+	/* %o0:	num cpus in cpu list
+	 * %o1:	cpu list paddr
+	 * %o2:	mondo block paddr
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_cpu_mondo_send)
+	mov	HV_FAST_CPU_MONDO_SEND, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_cpu_mondo_send)
+
+	/* %o0:	CPU ID
+	 *
+	 * returns %o0:	-status if status non-zero, else
+	 *         %o0:	cpu state as HV_CPU_STATE_*
+	 */
+ENTRY(sun4v_cpu_state)
+	mov	HV_FAST_CPU_STATE, %o5
+	ta	HV_FAST_TRAP
+	brnz,pn	%o0, 1f
+	 sub	%g0, %o0, %o0
+	mov	%o1, %o0
+1:	retl
+	 nop
+ENDPROC(sun4v_cpu_state)
+
+	/* %o0: virtual address
+	 * %o1: must be zero
+	 * %o2: TTE
+	 * %o3: HV_MMU_* flags
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_mmu_map_perm_addr)
+	mov	HV_FAST_MMU_MAP_PERM_ADDR, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_mmu_map_perm_addr)
+
+	/* %o0: number of TSB descriptions
+	 * %o1: TSB descriptions real address
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_mmu_tsb_ctx0)
+	mov	HV_FAST_MMU_TSB_CTX0, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_mmu_tsb_ctx0)
+
+	/* %o0:	API group number
+	 * %o1: pointer to unsigned long major number storage
+	 * %o2: pointer to unsigned long minor number storage
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_get_version)
+	mov	HV_CORE_GET_VER, %o5
+	mov	%o1, %o3
+	mov	%o2, %o4
+	ta	HV_CORE_TRAP
+	stx	%o1, [%o3]
+	retl
+	 stx	%o2, [%o4]
+ENDPROC(sun4v_get_version)
+
+	/* %o0: API group number
+	 * %o1: desired major number
+	 * %o2: desired minor number
+	 * %o3: pointer to unsigned long actual minor number storage
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_set_version)
+	mov	HV_CORE_SET_VER, %o5
+	mov	%o3, %o4
+	ta	HV_CORE_TRAP
+	retl
+	 stx	%o1, [%o4]
+ENDPROC(sun4v_set_version)
+
+	/* %o0: pointer to unsigned long time
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_tod_get)
+	mov	%o0, %o4
+	mov	HV_FAST_TOD_GET, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_tod_get)
+
+	/* %o0: time
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_tod_set)
+	mov	HV_FAST_TOD_SET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_tod_set)
+
+	/* %o0: pointer to unsigned long status
+	 *
+	 * returns %o0: signed character
+	 */
+ENTRY(sun4v_con_getchar)
+	mov	%o0, %o4
+	mov	HV_FAST_CONS_GETCHAR, %o5
+	clr	%o0
+	clr	%o1
+	ta	HV_FAST_TRAP
+	stx	%o0, [%o4]
+	retl
+	 sra	%o1, 0, %o0
+ENDPROC(sun4v_con_getchar)
+
+	/* %o0: signed long character
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_con_putchar)
+	mov	HV_FAST_CONS_PUTCHAR, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 sra	%o0, 0, %o0
+ENDPROC(sun4v_con_putchar)
+
+	/* %o0: buffer real address
+	 * %o1: buffer size
+	 * %o2: pointer to unsigned long bytes_read
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_con_read)
+	mov	%o2, %o4
+	mov	HV_FAST_CONS_READ, %o5
+	ta	HV_FAST_TRAP
+	brnz	%o0, 1f
+	 cmp	%o1, -1		/* break */
+	be,a,pn	%icc, 1f
+	 mov	%o1, %o0
+	cmp	%o1, -2		/* hup */
+	be,a,pn	%icc, 1f
+	 mov	%o1, %o0
+	stx	%o1, [%o4]
+1:	retl
+	 nop
+ENDPROC(sun4v_con_read)
+
+	/* %o0: buffer real address
+	 * %o1: buffer size
+	 * %o2: pointer to unsigned long bytes_written
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_con_write)
+	mov	%o2, %o4
+	mov	HV_FAST_CONS_WRITE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_con_write)
+
+	/* %o0:	soft state
+	 * %o1:	address of description string
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_mach_set_soft_state)
+	mov	HV_FAST_MACH_SET_SOFT_STATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_mach_set_soft_state)
+
+	/* %o0: exit code
+	 *
+	 * Does not return.
+	 */
+ENTRY(sun4v_mach_exit)
+	mov	HV_FAST_MACH_EXIT, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_mach_exit)
+
+	/* %o0: buffer real address
+	 * %o1: buffer length
+	 * %o2: pointer to unsigned long real_buf_len
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_mach_desc)
+	mov	%o2, %o4
+	mov	HV_FAST_MACH_DESC, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_mach_desc)
+
+	/* %o0: new timeout in milliseconds
+	 * %o1: pointer to unsigned long orig_timeout
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_mach_set_watchdog)
+	mov	%o1, %o4
+	mov	HV_FAST_MACH_SET_WATCHDOG, %o5
+	ta	HV_FAST_TRAP
+	brnz,a,pn %o4, 0f
+	 stx	%o1, [%o4]
+0:	retl
+	 nop
+ENDPROC(sun4v_mach_set_watchdog)
+EXPORT_SYMBOL(sun4v_mach_set_watchdog)
+
+	/* No inputs and does not return.  */
+ENTRY(sun4v_mach_sir)
+	mov	%o1, %o4
+	mov	HV_FAST_MACH_SIR, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_mach_sir)
+
+	/* %o0: channel
+	 * %o1:	ra
+	 * %o2:	num_entries
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_tx_qconf)
+	mov	HV_FAST_LDC_TX_QCONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_tx_qconf)
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long ra
+	 * %o2:	pointer to unsigned long num_entries
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_tx_qinfo)
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	HV_FAST_LDC_TX_QINFO, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_tx_qinfo)
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long head_off
+	 * %o2:	pointer to unsigned long tail_off
+	 * %o3:	pointer to unsigned long chan_state
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_tx_get_state)
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	%o3, %g3
+	mov	HV_FAST_LDC_TX_GET_STATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	stx	%o3, [%g3]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_tx_get_state)
+
+	/* %o0: channel
+	 * %o1:	tail_off
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_tx_set_qtail)
+	mov	HV_FAST_LDC_TX_SET_QTAIL, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_tx_set_qtail)
+
+	/* %o0: channel
+	 * %o1:	ra
+	 * %o2:	num_entries
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_rx_qconf)
+	mov	HV_FAST_LDC_RX_QCONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_rx_qconf)
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long ra
+	 * %o2:	pointer to unsigned long num_entries
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_rx_qinfo)
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	HV_FAST_LDC_RX_QINFO, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_rx_qinfo)
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long head_off
+	 * %o2:	pointer to unsigned long tail_off
+	 * %o3:	pointer to unsigned long chan_state
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_rx_get_state)
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	%o3, %g3
+	mov	HV_FAST_LDC_RX_GET_STATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	stx	%o3, [%g3]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_rx_get_state)
+
+	/* %o0: channel
+	 * %o1:	head_off
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_rx_set_qhead)
+	mov	HV_FAST_LDC_RX_SET_QHEAD, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_rx_set_qhead)
+
+	/* %o0: channel
+	 * %o1:	ra
+	 * %o2:	num_entries
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_ldc_set_map_table)
+	mov	HV_FAST_LDC_SET_MAP_TABLE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_set_map_table)
+
+	/* %o0: channel
+	 * %o1:	pointer to unsigned long ra
+	 * %o2:	pointer to unsigned long num_entries
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_ldc_get_map_table)
+	mov	%o1, %g1
+	mov	%o2, %g2
+	mov	HV_FAST_LDC_GET_MAP_TABLE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_get_map_table)
+
+	/* %o0:	channel
+	 * %o1:	dir_code
+	 * %o2:	tgt_raddr
+	 * %o3:	lcl_raddr
+	 * %o4:	len
+	 * %o5:	pointer to unsigned long actual_len
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_copy)
+	mov	%o5, %g1
+	mov	HV_FAST_LDC_COPY, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_copy)
+
+	/* %o0:	channel
+	 * %o1:	cookie
+	 * %o2:	pointer to unsigned long ra
+	 * %o3:	pointer to unsigned long perm
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_mapin)
+	mov	%o2, %g1
+	mov	%o3, %g2
+	mov	HV_FAST_LDC_MAPIN, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	stx	%o2, [%g2]
+	retl
+	 nop
+ENDPROC(sun4v_ldc_mapin)
+
+	/* %o0:	ra
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_unmap)
+	mov	HV_FAST_LDC_UNMAP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_unmap)
+
+	/* %o0: channel
+	 * %o1:	cookie
+	 * %o2:	mte_cookie
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ldc_revoke)
+	mov	HV_FAST_LDC_REVOKE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ldc_revoke)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long cookie
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_get_cookie)
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_COOKIE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+ENDPROC(sun4v_vintr_get_cookie)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	cookie
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_set_cookie)
+	mov	HV_FAST_VINTR_SET_COOKIE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_vintr_set_cookie)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long valid_state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_get_valid)
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_VALID, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+ENDPROC(sun4v_vintr_get_valid)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	valid_state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_set_valid)
+	mov	HV_FAST_VINTR_SET_VALID, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_vintr_set_valid)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_get_state)
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_STATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+ENDPROC(sun4v_vintr_get_state)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_set_state)
+	mov	HV_FAST_VINTR_SET_STATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_vintr_set_state)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	pointer to unsigned long cpuid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_get_target)
+	mov	%o2, %g1
+	mov	HV_FAST_VINTR_GET_TARGET, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%g1]
+	retl
+	 nop
+ENDPROC(sun4v_vintr_get_target)
+
+	/* %o0: device handle
+	 * %o1:	device INO
+	 * %o2:	cpuid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(sun4v_vintr_set_target)
+	mov	HV_FAST_VINTR_SET_TARGET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_vintr_set_target)
+
+	/* %o0: NCS sub-function
+	 * %o1:	sub-function arg real-address
+	 * %o2:	sub-function arg size
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(sun4v_ncs_request)
+	mov	HV_FAST_NCS_REQUEST, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_ncs_request)
+
+ENTRY(sun4v_svc_send)
+	save	%sp, -192, %sp
+	mov	%i0, %o0
+	mov	%i1, %o1
+	mov	%i2, %o2
+	mov	HV_FAST_SVC_SEND, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%i3]
+	ret
+	 restore
+ENDPROC(sun4v_svc_send)
+
+ENTRY(sun4v_svc_recv)
+	save	%sp, -192, %sp
+	mov	%i0, %o0
+	mov	%i1, %o1
+	mov	%i2, %o2
+	mov	HV_FAST_SVC_RECV, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%i3]
+	ret
+	 restore
+ENDPROC(sun4v_svc_recv)
+
+ENTRY(sun4v_svc_getstatus)
+	mov	HV_FAST_SVC_GETSTATUS, %o5
+	mov	%o1, %o4
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_svc_getstatus)
+
+ENTRY(sun4v_svc_setstatus)
+	mov	HV_FAST_SVC_SETSTATUS, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_svc_setstatus)
+
+ENTRY(sun4v_svc_clrstatus)
+	mov	HV_FAST_SVC_CLRSTATUS, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_svc_clrstatus)
+
+ENTRY(sun4v_mmustat_conf)
+	mov	%o1, %o4
+	mov	HV_FAST_MMUSTAT_CONF, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_mmustat_conf)
+
+ENTRY(sun4v_mmustat_info)
+	mov	%o0, %o4
+	mov	HV_FAST_MMUSTAT_INFO, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_mmustat_info)
+
+ENTRY(sun4v_mmu_demap_all)
+	clr	%o0
+	clr	%o1
+	mov	HV_MMU_ALL, %o2
+	mov	HV_FAST_MMU_DEMAP_ALL, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_mmu_demap_all)
+
+ENTRY(sun4v_niagara_getperf)
+	mov	%o0, %o4
+	mov	HV_FAST_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_niagara_getperf)
+EXPORT_SYMBOL(sun4v_niagara_getperf)
+
+ENTRY(sun4v_niagara_setperf)
+	mov	HV_FAST_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_niagara_setperf)
+EXPORT_SYMBOL(sun4v_niagara_setperf)
+
+ENTRY(sun4v_niagara2_getperf)
+	mov	%o0, %o4
+	mov	HV_FAST_N2_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_niagara2_getperf)
+EXPORT_SYMBOL(sun4v_niagara2_getperf)
+
+ENTRY(sun4v_niagara2_setperf)
+	mov	HV_FAST_N2_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_niagara2_setperf)
+EXPORT_SYMBOL(sun4v_niagara2_setperf)
+
+ENTRY(sun4v_reboot_data_set)
+	mov	HV_FAST_REBOOT_DATA_SET, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_reboot_data_set)
+
+ENTRY(sun4v_vt_get_perfreg)
+	mov	%o1, %o4
+	mov	HV_FAST_VT_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_vt_get_perfreg)
+
+ENTRY(sun4v_vt_set_perfreg)
+	mov	HV_FAST_VT_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_vt_set_perfreg)
+
+ENTRY(sun4v_t5_get_perfreg)
+	mov	%o1, %o4
+	mov	HV_FAST_T5_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_t5_get_perfreg)
+
+ENTRY(sun4v_t5_set_perfreg)
+	mov	HV_FAST_T5_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_t5_set_perfreg)
+
+ENTRY(sun4v_m7_get_perfreg)
+	mov	%o1, %o4
+	mov	HV_FAST_M7_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_m7_get_perfreg)
+
+ENTRY(sun4v_m7_set_perfreg)
+	mov	HV_FAST_M7_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_m7_set_perfreg)
+
+	/* %o0: address of CCB array
+	 * %o1: size (in bytes) of CCB array
+	 * %o2: flags
+	 * %o3: reserved
+	 *
+	 * returns:
+	 * %o0: status
+	 * %o1: size (in bytes) of the CCB array that was accepted
+	 * %o2: status data
+	 * %o3: reserved
+	 */
+ENTRY(sun4v_ccb_submit)
+	mov	%o5, %g1
+	mov	HV_CCB_SUBMIT, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 stx	%o2, [%g1]
+ENDPROC(sun4v_ccb_submit)
+EXPORT_SYMBOL(sun4v_ccb_submit)
+
+	/* %o0: completion area ra for the ccb to get info
+	 *
+	 * returns:
+	 * %o0: status
+	 * %o1: CCB state
+	 * %o2: position
+	 * %o3: dax unit
+	 * %o4: queue
+	 */
+ENTRY(sun4v_ccb_info)
+	mov	%o1, %g1
+	mov	HV_CCB_INFO, %o5
+	ta	HV_FAST_TRAP
+	sth	%o1, [%g1 + CCB_INFO_OFFSET_CCB_STATE]
+	sth	%o2, [%g1 + CCB_INFO_OFFSET_QUEUE_POS]
+	sth	%o3, [%g1 + CCB_INFO_OFFSET_DAX_UNIT]
+	retl
+	 sth	%o4, [%g1 + CCB_INFO_OFFSET_QUEUE_NUM]
+ENDPROC(sun4v_ccb_info)
+EXPORT_SYMBOL(sun4v_ccb_info)
+
+	/* %o0: completion area ra for the ccb to kill
+	 *
+	 * returns:
+	 * %o0: status
+	 * %o1: result of the kill
+	 */
+ENTRY(sun4v_ccb_kill)
+	mov	%o1, %g1
+	mov	HV_CCB_KILL, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 sth	%o1, [%g1]
+ENDPROC(sun4v_ccb_kill)
+EXPORT_SYMBOL(sun4v_ccb_kill)
diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S
new file mode 100644
index 0000000..f392204
--- /dev/null
+++ b/arch/sparc/kernel/hvtramp.S
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* hvtramp.S: Hypervisor start-cpu trampoline code.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/thread_info.h>
+#include <asm/hypervisor.h>
+#include <asm/scratchpad.h>
+#include <asm/spitfire.h>
+#include <asm/hvtramp.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/pil.h>
+
+	.align		8
+	.globl		hv_cpu_startup, hv_cpu_startup_end
+
+	/* This code executes directly out of the hypervisor
+	 * with physical addressing (va==pa).  %o0 contains
+	 * our client argument, which for Linux points to
+	 * a descriptor data structure that defines the
+	 * MMU entries we need to load up.
+	 *
+	 * After we set things up we enable the MMU and call
+	 * into the kernel.
+	 *
+	 * First setup basic privileged cpu state.
+	 */
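+	/* A rough C view of that descriptor, inferred from the offsets
+	 * used below (the authoritative layout lives in asm/hvtramp.h):
+	 *
+	 *	struct hvtramp_descr {
+	 *		__u32 cpu;
+	 *		__u32 num_mappings;
+	 *		__u64 fault_info_va;
+	 *		__u64 fault_info_pa;
+	 *		__u64 thread_reg;
+	 *		struct { __u64 vaddr, tte; } maps[];
+	 *	};
+	 */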
+hv_cpu_startup:
+	SET_GL(0)
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	wrpr		%g0, 0, %canrestore
+	wrpr		%g0, 0, %otherwin
+	wrpr		%g0, 6, %cansave
+	wrpr		%g0, 6, %cleanwin
+	wrpr		%g0, 0, %cwp
+	wrpr		%g0, 0, %wstate
+	wrpr		%g0, 0, %tl
+
+	sethi		%hi(sparc64_ttable_tl0), %g1
+	wrpr		%g1, %tba
+
+	mov		%o0, %l0
+
+	lduw		[%l0 + HVTRAMP_DESCR_CPU], %g1
+	mov		SCRATCHPAD_CPUID, %g2
+	stxa		%g1, [%g2] ASI_SCRATCHPAD
+
+	ldx		[%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
+	stxa		%g2, [%g0] ASI_SCRATCHPAD
+
+	mov		0, %l1
+	lduw		[%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
+	add		%l0, HVTRAMP_DESCR_MAPS, %l3
+
+1:	ldx		[%l3 + HVTRAMP_MAPPING_VADDR], %o0
+	clr		%o1
+	ldx		[%l3 + HVTRAMP_MAPPING_TTE], %o2
+	mov		HV_MMU_IMMU | HV_MMU_DMMU, %o3
+	mov		HV_FAST_MMU_MAP_PERM_ADDR, %o5
+	ta		HV_FAST_TRAP
+
+	brnz,pn		%o0, 80f
+	 nop
+
+	add		%l1, 1, %l1
+	cmp		%l1, %l2
+	blt,a,pt	%xcc, 1b
+	 add		%l3, HVTRAMP_MAPPING_SIZE, %l3
+
+	ldx		[%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
+	mov		HV_FAST_MMU_FAULT_AREA_CONF, %o5
+	ta		HV_FAST_TRAP
+
+	brnz,pn		%o0, 80f
+	 nop
+
+	wrpr		%g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
+
+	ldx		[%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
+
+	mov		1, %o0
+	set		1f, %o1
+	mov		HV_FAST_MMU_ENABLE, %o5
+	ta		HV_FAST_TRAP
+
+	ba,pt		%xcc, 80f
+	 nop
+
+1:
+	wr		%g0, 0, %fprs
+	wr		%g0, ASI_P, %asi
+
+	mov		PRIMARY_CONTEXT, %g7
+	stxa		%g0, [%g7] ASI_MMU
+	membar		#Sync
+
+	mov		SECONDARY_CONTEXT, %g7
+	stxa		%g0, [%g7] ASI_MMU
+	membar		#Sync
+
+	mov		%l6, %g6
+	ldx		[%g6 + TI_TASK], %g4
+
+	mov		1, %g5
+	sllx		%g5, THREAD_SHIFT, %g5
+	sub		%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+	add		%g6, %g5, %sp
+
+	call		init_irqwork_curcpu
+	 nop
+	call		hard_smp_processor_id
+	 nop
+
+	call		sun4v_register_mondo_queues
+	 nop
+
+	call		init_cur_cpu_trap
+	 mov		%g6, %o0
+
+	wrpr		%g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
+
+	call		smp_callin
+	 nop
+
+	call		cpu_panic
+	 nop
+
+80:	ba,pt		%xcc, 80b
+	 nop
+
+	.align		8
+hv_cpu_startup_end:
diff --git a/arch/sparc/kernel/idprom.c b/arch/sparc/kernel/idprom.c
new file mode 100644
index 0000000..d6c46d5
--- /dev/null
+++ b/arch/sparc/kernel/idprom.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * idprom.c: Routines to load the idprom into kernel addresses and
+ *           interpret the data contained within.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/etherdevice.h>
+
+#include <asm/oplib.h>
+#include <asm/idprom.h>
+
+struct idprom *idprom;
+EXPORT_SYMBOL(idprom);
+
+static struct idprom idprom_buffer;
+
+#ifdef CONFIG_SPARC32
+#include <asm/machines.h>  /* Fun with Sun released architectures. */
+
+/* Here is the master table of Sun machines which use some implementation
+ * of the Sparc CPU and have a meaningful IDPROM machtype value that we
+ * know about.  See asm-sparc/machines.h for empirical constants.
+ */
+static struct Sun_Machine_Models Sun_Machines[] = {
+/* First, Leon */
+{ .name = "Leon3 System-on-a-Chip",  .id_machtype = (M_LEON | M_LEON3_SOC) },
+/* Finally, early Sun4m's */
+{ .name = "Sun4m SparcSystem600",    .id_machtype = (SM_SUN4M | SM_4M_SS60) },
+{ .name = "Sun4m SparcStation10/20", .id_machtype = (SM_SUN4M | SM_4M_SS50) },
+{ .name = "Sun4m SparcStation5",     .id_machtype = (SM_SUN4M | SM_4M_SS40) },
+/* One entry for the OBP archs, which are sun4d, sun4e, and newer sun4m */
+{ .name = "Sun4M OBP based system",  .id_machtype = (SM_SUN4M_OBP | 0x0) } };
+
+static void __init display_system_type(unsigned char machtype)
+{
+	char sysname[128];
+	register int i;
+
+	for (i = 0; i < ARRAY_SIZE(Sun_Machines); i++) {
+		if (Sun_Machines[i].id_machtype == machtype) {
+			if (machtype != (SM_SUN4M_OBP | 0x00) ||
+			    prom_getproperty(prom_root_node, "banner-name",
+					     sysname, sizeof(sysname)) <= 0)
+				printk(KERN_WARNING "TYPE: %s\n",
+				       Sun_Machines[i].name);
+			else
+				printk(KERN_WARNING "TYPE: %s\n", sysname);
+			return;
+		}
+	}
+
+	prom_printf("IDPROM: Warning, bogus id_machtype value, 0x%x\n", machtype);
+}
+#else
+static void __init display_system_type(unsigned char machtype)
+{
+}
+#endif
+
+unsigned char *arch_get_platform_mac_address(void)
+{
+	return idprom->id_ethaddr;
+}
+
+/* Calculate the IDPROM checksum (xor of the data bytes). */
+static unsigned char __init calc_idprom_cksum(struct idprom *idprom)
+{
+	unsigned char cksum, i, *ptr = (unsigned char *)idprom;
+
+	for (i = cksum = 0; i <= 0x0E; i++)
+		cksum ^= *ptr++;
+
+	return cksum;
+}
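+
+/* The loop covers bytes 0x00-0x0E, i.e. everything up to (but not
+ * including) the checksum byte itself, so a valid IDPROM has the xor
+ * of those fifteen bytes stored in id_cksum.
+ */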
+
+/* Create a local IDPROM copy, verify integrity, and display information. */
+void __init idprom_init(void)
+{
+	prom_get_idprom((char *) &idprom_buffer, sizeof(idprom_buffer));
+
+	idprom = &idprom_buffer;
+
+	if (idprom->id_format != 0x01)
+		prom_printf("IDPROM: Warning, unknown format type!\n");
+
+	if (idprom->id_cksum != calc_idprom_cksum(idprom))
+		prom_printf("IDPROM: Warning, checksum failure (nvram=%x, calc=%x)!\n",
+			    idprom->id_cksum, calc_idprom_cksum(idprom));
+
+	display_system_type(idprom->id_machtype);
+
+	printk(KERN_WARNING "Ethernet address: %pM\n", idprom->id_ethaddr);
+}
diff --git a/arch/sparc/kernel/iommu-common.c b/arch/sparc/kernel/iommu-common.c
new file mode 100644
index 0000000..59cb166
--- /dev/null
+++ b/arch/sparc/kernel/iommu-common.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IOMMU mmap management and range allocation functions.
+ * Based almost entirely upon the powerpc iommu allocator.
+ */
+
+#include <linux/export.h>
+#include <linux/bitmap.h>
+#include <linux/bug.h>
+#include <linux/iommu-helper.h>
+#include <linux/dma-mapping.h>
+#include <linux/hash.h>
+#include <asm/iommu-common.h>
+
+static unsigned long iommu_large_alloc = 15;
+
+static DEFINE_PER_CPU(unsigned int, iommu_hash_common);
+
+static inline bool need_flush(struct iommu_map_table *iommu)
+{
+	return ((iommu->flags & IOMMU_NEED_FLUSH) != 0);
+}
+
+static inline void set_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags |= IOMMU_NEED_FLUSH;
+}
+
+static inline void clear_flush(struct iommu_map_table *iommu)
+{
+	iommu->flags &= ~IOMMU_NEED_FLUSH;
+}
+
+static void setup_iommu_pool_hash(void)
+{
+	unsigned int i;
+	static bool do_once;
+
+	if (do_once)
+		return;
+	do_once = true;
+	for_each_possible_cpu(i)
+		per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS);
+}
+
+/*
+ * Initialize iommu_pool entries for the iommu_map_table. `num_entries'
+ * is the number of table entries. If `large_pool' is set to true,
+ * the top 1/4 of the table will be set aside for pool allocations
+ * of more than iommu_large_alloc pages.
+ */
+void iommu_tbl_pool_init(struct iommu_map_table *iommu,
+			 unsigned long num_entries,
+			 u32 table_shift,
+			 void (*lazy_flush)(struct iommu_map_table *),
+			 bool large_pool, u32 npools,
+			 bool skip_span_boundary_check)
+{
+	unsigned int start, i;
+	struct iommu_pool *p = &(iommu->large_pool);
+
+	setup_iommu_pool_hash();
+	if (npools == 0)
+		iommu->nr_pools = IOMMU_NR_POOLS;
+	else
+		iommu->nr_pools = npools;
+	BUG_ON(npools > IOMMU_NR_POOLS);
+
+	iommu->table_shift = table_shift;
+	iommu->lazy_flush = lazy_flush;
+	start = 0;
+	if (skip_span_boundary_check)
+		iommu->flags |= IOMMU_NO_SPAN_BOUND;
+	if (large_pool)
+		iommu->flags |= IOMMU_HAS_LARGE_POOL;
+
+	if (!large_pool)
+		iommu->poolsize = num_entries/iommu->nr_pools;
+	else
+		iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools;
+	for (i = 0; i < iommu->nr_pools; i++) {
+		spin_lock_init(&(iommu->pools[i].lock));
+		iommu->pools[i].start = start;
+		iommu->pools[i].hint = start;
+		start += iommu->poolsize; /* start for next pool */
+		iommu->pools[i].end = start - 1;
+	}
+	if (!large_pool)
+		return;
+	/* initialize large_pool */
+	spin_lock_init(&(p->lock));
+	p->start = start;
+	p->hint = p->start;
+	p->end = num_entries;
+}
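+
+/* For example (assuming IOMMU_NR_POOLS == 4): with num_entries == 1024
+ * and large_pool set, each small pool gets (1024 * 3 / 4) / 4 == 192
+ * entries, covering [0, 767], and the large pool spans the remaining
+ * top quarter, [768, 1023].
+ */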
+
+unsigned long iommu_tbl_range_alloc(struct device *dev,
+				struct iommu_map_table *iommu,
+				unsigned long npages,
+				unsigned long *handle,
+				unsigned long mask,
+				unsigned int align_order)
+{
+	unsigned int pool_hash = __this_cpu_read(iommu_hash_common);
+	unsigned long n, end, start, limit, boundary_size;
+	struct iommu_pool *pool;
+	int pass = 0;
+	unsigned int pool_nr;
+	unsigned int npools = iommu->nr_pools;
+	unsigned long flags;
+	bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0);
+	bool largealloc = (large_pool && npages > iommu_large_alloc);
+	unsigned long shift;
+	unsigned long align_mask = 0;
+
+	if (align_order > 0)
+		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
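+	/* e.g. align_order == 3 gives align_mask == 0x7, forcing the
+	 * allocation onto an 8-entry boundary.
+	 */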
+
+	/* Sanity check */
+	if (unlikely(npages == 0)) {
+		WARN_ON_ONCE(1);
+		return IOMMU_ERROR_CODE;
+	}
+
+	if (largealloc) {
+		pool = &(iommu->large_pool);
+		pool_nr = 0; /* to keep compiler happy */
+	} else {
+		/* pick out pool_nr */
+		pool_nr = pool_hash & (npools - 1);
+		pool = &(iommu->pools[pool_nr]);
+	}
+	spin_lock_irqsave(&pool->lock, flags);
+
+ again:
+	if (pass == 0 && handle && *handle &&
+	    (*handle >= pool->start) && (*handle < pool->end))
+		start = *handle;
+	else
+		start = pool->hint;
+
+	limit = pool->end;
+
+	/* The case below can happen if we have a small segment appended
+	 * to a large one, or when the previous alloc was at the very end
+	 * of the available space.  If so, go back to the beginning.  If
+	 * a flush is needed, it will get done based on the return value
+	 * from iommu_area_alloc() below.
+	 */
+	if (start >= limit)
+		start = pool->start;
+	shift = iommu->table_map_base >> iommu->table_shift;
+	if (limit + shift > mask) {
+		limit = mask - shift + 1;
+		/* If we're constrained on address range, first try
+		 * at the masked hint to avoid O(n) search complexity,
+		 * but on second pass, start at 0 in pool 0.
+		 */
+		if ((start & mask) >= limit || pass > 0) {
+			spin_unlock(&(pool->lock));
+			pool = &(iommu->pools[0]);
+			spin_lock(&(pool->lock));
+			start = pool->start;
+		} else {
+			start &= mask;
+		}
+	}
+
+	if (dev)
+		boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				      1 << iommu->table_shift);
+	else
+		boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift);
+
+	boundary_size = boundary_size >> iommu->table_shift;
+	/*
+	 * If skip_span_boundary_check was set during init, things were
+	 * set up so that iommu_is_span_boundary() merely checks whether
+	 * (index + npages) < num_tsb_entries.
+	 */
+	if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) {
+		shift = 0;
+		boundary_size = iommu->poolsize * iommu->nr_pools;
+	}
+	n = iommu_area_alloc(iommu->map, limit, start, npages, shift,
+			     boundary_size, align_mask);
+	if (n == -1) {
+		if (likely(pass == 0)) {
+			/* First failure, rescan from the beginning.  */
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else if (!largealloc && pass <= iommu->nr_pools) {
+			spin_unlock(&(pool->lock));
+			pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1);
+			pool = &(iommu->pools[pool_nr]);
+			spin_lock(&(pool->lock));
+			pool->hint = pool->start;
+			set_flush(iommu);
+			pass++;
+			goto again;
+		} else {
+			/* give up */
+			n = IOMMU_ERROR_CODE;
+			goto bail;
+		}
+	}
+	if (iommu->lazy_flush &&
+	    (n < pool->hint || need_flush(iommu))) {
+		clear_flush(iommu);
+		iommu->lazy_flush(iommu);
+	}
+
+	end = n + npages;
+	pool->hint = end;
+
+	/* Update handle for SG allocations */
+	if (handle)
+		*handle = end;
+bail:
+	spin_unlock_irqrestore(&(pool->lock), flags);
+
+	return n;
+}
+
+static struct iommu_pool *get_pool(struct iommu_map_table *tbl,
+				   unsigned long entry)
+{
+	struct iommu_pool *p;
+	unsigned long largepool_start = tbl->large_pool.start;
+	bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0);
+
+	/* The large pool is the last pool at the top of the table */
+	if (large_pool && entry >= largepool_start) {
+		p = &tbl->large_pool;
+	} else {
+		unsigned int pool_nr = entry / tbl->poolsize;
+
+		BUG_ON(pool_nr >= tbl->nr_pools);
+		p = &tbl->pools[pool_nr];
+	}
+	return p;
+}
+
+/* Caller supplies the index of the entry into the iommu map table
+ * itself when the mapping from dma_addr to the entry is not the
+ * default addr->entry mapping below.
+ */
+void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr,
+			  unsigned long npages, unsigned long entry)
+{
+	struct iommu_pool *pool;
+	unsigned long flags;
+	unsigned long shift = iommu->table_shift;
+
+	if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */
+		entry = (dma_addr - iommu->table_map_base) >> shift;
+	pool = get_pool(iommu, entry);
+
+	spin_lock_irqsave(&(pool->lock), flags);
+	bitmap_clear(iommu->map, entry, npages);
+	spin_unlock_irqrestore(&(pool->lock), flags);
+}
diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
new file mode 100644
index 0000000..40d008b
--- /dev/null
+++ b/arch/sparc/kernel/iommu.c
@@ -0,0 +1,779 @@
+// SPDX-License-Identifier: GPL-2.0
+/* iommu.c: Generic sparc64 IOMMU support.
+ *
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1999, 2000 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/iommu-helper.h>
+#include <linux/bitmap.h>
+#include <asm/iommu-common.h>
+
+#ifdef CONFIG_PCI
+#include <linux/pci.h>
+#endif
+
+#include <asm/iommu.h>
+
+#include "iommu_common.h"
+#include "kernel.h"
+
+#define STC_CTXMATCH_ADDR(STC, CTX)	\
+	((STC)->strbuf_ctxmatch_base + ((CTX) << 3))
+#define STC_FLUSHFLAG_INIT(STC) \
+	(*((STC)->strbuf_flushflag) = 0UL)
+#define STC_FLUSHFLAG_SET(STC) \
+	(*((STC)->strbuf_flushflag) != 0UL)
+
+#define iommu_read(__reg) \
+({	u64 __ret; \
+	__asm__ __volatile__("ldxa [%1] %2, %0" \
+			     : "=r" (__ret) \
+			     : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
+			     : "memory"); \
+	__ret; \
+})
+#define iommu_write(__reg, __val) \
+	__asm__ __volatile__("stxa %0, [%1] %2" \
+			     : /* no outputs */ \
+			     : "r" (__val), "r" (__reg), \
+			       "i" (ASI_PHYS_BYPASS_EC_E))
+
+/* Must be invoked under the IOMMU lock. */
+static void iommu_flushall(struct iommu_map_table *iommu_map_table)
+{
+	struct iommu *iommu = container_of(iommu_map_table, struct iommu, tbl);
+	if (iommu->iommu_flushinv) {
+		iommu_write(iommu->iommu_flushinv, ~(u64)0);
+	} else {
+		unsigned long tag;
+		int entry;
+
+		tag = iommu->iommu_tags;
+		for (entry = 0; entry < 16; entry++) {
+			iommu_write(tag, 0);
+			tag += 8;
+		}
+
+		/* Ensure completion of previous PIO writes. */
+		(void) iommu_read(iommu->write_complete_reg);
+	}
+}
+
+#define IOPTE_CONSISTENT(CTX) \
+	(IOPTE_VALID | IOPTE_CACHE | \
+	 (((CTX) << 47) & IOPTE_CONTEXT))
+
+#define IOPTE_STREAMING(CTX) \
+	(IOPTE_CONSISTENT(CTX) | IOPTE_STBUF)
+
+/* Existing mappings are never marked invalid; instead they
+ * are pointed at a dummy page.
+ */
+#define IOPTE_IS_DUMMY(iommu, iopte)	\
+	((iopte_val(*iopte) & IOPTE_PAGE) == (iommu)->dummy_page_pa)
+
+static inline void iopte_make_dummy(struct iommu *iommu, iopte_t *iopte)
+{
+	unsigned long val = iopte_val(*iopte);
+
+	val &= ~IOPTE_PAGE;
+	val |= iommu->dummy_page_pa;
+
+	iopte_val(*iopte) = val;
+}
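+
+/* The swap above only replaces the page-frame bits, e.g.:
+ *
+ *	before: IOPTE_VALID | IOPTE_CACHE | (real_paddr & IOPTE_PAGE)
+ *	after:  IOPTE_VALID | IOPTE_CACHE | dummy_page_pa
+ *
+ * so a late or stray device access lands in the zeroed dummy page
+ * instead of hitting an invalid translation.
+ */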
+
+int iommu_table_init(struct iommu *iommu, int tsbsize,
+		     u32 dma_offset, u32 dma_addr_mask,
+		     int numa_node)
+{
+	unsigned long i, order, sz, num_tsb_entries;
+	struct page *page;
+
+	num_tsb_entries = tsbsize / sizeof(iopte_t);
+
+	/* Setup initial software IOMMU state. */
+	spin_lock_init(&iommu->lock);
+	iommu->ctx_lowest_free = 1;
+	iommu->tbl.table_map_base = dma_offset;
+	iommu->dma_addr_mask = dma_addr_mask;
+
+	/* Allocate and initialize the free area map.  */
+	sz = num_tsb_entries / 8;
+	sz = (sz + 7UL) & ~7UL;
+	iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
+	if (!iommu->tbl.map)
+		return -ENOMEM;
+	memset(iommu->tbl.map, 0, sz);
+
+	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
+			    (tlb_type != hypervisor ? iommu_flushall : NULL),
+			    false, 1, false);
+
+	/* Allocate and initialize the dummy page which we
+	 * set inactive IO PTEs to point to.
+	 */
+	page = alloc_pages_node(numa_node, GFP_KERNEL, 0);
+	if (!page) {
+		printk(KERN_ERR "IOMMU: Error, gfp(dummy_page) failed.\n");
+		goto out_free_map;
+	}
+	iommu->dummy_page = (unsigned long) page_address(page);
+	memset((void *)iommu->dummy_page, 0, PAGE_SIZE);
+	iommu->dummy_page_pa = (unsigned long) __pa(iommu->dummy_page);
+
+	/* Now allocate and setup the IOMMU page table itself.  */
+	order = get_order(tsbsize);
+	page = alloc_pages_node(numa_node, GFP_KERNEL, order);
+	if (!page) {
+		printk(KERN_ERR "IOMMU: Error, gfp(tsb) failed.\n");
+		goto out_free_dummy_page;
+	}
+	iommu->page_table = (iopte_t *)page_address(page);
+
+	for (i = 0; i < num_tsb_entries; i++)
+		iopte_make_dummy(iommu, &iommu->page_table[i]);
+
+	return 0;
+
+out_free_dummy_page:
+	free_page(iommu->dummy_page);
+	iommu->dummy_page = 0UL;
+
+out_free_map:
+	kfree(iommu->tbl.map);
+	iommu->tbl.map = NULL;
+
+	return -ENOMEM;
+}
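+
+/* Sizing example (illustrative): a 1MB TSB holds
+ * 1MB / sizeof(iopte_t) == 131072 entries, so the free map above is
+ * 131072 / 8 == 16384 bytes -- already a multiple of 8, so the
+ * round-up leaves it unchanged.
+ */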
+
+static inline iopte_t *alloc_npages(struct device *dev,
+				    struct iommu *iommu,
+				    unsigned long npages)
+{
+	unsigned long entry;
+
+	entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+				      (unsigned long)(-1), 0);
+	if (unlikely(entry == IOMMU_ERROR_CODE))
+		return NULL;
+
+	return iommu->page_table + entry;
+}
+
+static int iommu_alloc_ctx(struct iommu *iommu)
+{
+	int lowest = iommu->ctx_lowest_free;
+	int n = find_next_zero_bit(iommu->ctx_bitmap, IOMMU_NUM_CTXS, lowest);
+
+	if (unlikely(n == IOMMU_NUM_CTXS)) {
+		n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
+		if (unlikely(n == lowest)) {
+			printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
+			n = 0;
+		}
+	}
+	if (n)
+		__set_bit(n, iommu->ctx_bitmap);
+
+	return n;
+}
+
+static inline void iommu_free_ctx(struct iommu *iommu, int ctx)
+{
+	if (likely(ctx)) {
+		__clear_bit(ctx, iommu->ctx_bitmap);
+		if (ctx < iommu->ctx_lowest_free)
+			iommu->ctx_lowest_free = ctx;
+	}
+}
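+
+/* Note that context 0 is never handed out: ctx_lowest_free starts at
+ * 1, an exhausted bitmap falls back to returning 0, and
+ * iommu_free_ctx() ignores ctx == 0, so 0 doubles as "no context"
+ * throughout this file.
+ */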
+
+static void *dma_4u_alloc_coherent(struct device *dev, size_t size,
+				   dma_addr_t *dma_addrp, gfp_t gfp,
+				   unsigned long attrs)
+{
+	unsigned long order, first_page;
+	struct iommu *iommu;
+	struct page *page;
+	int npages, nid;
+	iopte_t *iopte;
+	void *ret;
+
+	size = IO_PAGE_ALIGN(size);
+	order = get_order(size);
+	if (order >= 10)
+		return NULL;
+
+	nid = dev->archdata.numa_node;
+	page = alloc_pages_node(nid, gfp, order);
+	if (unlikely(!page))
+		return NULL;
+
+	first_page = (unsigned long) page_address(page);
+	memset((char *)first_page, 0, PAGE_SIZE << order);
+
+	iommu = dev->archdata.iommu;
+
+	iopte = alloc_npages(dev, iommu, size >> IO_PAGE_SHIFT);
+
+	if (unlikely(iopte == NULL)) {
+		free_pages(first_page, order);
+		return NULL;
+	}
+
+	*dma_addrp = (iommu->tbl.table_map_base +
+		      ((iopte - iommu->page_table) << IO_PAGE_SHIFT));
+	ret = (void *) first_page;
+	npages = size >> IO_PAGE_SHIFT;
+	first_page = __pa(first_page);
+	while (npages--) {
+		iopte_val(*iopte) = (IOPTE_CONSISTENT(0UL) |
+				     IOPTE_WRITE |
+				     (first_page & IOPTE_PAGE));
+		iopte++;
+		first_page += IO_PAGE_SIZE;
+	}
+
+	return ret;
+}
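+
+/* A minimal driver-side sketch of how the allocator above is reached
+ * through the generic DMA API (size and device names hypothetical):
+ *
+ *	dma_addr_t dma;
+ *	void *cpu = dma_alloc_coherent(&pdev->dev, 8192, &dma, GFP_KERNEL);
+ *	if (cpu) {
+ *		... program the device with "dma", touch memory via "cpu" ...
+ *		dma_free_coherent(&pdev->dev, 8192, cpu, dma);
+ *	}
+ */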
+
+static void dma_4u_free_coherent(struct device *dev, size_t size,
+				 void *cpu, dma_addr_t dvma,
+				 unsigned long attrs)
+{
+	struct iommu *iommu;
+	unsigned long order, npages;
+
+	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+	iommu = dev->archdata.iommu;
+
+	iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+	order = get_order(size);
+	if (order < 10)
+		free_pages((unsigned long)cpu, order);
+}
+
+static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t sz,
+				  enum dma_data_direction direction,
+				  unsigned long attrs)
+{
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	iopte_t *base;
+	unsigned long flags, npages, oaddr;
+	unsigned long i, base_paddr, ctx;
+	u32 bus_addr, ret;
+	unsigned long iopte_protection;
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	if (unlikely(direction == DMA_NONE))
+		goto bad_no_ctx;
+
+	oaddr = (unsigned long)(page_address(page) + offset);
+	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+	npages >>= IO_PAGE_SHIFT;
+
+	base = alloc_npages(dev, iommu, npages);
+	spin_lock_irqsave(&iommu->lock, flags);
+	ctx = 0;
+	if (iommu->iommu_ctxflush)
+		ctx = iommu_alloc_ctx(iommu);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (unlikely(!base))
+		goto bad;
+
+	bus_addr = (iommu->tbl.table_map_base +
+		    ((base - iommu->page_table) << IO_PAGE_SHIFT));
+	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+	base_paddr = __pa(oaddr & IO_PAGE_MASK);
+	if (strbuf->strbuf_enabled)
+		iopte_protection = IOPTE_STREAMING(ctx);
+	else
+		iopte_protection = IOPTE_CONSISTENT(ctx);
+	if (direction != DMA_TO_DEVICE)
+		iopte_protection |= IOPTE_WRITE;
+
+	for (i = 0; i < npages; i++, base++, base_paddr += IO_PAGE_SIZE)
+		iopte_val(*base) = iopte_protection | base_paddr;
+
+	return ret;
+
+bad:
+	iommu_free_ctx(iommu, ctx);
+bad_no_ctx:
+	if (printk_ratelimit())
+		WARN_ON(1);
+	return SPARC_MAPPING_ERROR;
+}
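+
+/* Worked example of the npages arithmetic above, with 8K IO pages:
+ *
+ *	oaddr = 0x1ff0, sz = 16: ALIGN(0x2000) - 0x0 = 0x2000 -> 1 page
+ *	oaddr = 0x1ff8, sz = 16: ALIGN(0x2008) - 0x0 = 0x4000 -> 2 pages
+ *
+ * i.e. a buffer that straddles an IO page boundary costs an extra
+ * IOPTE even though its length alone would fit in one page.
+ */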
+
+static void strbuf_flush(struct strbuf *strbuf, struct iommu *iommu,
+			 u32 vaddr, unsigned long ctx, unsigned long npages,
+			 enum dma_data_direction direction)
+{
+	int limit;
+
+	if (strbuf->strbuf_ctxflush &&
+	    iommu->iommu_ctxflush) {
+		unsigned long matchreg, flushreg;
+		u64 val;
+
+		flushreg = strbuf->strbuf_ctxflush;
+		matchreg = STC_CTXMATCH_ADDR(strbuf, ctx);
+
+		iommu_write(flushreg, ctx);
+		val = iommu_read(matchreg);
+		val &= 0xffff;
+		if (!val)
+			goto do_flush_sync;
+
+		while (val) {
+			if (val & 0x1)
+				iommu_write(flushreg, ctx);
+			val >>= 1;
+		}
+		val = iommu_read(matchreg);
+		if (unlikely(val)) {
+			printk(KERN_WARNING "strbuf_flush: ctx flush "
+			       "timeout matchreg[%llx] ctx[%lx]\n",
+			       val, ctx);
+			goto do_page_flush;
+		}
+	} else {
+		unsigned long i;
+
+	do_page_flush:
+		for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
+			iommu_write(strbuf->strbuf_pflush, vaddr);
+	}
+
+do_flush_sync:
+	/* If the device could not have possibly put dirty data into
+	 * the streaming cache, no flush-flag synchronization needs
+	 * to be performed.
+	 */
+	if (direction == DMA_TO_DEVICE)
+		return;
+
+	STC_FLUSHFLAG_INIT(strbuf);
+	iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
+	(void) iommu_read(iommu->write_complete_reg);
+
+	limit = 100000;
+	while (!STC_FLUSHFLAG_SET(strbuf)) {
+		limit--;
+		if (!limit)
+			break;
+		udelay(1);
+		rmb();
+	}
+	if (!limit)
+		printk(KERN_WARNING "strbuf_flush: flushflag timeout "
+		       "vaddr[%08x] ctx[%lx] npages[%ld]\n",
+		       vaddr, ctx, npages);
+}
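+
+/* Summary of the flush-sync protocol above: zero the flush-flag word,
+ * post its physical address to strbuf_fsync, read write_complete_reg
+ * to push the PIO writes out, then spin (bounded at 100000 * 1us)
+ * until the hardware writes the flag non-zero.
+ */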
+
+static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr,
+			      size_t sz, enum dma_data_direction direction,
+			      unsigned long attrs)
+{
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	iopte_t *base;
+	unsigned long flags, npages, ctx, i;
+
+	if (unlikely(direction == DMA_NONE)) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+		return;
+	}
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+	npages >>= IO_PAGE_SHIFT;
+	base = iommu->page_table +
+		((bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
+	bus_addr &= IO_PAGE_MASK;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	/* Record the context, if any. */
+	ctx = 0;
+	if (iommu->iommu_ctxflush)
+		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+
+	/* Step 1: Kick data out of streaming buffers if necessary. */
+	if (strbuf->strbuf_enabled && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		strbuf_flush(strbuf, iommu, bus_addr, ctx,
+			     npages, direction);
+
+	/* Step 2: Clear out TSB entries. */
+	for (i = 0; i < npages; i++)
+		iopte_make_dummy(iommu, base + i);
+
+	iommu_free_ctx(iommu, ctx);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+}
+
+static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist,
+			 int nelems, enum dma_data_direction direction,
+			 unsigned long attrs)
+{
+	struct scatterlist *s, *outs, *segstart;
+	unsigned long flags, handle, prot, ctx;
+	dma_addr_t dma_next = 0, dma_addr;
+	unsigned int max_seg_size;
+	unsigned long seg_boundary_size;
+	int outcount, incount, i;
+	struct strbuf *strbuf;
+	struct iommu *iommu;
+	unsigned long base_shift;
+
+	BUG_ON(direction == DMA_NONE);
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+	if (nelems == 0 || !iommu)
+		return 0;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	ctx = 0;
+	if (iommu->iommu_ctxflush)
+		ctx = iommu_alloc_ctx(iommu);
+
+	if (strbuf->strbuf_enabled)
+		prot = IOPTE_STREAMING(ctx);
+	else
+		prot = IOPTE_CONSISTENT(ctx);
+	if (direction != DMA_TO_DEVICE)
+		prot |= IOPTE_WRITE;
+
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
+
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
+
+	max_seg_size = dma_get_max_seg_size(dev);
+	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
+	base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
+	for_each_sg(sglist, s, nelems, i) {
+		unsigned long paddr, npages, entry, out_entry = 0, slen;
+		iopte_t *base;
+
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
+		entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+					      &handle, (unsigned long)(-1), 0);
+
+		/* Handle failure */
+		if (unlikely(entry == IOMMU_ERROR_CODE)) {
+			if (printk_ratelimit())
+				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
+				       " npages %lx\n", iommu, paddr, npages);
+			goto iommu_map_failed;
+		}
+
+		base = iommu->page_table + entry;
+
+		/* Convert entry to a dma_addr_t */
+		dma_addr = iommu->tbl.table_map_base +
+			(entry << IO_PAGE_SHIFT);
+		dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+		/* Insert into HW table */
+		paddr &= IO_PAGE_MASK;
+		while (npages--) {
+			iopte_val(*base) = prot | paddr;
+			base++;
+			paddr += IO_PAGE_SIZE;
+		}
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			/* We cannot merge if:
+			 * - allocated dma_addr isn't contiguous to previous allocation
+			 */
+			if ((dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size) ||
+			    (is_span_boundary(out_entry, base_shift,
+					      seg_boundary_size, outs, s))) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++;
+				outs = sg_next(outs);
+			} else {
+				outs->dma_length += s->length;
+			}
+		}
+
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
+			out_entry = entry;
+		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
+	}
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	if (outcount < incount) {
+		outs = sg_next(outs);
+		outs->dma_address = SPARC_MAPPING_ERROR;
+		outs->dma_length = 0;
+	}
+
+	return outcount;
+
+iommu_map_failed:
+	for_each_sg(sglist, s, nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages, entry, j;
+			iopte_t *base;
+
+			vaddr = s->dma_address & IO_PAGE_MASK;
+			npages = iommu_num_pages(s->dma_address, s->dma_length,
+						 IO_PAGE_SIZE);
+
+			entry = (vaddr - iommu->tbl.table_map_base)
+				>> IO_PAGE_SHIFT;
+			base = iommu->page_table + entry;
+
+			for (j = 0; j < npages; j++)
+				iopte_make_dummy(iommu, base + j);
+
+			iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+					     IOMMU_ERROR_CODE);
+
+			s->dma_address = SPARC_MAPPING_ERROR;
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	return 0;
+}
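+
+/* Merge example for the loop above (illustrative): two 4K segments
+ * whose IOMMU allocations come back contiguous coalesce into a single
+ * 8K DMA segment, provided the combined length stays within
+ * max_seg_size and no segment boundary is crossed; otherwise a new
+ * output segment is opened.
+ */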
+
+/* If contexts are being used, they are the same in all of the mappings
+ * we make for a particular SG.
+ */
+static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg)
+{
+	unsigned long ctx = 0;
+
+	if (iommu->iommu_ctxflush) {
+		iopte_t *base;
+		u32 bus_addr;
+		struct iommu_map_table *tbl = &iommu->tbl;
+
+		bus_addr = sg->dma_address & IO_PAGE_MASK;
+		base = iommu->page_table +
+			((bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT);
+
+		ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL;
+	}
+	return ctx;
+}
+
+static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			    int nelems, enum dma_data_direction direction,
+			    unsigned long attrs)
+{
+	unsigned long flags, ctx;
+	struct scatterlist *sg;
+	struct strbuf *strbuf;
+	struct iommu *iommu;
+
+	BUG_ON(direction == DMA_NONE);
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	ctx = fetch_sg_ctx(iommu, sglist);
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	sg = sglist;
+	while (nelems--) {
+		dma_addr_t dma_handle = sg->dma_address;
+		unsigned int len = sg->dma_length;
+		unsigned long npages, entry;
+		iopte_t *base;
+		int i;
+
+		if (!len)
+			break;
+		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+
+		entry = ((dma_handle - iommu->tbl.table_map_base)
+			 >> IO_PAGE_SHIFT);
+		base = iommu->page_table + entry;
+
+		dma_handle &= IO_PAGE_MASK;
+		if (strbuf->strbuf_enabled && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+			strbuf_flush(strbuf, iommu, dma_handle, ctx,
+				     npages, direction);
+
+		for (i = 0; i < npages; i++)
+			iopte_make_dummy(iommu, base + i);
+
+		iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+				     IOMMU_ERROR_CODE);
+		sg = sg_next(sg);
+	}
+
+	iommu_free_ctx(iommu, ctx);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void dma_4u_sync_single_for_cpu(struct device *dev,
+				       dma_addr_t bus_addr, size_t sz,
+				       enum dma_data_direction direction)
+{
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	unsigned long flags, ctx, npages;
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	if (!strbuf->strbuf_enabled)
+		return;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+	npages >>= IO_PAGE_SHIFT;
+	bus_addr &= IO_PAGE_MASK;
+
+	/* Step 1: Record the context, if any. */
+	ctx = 0;
+	if (iommu->iommu_ctxflush &&
+	    strbuf->strbuf_ctxflush) {
+		iopte_t *iopte;
+		struct iommu_map_table *tbl = &iommu->tbl;
+
+		iopte = iommu->page_table +
+			((bus_addr - tbl->table_map_base)>>IO_PAGE_SHIFT);
+		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
+	}
+
+	/* Step 2: Kick data out of streaming buffers. */
+	strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void dma_4u_sync_sg_for_cpu(struct device *dev,
+				   struct scatterlist *sglist, int nelems,
+				   enum dma_data_direction direction)
+{
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	unsigned long flags, ctx, npages, i;
+	struct scatterlist *sg, *sgprv;
+	u32 bus_addr;
+
+	iommu = dev->archdata.iommu;
+	strbuf = dev->archdata.stc;
+
+	if (!strbuf->strbuf_enabled)
+		return;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	/* Step 1: Record the context, if any. */
+	ctx = 0;
+	if (iommu->iommu_ctxflush &&
+	    strbuf->strbuf_ctxflush) {
+		iopte_t *iopte;
+		struct iommu_map_table *tbl = &iommu->tbl;
+
+		iopte = iommu->page_table + ((sglist[0].dma_address -
+			tbl->table_map_base) >> IO_PAGE_SHIFT);
+		ctx = (iopte_val(*iopte) & IOPTE_CONTEXT) >> 47UL;
+	}
+
+	/* Step 2: Kick data out of streaming buffers. */
+	bus_addr = sglist[0].dma_address & IO_PAGE_MASK;
+	sgprv = NULL;
+	for_each_sg(sglist, sg, nelems, i) {
+		if (sg->dma_length == 0)
+			break;
+		sgprv = sg;
+	}
+
+	npages = (IO_PAGE_ALIGN(sgprv->dma_address + sgprv->dma_length)
+		  - bus_addr) >> IO_PAGE_SHIFT;
+	strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
+
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static int dma_4u_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr == SPARC_MAPPING_ERROR;
+}
+
+static int dma_4u_supported(struct device *dev, u64 device_mask)
+{
+	struct iommu *iommu = dev->archdata.iommu;
+
+	if (device_mask > DMA_BIT_MASK(32))
+		return 0;
+	if ((device_mask & iommu->dma_addr_mask) == iommu->dma_addr_mask)
+		return 1;
+#ifdef CONFIG_PCI
+	if (dev_is_pci(dev))
+		return pci64_dma_supported(to_pci_dev(dev), device_mask);
+#endif
+	return 0;
+}
+
+static const struct dma_map_ops sun4u_dma_ops = {
+	.alloc			= dma_4u_alloc_coherent,
+	.free			= dma_4u_free_coherent,
+	.map_page		= dma_4u_map_page,
+	.unmap_page		= dma_4u_unmap_page,
+	.map_sg			= dma_4u_map_sg,
+	.unmap_sg		= dma_4u_unmap_sg,
+	.sync_single_for_cpu	= dma_4u_sync_single_for_cpu,
+	.sync_sg_for_cpu	= dma_4u_sync_sg_for_cpu,
+	.dma_supported		= dma_4u_supported,
+	.mapping_error		= dma_4u_mapping_error,
+};
+
+const struct dma_map_ops *dma_ops = &sun4u_dma_ops;
+EXPORT_SYMBOL(dma_ops);
diff --git a/arch/sparc/kernel/iommu_common.h b/arch/sparc/kernel/iommu_common.h
new file mode 100644
index 0000000..e3c02ba
--- /dev/null
+++ b/arch/sparc/kernel/iommu_common.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* iommu_common.h: UltraSparc SBUS/PCI common iommu declarations.
+ *
+ * Copyright (C) 1999, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _IOMMU_COMMON_H
+#define _IOMMU_COMMON_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/iommu-helper.h>
+
+#include <asm/iommu.h>
+
+/*
+ * These give the mapping size of each iommu pte/tlb.
+ */
+#define IO_PAGE_SHIFT			13
+#define IO_PAGE_SIZE			(1UL << IO_PAGE_SHIFT)
+#define IO_PAGE_MASK			(~(IO_PAGE_SIZE-1))
+#define IO_PAGE_ALIGN(addr)		ALIGN(addr, IO_PAGE_SIZE)
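+
+/* E.g. IO_PAGE_SIZE is 0x2000, so IO_PAGE_ALIGN(0x2100) == 0x4000
+ * and (0x2100 & IO_PAGE_MASK) == 0x2000.
+ */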
+
+#define IO_TSB_ENTRIES			(128*1024)
+#define IO_TSB_SIZE			(IO_TSB_ENTRIES * 8)
+
+/*
+ * This is the hardwired shift in the iotlb tag/data parts.
+ */
+#define IOMMU_PAGE_SHIFT		13
+
+#define SG_ENT_PHYS_ADDRESS(SG)	(__pa(sg_virt((SG))))
+
+static inline int is_span_boundary(unsigned long entry,
+				   unsigned long shift,
+				   unsigned long boundary_size,
+				   struct scatterlist *outs,
+				   struct scatterlist *sg)
+{
+	unsigned long paddr = SG_ENT_PHYS_ADDRESS(outs);
+	int nr = iommu_num_pages(paddr, outs->dma_length + sg->length,
+				 IO_PAGE_SIZE);
+
+	return iommu_is_span_boundary(entry, nr, shift, boundary_size);
+}
+
+#define SPARC_MAPPING_ERROR	(~(dma_addr_t)0x0)
+
+#endif /* _IOMMU_COMMON_H */
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
new file mode 100644
index 0000000..6799c93
--- /dev/null
+++ b/arch/sparc/kernel/ioport.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ioport.c:  Simple io mapping allocator.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1995 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ *
+ * 1996: sparc_free_io, 1999: ioremap()/iounmap() by Pete Zaitcev.
+ *
+ * 2000/01/29
+ * <rth> zait: as long as pci_alloc_consistent produces something addressable, 
+ *	things are ok.
+ * <zaitcev> rth: no, it is relevant, because get_free_pages returns you a
+ *	pointer into the big page mapping
+ * <rth> zait: so what?
+ * <rth> zait: remap_it_my_way(virt_to_phys(get_free_page()))
+ * <zaitcev> Hmm
+ * <zaitcev> Suppose I did this remap_it_my_way(virt_to_phys(get_free_page())).
+ *	So far so good.
+ * <zaitcev> Now, driver calls pci_free_consistent(with result of
+ *	remap_it_my_way()).
+ * <zaitcev> How do you find the address to pass to free_pages()?
+ * <rth> zait: walk the page tables?  It's only two or three level after all.
+ * <rth> zait: you have to walk them anyway to remove the mapping.
+ * <zaitcev> Hmm
+ * <zaitcev> Sounds reasonable
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/pci.h>		/* struct pci_dev */
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/of_device.h>
+
+#include <asm/io.h>
+#include <asm/vaddrs.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/iommu.h>
+#include <asm/io-unit.h>
+#include <asm/leon.h>
+
+const struct sparc32_dma_ops *sparc32_dma_ops;
+
+/* This function must make sure that caches and memory are coherent after DMA.
+ * On LEON systems without cache snooping it flushes the entire D-CACHE.
+ */
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
+{
+	if (sparc_cpu_model == sparc_leon) {
+		if (!sparc_leon3_snooping_enabled())
+			leon_flush_dcache_all();
+	}
+}
+
+static void __iomem *_sparc_ioremap(struct resource *res, u32 bus, u32 pa, int sz);
+static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys,
+    unsigned long size, char *name);
+static void _sparc_free_io(struct resource *res);
+
+static void register_proc_sparc_ioport(void);
+
+/* This points to the next virtual address to use for DVMA mappings */
+static struct resource _sparc_dvma = {
+	.name = "sparc_dvma", .start = DVMA_VADDR, .end = DVMA_END - 1
+};
+/* This points to the start of I/O mappings, visible from outside. */
+/*ext*/ struct resource sparc_iomap = {
+	.name = "sparc_iomap", .start = IOBASE_VADDR, .end = IOBASE_END - 1
+};
+
+/*
+ * Our mini-allocator...
+ * Boy this is gross! We need it because we must map I/O for
+ * timers and interrupt controller before the kmalloc is available.
+ */
+
+#define XNMLN  15
+#define XNRES  10	/* SS-10 uses 8 */
+
+struct xresource {
+	struct resource xres;	/* Must be first */
+	int xflag;		/* 1 == used */
+	char xname[XNMLN+1];
+};
+
+static struct xresource xresv[XNRES];
+
+static struct xresource *xres_alloc(void) {
+	struct xresource *xrp;
+	int n;
+
+	xrp = xresv;
+	for (n = 0; n < XNRES; n++) {
+		if (xrp->xflag == 0) {
+			xrp->xflag = 1;
+			return xrp;
+		}
+		xrp++;
+	}
+	return NULL;
+}
+
+static void xres_free(struct xresource *xrp) {
+	xrp->xflag = 0;
+}
+
+/*
+ * These are typically used in PCI drivers
+ * which are trying to be cross-platform.
+ *
+ * Bus type is always zero on IIep.
+ */
+void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+	char name[14];
+
+	sprintf(name, "phys_%08x", (u32)offset);
+	return _sparc_alloc_io(0, (unsigned long)offset, size, name);
+}
+EXPORT_SYMBOL(ioremap);
+
+/*
+ * Complementary to ioremap().
+ */
+void iounmap(volatile void __iomem *virtual)
+{
+	unsigned long vaddr = (unsigned long) virtual & PAGE_MASK;
+	struct resource *res;
+
+	/*
+	 * XXX Too slow. Can have 8192 DVMA pages on sun4m in the worst case.
+	 * This probably warrants some sort of hashing.
+	 */
+	if ((res = lookup_resource(&sparc_iomap, vaddr)) == NULL) {
+		printk("free_io/iounmap: cannot free %lx\n", vaddr);
+		return;
+	}
+	_sparc_free_io(res);
+
+	if ((char *)res >= (char*)xresv && (char *)res < (char *)&xresv[XNRES]) {
+		xres_free((struct xresource *)res);
+	} else {
+		kfree(res);
+	}
+}
+EXPORT_SYMBOL(iounmap);
+
+void __iomem *of_ioremap(struct resource *res, unsigned long offset,
+			 unsigned long size, char *name)
+{
+	return _sparc_alloc_io(res->flags & 0xF,
+			       res->start + offset,
+			       size, name);
+}
+EXPORT_SYMBOL(of_ioremap);
+
+void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
+{
+	iounmap(base);
+}
+EXPORT_SYMBOL(of_iounmap);
+
+/*
+ * Meat of mapping
+ */
+static void __iomem *_sparc_alloc_io(unsigned int busno, unsigned long phys,
+    unsigned long size, char *name)
+{
+	static int printed_full;
+	struct xresource *xres;
+	struct resource *res;
+	char *tack;
+	int tlen;
+	void __iomem *va;	/* P3 diag */
+
+	if (name == NULL) name = "???";
+
+	if ((xres = xres_alloc()) != NULL) {
+		tack = xres->xname;
+		res = &xres->xres;
+	} else {
+		if (!printed_full) {
+			printk("ioremap: done with statics, switching to malloc\n");
+			printed_full = 1;
+		}
+		tlen = strlen(name);
+		tack = kmalloc(sizeof (struct resource) + tlen + 1, GFP_KERNEL);
+		if (tack == NULL) return NULL;
+		memset(tack, 0, sizeof(struct resource));
+		res = (struct resource *) tack;
+		tack += sizeof (struct resource);
+	}
+
+	strlcpy(tack, name, XNMLN+1);
+	res->name = tack;
+
+	va = _sparc_ioremap(res, busno, phys, size);
+	/* printk("ioremap(0x%x:%08lx[0x%lx])=%p\n", busno, phys, size, va); */ /* P3 diag */
+	return va;
+}
+
+/*
+ * Carve a window out of sparc_iomap and wire it to the physical range.
+ */
+static void __iomem *
+_sparc_ioremap(struct resource *res, u32 bus, u32 pa, int sz)
+{
+	unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
+
+	if (allocate_resource(&sparc_iomap, res,
+	    (offset + sz + PAGE_SIZE-1) & PAGE_MASK,
+	    sparc_iomap.start, sparc_iomap.end, PAGE_SIZE, NULL, NULL) != 0) {
+		/* Usually we cannot see printks in this case. */
+		prom_printf("alloc_io_res(%s): cannot occupy\n",
+		    (res->name != NULL)? res->name: "???");
+		prom_halt();
+	}
+
+	pa &= PAGE_MASK;
+	srmmu_mapiorange(bus, pa, res->start, resource_size(res));
+
+	return (void __iomem *)(unsigned long)(res->start + offset);
+}
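+
+/* Mapping example (illustrative): pa == 0x10002100 with 4K pages
+ * gives offset == 0x100; a page-rounded window is carved out of
+ * sparc_iomap, srmmu_mapiorange() wires it to pa & PAGE_MASK, and
+ * the returned cookie is res->start + 0x100 so it points exactly at
+ * the requested physical byte.
+ */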
+
+/*
+ * Complementary to _sparc_ioremap().
+ */
+static void _sparc_free_io(struct resource *res)
+{
+	unsigned long plen;
+
+	plen = resource_size(res);
+	BUG_ON((plen & (PAGE_SIZE-1)) != 0);
+	srmmu_unmapiorange(res->start, plen);
+	release_resource(res);
+}
+
+#ifdef CONFIG_SBUS
+
+void sbus_set_sbus64(struct device *dev, int x)
+{
+	printk("sbus_set_sbus64: unsupported\n");
+}
+EXPORT_SYMBOL(sbus_set_sbus64);
+
+/*
+ * Allocate a chunk of memory suitable for DMA.
+ * Typically devices use them for control blocks.
+ * CPU may access them without any explicit flushing.
+ */
+static void *sbus_alloc_coherent(struct device *dev, size_t len,
+				 dma_addr_t *dma_addrp, gfp_t gfp,
+				 unsigned long attrs)
+{
+	struct platform_device *op = to_platform_device(dev);
+	unsigned long len_total = PAGE_ALIGN(len);
+	unsigned long va;
+	struct resource *res;
+	int order;
+
+	/* XXX why are some lengths signed, others unsigned? */
+	if (len <= 0) {
+		return NULL;
+	}
+	/* XXX So what is maxphys for us and how do drivers know it? */
+	if (len > 256*1024) {			/* __get_free_pages() limit */
+		return NULL;
+	}
+
+	order = get_order(len_total);
+	va = __get_free_pages(gfp, order);
+	if (va == 0)
+		goto err_nopages;
+
+	if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
+		goto err_nomem;
+
+	if (allocate_resource(&_sparc_dvma, res, len_total,
+	    _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
+		printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
+		goto err_nova;
+	}
+
+	// XXX The sbus_map_dma_area does this for us below, see comments.
+	// srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
+	/*
+	 * XXX That's where sdev would be used. Currently we load
+	 * all iommu tables with the same translations.
+	 */
+	if (sbus_map_dma_area(dev, dma_addrp, va, res->start, len_total) != 0)
+		goto err_noiommu;
+
+	res->name = op->dev.of_node->name;
+
+	return (void *)(unsigned long)res->start;
+
+err_noiommu:
+	release_resource(res);
+err_nova:
+	kfree(res);
+err_nomem:
+	free_pages(va, order);
+err_nopages:
+	return NULL;
+}
+
+static void sbus_free_coherent(struct device *dev, size_t n, void *p,
+			       dma_addr_t ba, unsigned long attrs)
+{
+	struct resource *res;
+	struct page *pgv;
+
+	if ((res = lookup_resource(&_sparc_dvma,
+	    (unsigned long)p)) == NULL) {
+		printk("sbus_free_consistent: cannot free %p\n", p);
+		return;
+	}
+
+	if (((unsigned long)p & (PAGE_SIZE-1)) != 0) {
+		printk("sbus_free_consistent: unaligned va %p\n", p);
+		return;
+	}
+
+	n = PAGE_ALIGN(n);
+	if (resource_size(res) != n) {
+		printk("sbus_free_consistent: region 0x%lx asked 0x%zx\n",
+		    (long)resource_size(res), n);
+		return;
+	}
+
+	release_resource(res);
+	kfree(res);
+
+	pgv = virt_to_page(p);
+	sbus_unmap_dma_area(dev, ba, n);
+
+	__free_pages(pgv, get_order(n));
+}
+
+/*
+ * Map a chunk of memory so that devices can see it.
+ * CPU view of this memory may be inconsistent with
+ * a device view and explicit flushing is necessary.
+ */
+static dma_addr_t sbus_map_page(struct device *dev, struct page *page,
+				unsigned long offset, size_t len,
+				enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	void *va = page_address(page) + offset;
+
+	/* XXX why are some lengths signed, others unsigned? */
+	if (len <= 0) {
+		return 0;
+	}
+	/* XXX So what is maxphys for us and how do drivers know it? */
+	if (len > 256*1024) {			/* __get_free_pages() limit */
+		return 0;
+	}
+	return mmu_get_scsi_one(dev, va, len);
+}
+
+static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n,
+			    enum dma_data_direction dir, unsigned long attrs)
+{
+	mmu_release_scsi_one(dev, ba, n);
+}
+
+static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n,
+		       enum dma_data_direction dir, unsigned long attrs)
+{
+	mmu_get_scsi_sgl(dev, sg, n);
+	return n;
+}
+
+static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n,
+			  enum dma_data_direction dir, unsigned long attrs)
+{
+	mmu_release_scsi_sgl(dev, sg, n);
+}
+
+static void sbus_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+				 int n,	enum dma_data_direction dir)
+{
+	BUG();
+}
+
+static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+				    int n, enum dma_data_direction dir)
+{
+	BUG();
+}
+
+static int sbus_dma_supported(struct device *dev, u64 mask)
+{
+	return 0;
+}
+
+static const struct dma_map_ops sbus_dma_ops = {
+	.alloc			= sbus_alloc_coherent,
+	.free			= sbus_free_coherent,
+	.map_page		= sbus_map_page,
+	.unmap_page		= sbus_unmap_page,
+	.map_sg			= sbus_map_sg,
+	.unmap_sg		= sbus_unmap_sg,
+	.sync_sg_for_cpu	= sbus_sync_sg_for_cpu,
+	.sync_sg_for_device	= sbus_sync_sg_for_device,
+	.dma_supported		= sbus_dma_supported,
+};
+
+static int __init sparc_register_ioport(void)
+{
+	register_proc_sparc_ioport();
+
+	return 0;
+}
+
+arch_initcall(sparc_register_ioport);
+
+#endif /* CONFIG_SBUS */
+
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices.
+ */
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
+{
+	unsigned long len_total = PAGE_ALIGN(size);
+	void *va;
+	struct resource *res;
+	int order;
+
+	if (size == 0) {
+		return NULL;
+	}
+	if (size > 256*1024) {			/* __get_free_pages() limit */
+		return NULL;
+	}
+
+	order = get_order(len_total);
+	va = (void *) __get_free_pages(gfp, order);
+	if (va == NULL) {
+		printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT);
+		goto err_nopages;
+	}
+
+	if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
+		printk("%s: no core\n", __func__);
+		goto err_nomem;
+	}
+
+	if (allocate_resource(&_sparc_dvma, res, len_total,
+	    _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
+		printk("%s: cannot occupy 0x%lx", __func__, len_total);
+		goto err_nova;
+	}
+	srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
+
+	*dma_handle = virt_to_phys(va);
+	return (void *) res->start;
+
+err_nova:
+	kfree(res);
+err_nomem:
+	free_pages((unsigned long)va, order);
+err_nopages:
+	return NULL;
+}
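+
+/* Note the asymmetry above: the returned CPU pointer is the DVMA
+ * window address (res->start), not va itself, while *dma_handle is
+ * the physical address of the backing pages.  arch_dma_free() below
+ * relies on being handed back exactly this pair.
+ */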
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned by arch_dma_alloc, size must be the same as
+ * what was passed into arch_dma_alloc, and likewise dma_addr must be the same
+ * as what *dma_handle was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_addr, unsigned long attrs)
+{
+	struct resource *res;
+
+	if ((res = lookup_resource(&_sparc_dvma,
+	    (unsigned long)cpu_addr)) == NULL) {
+		printk("%s: cannot free %p\n", __func__, cpu_addr);
+		return;
+	}
+
+	if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) {
+		printk("%s: unaligned va %p\n", __func__, cpu_addr);
+		return;
+	}
+
+	size = PAGE_ALIGN(size);
+	if (resource_size(res) != size) {
+		printk("%s: region 0x%lx asked 0x%zx\n", __func__,
+		    (long)resource_size(res), size);
+		return;
+	}
+
+	dma_make_coherent(dma_addr, size);
+	srmmu_unmapiorange((unsigned long)cpu_addr, size);
+
+	release_resource(res);
+	kfree(res);
+	free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size));
+}
+
+/* IIep is write-through, so no flushing is needed on cpu to device transfers. */
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	if (dir != PCI_DMA_TODEVICE)
+		dma_make_coherent(paddr, PAGE_ALIGN(size));
+}
+
+const struct dma_map_ops *dma_ops = &sbus_dma_ops;
+EXPORT_SYMBOL(dma_ops);
+
+#ifdef CONFIG_PROC_FS
+
+static int sparc_io_proc_show(struct seq_file *m, void *v)
+{
+	struct resource *root = m->private, *r;
+	const char *nm;
+
+	for (r = root->child; r != NULL; r = r->sibling) {
+		if ((nm = r->name) == NULL) nm = "???";
+		seq_printf(m, "%016llx-%016llx: %s\n",
+				(unsigned long long)r->start,
+				(unsigned long long)r->end, nm);
+	}
+
+	return 0;
+}
+#endif /* CONFIG_PROC_FS */
+
+static void register_proc_sparc_ioport(void)
+{
+#ifdef CONFIG_PROC_FS
+	proc_create_single_data("io_map", 0, NULL, sparc_io_proc_show,
+			&sparc_iomap);
+	proc_create_single_data("dvma_map", 0, NULL, sparc_io_proc_show,
+			&_sparc_dvma);
+#endif
+}
diff --git a/arch/sparc/kernel/irq.h b/arch/sparc/kernel/irq.h
new file mode 100644
index 0000000..b02026a
--- /dev/null
+++ b/arch/sparc/kernel/irq.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/platform_device.h>
+
+#include <asm/cpu_type.h>
+
+struct irq_bucket {
+        struct irq_bucket *next;
+        unsigned int real_irq;
+        unsigned int irq;
+        unsigned int pil;
+};
+
+#define SUN4M_HARD_INT(x)       (0x000000001 << (x))
+#define SUN4M_SOFT_INT(x)       (0x000010000 << (x))
+
+#define SUN4D_MAX_BOARD 10
+#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5)
+
+/* Map from the irq identifier used in hw to the
+ * irq_bucket. The map is sufficiently large to hold
+ * the sun4d hw identifiers.
+ */
+extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+
+
+/* sun4m specific type definitions */
+
+/* This maps direct to CPU specific interrupt registers */
+struct sun4m_irq_percpu {
+	u32	pending;
+	u32	clear;
+	u32	set;
+};
+
+/* This maps direct to global interrupt registers */
+struct sun4m_irq_global {
+	u32	pending;
+	u32	mask;
+	u32	mask_clear;
+	u32	mask_set;
+	u32	interrupt_target;
+};
+
+extern struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
+extern struct sun4m_irq_global __iomem *sun4m_irq_global;
+
+/* The following definitions describe the individual platform features: */
+#define FEAT_L10_CLOCKSOURCE (1 << 0) /* L10 timer is used as a clocksource */
+#define FEAT_L10_CLOCKEVENT  (1 << 1) /* L10 timer is used as a clockevent */
+#define FEAT_L14_ONESHOT     (1 << 2) /* L14 timer clockevent can oneshot */
+
+/*
+ * Platform specific configuration
+ * The individual platforms assign their platform
+ * specifics in their init functions.
+ */
+struct sparc_config {
+	void (*init_timers)(void);
+	unsigned int (*build_device_irq)(struct platform_device *op,
+	                                 unsigned int real_irq);
+
+	/* generic clockevent features - see FEAT_* above */
+	int features;
+
+	/* clock rate used for clock event timer */
+	int clock_rate;
+
+	/* one period for clock source timer */
+	unsigned int cs_period;
+
+	/* function to obtain offset for cs period */
+	unsigned int (*get_cycles_offset)(void);
+
+	void (*clear_clock_irq)(void);
+	void (*load_profile_irq)(int cpu, unsigned int limit);
+};
+extern struct sparc_config sparc_config;
+
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil);
+void irq_link(unsigned int irq);
+void irq_unlink(unsigned int irq);
+void handler_irq(unsigned int pil, struct pt_regs *regs);
+
+unsigned long leon_get_irqmask(unsigned int irq);
+
+/* irq_32.c */
+void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs);
+
+/* sun4m_irq.c */
+void sun4m_nmi(struct pt_regs *regs);
+
+/* sun4d_irq.c */
+void sun4d_handler_irq(unsigned int pil, struct pt_regs *regs);
+
+#ifdef CONFIG_SMP
+
+/* All SUN4D IPIs are sent on this IRQ, which may be shared with hard IRQs */
+#define SUN4D_IPI_IRQ 13
+
+void sun4d_ipi_interrupt(void);
+
+#endif
diff --git a/arch/sparc/kernel/irq_32.c b/arch/sparc/kernel/irq_32.c
new file mode 100644
index 0000000..e8452be
--- /dev/null
+++ b/arch/sparc/kernel/irq_32.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Interrupt request handling routines. On the
+ * Sparc the IRQs are basically 'cast in stone'
+ * and you are supposed to probe the prom's device
+ * node trees to find out who's got which IRQ.
+ *
+ *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1995 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ *  Copyright (C) 1995,2002 Pete A. Zaitcev (zaitcev@yahoo.com)
+ *  Copyright (C) 1996 Dave Redman (djhr@tadpole.co.uk)
+ *  Copyright (C) 1998-2000 Anton Blanchard (anton@samba.org)
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpudata.h>
+#include <asm/setup.h>
+#include <asm/pcic.h>
+#include <asm/leon.h>
+
+#include "kernel.h"
+#include "irq.h"
+
+/* platform specific irq setup */
+struct sparc_config sparc_config;
+
+unsigned long arch_local_irq_save(void)
+{
+	unsigned long retval;
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"rd	%%psr, %0\n\t"
+		"or	%0, %2, %1\n\t"
+		"wr	%1, 0, %%psr\n\t"
+		"nop; nop; nop\n"
+		: "=&r" (retval), "=r" (tmp)
+		: "i" (PSR_PIL)
+		: "memory");
+
+	return retval;
+}
+EXPORT_SYMBOL(arch_local_irq_save);
+
+void arch_local_irq_enable(void)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"rd	%%psr, %0\n\t"
+		"andn	%0, %1, %0\n\t"
+		"wr	%0, 0, %%psr\n\t"
+		"nop; nop; nop\n"
+		: "=&r" (tmp)
+		: "i" (PSR_PIL)
+		: "memory");
+}
+EXPORT_SYMBOL(arch_local_irq_enable);
+
+void arch_local_irq_restore(unsigned long old_psr)
+{
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"rd	%%psr, %0\n\t"
+		"and	%2, %1, %2\n\t"
+		"andn	%0, %1, %0\n\t"
+		"wr	%0, %2, %%psr\n\t"
+		"nop; nop; nop\n"
+		: "=&r" (tmp)
+		: "i" (PSR_PIL), "r" (old_psr)
+		: "memory");
+}
+EXPORT_SYMBOL(arch_local_irq_restore);
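+
+/* The restore sequence above merges only the PSR_PIL field of old_psr
+ * back into the live %psr, giving the usual save/restore pairing:
+ *
+ *	unsigned long flags = arch_local_irq_save();
+ *	... interrupts masked via PIL ...
+ *	arch_local_irq_restore(flags);
+ */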
+
+/*
+ * Dave Redman (djhr@tadpole.co.uk)
+ *
+ * IRQ numbers. These are no longer restricted to 15.
+ *
+ * This is done to enable SBUS cards and onboard IO to be masked
+ * correctly; using the interrupt level isn't good enough.
+ *
+ * For example:
+ *   A device interrupting at sbus level6 and the Floppy both come in
+ *   at IRQ11, but enabling and disabling them requires writing to
+ *   different bits in the SLAVIO/SEC.
+ *
+ * As a result of these changes sun4m machines could now support
+ * directed CPU interrupts using the existing enable/disable irq code
+ * with tweaks.
+ *
+ * Sun4d complicates things even further.  IRQ numbers are arbitrary
+ * 32-bit values in that case.  Since this is similar to sparc64,
+ * we adopt a virtual IRQ numbering scheme as is done there.
+ * Virtual interrupt numbers are allocated by build_irq().  So NR_IRQS
+ * just becomes a limit of how many interrupt sources we can handle in
+ * a single system.  Even fully loaded SS2000 machines top off at
+ * about 32 interrupt sources or so, therefore a NR_IRQS value of 64
+ * is more than enough.
+ *
+ * We keep a map of per-PIL enable interrupts.  These get wired
+ * up via the irq_chip->startup() method which gets invoked by
+ * the generic IRQ layer during request_irq().
+ */
+
+
+/* Table of allocated irqs. Unused entries have irq == 0 */
+static struct irq_bucket irq_table[NR_IRQS];
+/* Protect access to irq_table */
+static DEFINE_SPINLOCK(irq_table_lock);
+
+/* Map between the irq identifier used in hw to the irq_bucket. */
+struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+/* Protect access to irq_map */
+static DEFINE_SPINLOCK(irq_map_lock);
+
+/* Allocate a new irq from the irq_table */
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
+{
+	unsigned long flags;
+	unsigned int i;
+
+	spin_lock_irqsave(&irq_table_lock, flags);
+	for (i = 1; i < NR_IRQS; i++) {
+		if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil)
+			goto found;
+	}
+
+	for (i = 1; i < NR_IRQS; i++) {
+		if (!irq_table[i].irq)
+			break;
+	}
+
+	if (i < NR_IRQS) {
+		irq_table[i].real_irq = real_irq;
+		irq_table[i].irq = i;
+		irq_table[i].pil = pil;
+	} else {
+		printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+		i = 0;
+	}
+found:
+	spin_unlock_irqrestore(&irq_table_lock, flags);
+
+	return i;
+}
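+
+/* irq_alloc() is idempotent per (real_irq, pil) pair; asking twice
+ * for the same hardware source yields the same virtual irq
+ * (hypothetical numbers):
+ *
+ *	virq = irq_alloc(0x2c, 3);
+ *	BUG_ON(virq != irq_alloc(0x2c, 3));
+ *
+ * A return value of 0 means the table is full.
+ */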
+
+/* For a single pil, handler_irq may need to call several
+ * interrupt handlers. Use irq_map as the entry point into irq_table,
+ * and let each entry in irq_table point to the next one.
+ */
+void irq_link(unsigned int irq)
+{
+	struct irq_bucket *p;
+	unsigned long flags;
+	unsigned int pil;
+
+	BUG_ON(irq >= NR_IRQS);
+
+	spin_lock_irqsave(&irq_map_lock, flags);
+
+	p = &irq_table[irq];
+	pil = p->pil;
+	BUG_ON(pil >= SUN4D_MAX_IRQ);
+	p->next = irq_map[pil];
+	irq_map[pil] = p;
+
+	spin_unlock_irqrestore(&irq_map_lock, flags);
+}
+
+void irq_unlink(unsigned int irq)
+{
+	struct irq_bucket *p, **pnext;
+	unsigned long flags;
+
+	BUG_ON(irq >= NR_IRQS);
+
+	spin_lock_irqsave(&irq_map_lock, flags);
+
+	p = &irq_table[irq];
+	BUG_ON(p->pil >= SUN4D_MAX_IRQ);
+	pnext = &irq_map[p->pil];
+	while (*pnext != p)
+		pnext = &(*pnext)->next;
+	*pnext = p->next;
+
+	spin_unlock_irqrestore(&irq_map_lock, flags);
+}
+
+
+/* /proc/interrupts printing */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	int j;
+
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).irq_resched_count);
+	seq_printf(p, "     IPI rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).irq_call_count);
+	seq_printf(p, "     IPI function call interrupts\n");
+#endif
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).counter);
+	seq_printf(p, "     Non-maskable interrupts\n");
+	return 0;
+}
+
+void handler_irq(unsigned int pil, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+	struct irq_bucket *p;
+
+	BUG_ON(pil > 15);
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+
+	p = irq_map[pil];
+	while (p) {
+		struct irq_bucket *next = p->next;
+
+		generic_handle_irq(p->irq);
+		p = next;
+	}
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+#if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
+static unsigned int floppy_irq;
+
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
+{
+	unsigned int cpu_irq;
+	int err;
+
+	err = request_irq(irq, irq_handler, 0, "floppy", NULL);
+	if (err)
+		return -1;
+
+	/* Save for later use in floppy interrupt handler */
+	floppy_irq = irq;
+
+	cpu_irq = (irq & (NR_IRQS - 1));
+
+	/* Dork with trap table if we get this far. */
+#define INSTANTIATE(table) \
+	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \
+	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \
+		SPARC_BRANCH((unsigned long) floppy_hardint, \
+			     (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\
+	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \
+	table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP;
+
+	INSTANTIATE(sparc_ttable)
+
+#if defined CONFIG_SMP
+	if (sparc_cpu_model != sparc_leon) {
+		struct tt_entry *trap_table;
+
+		trap_table = &trapbase_cpu1;
+		INSTANTIATE(trap_table)
+		trap_table = &trapbase_cpu2;
+		INSTANTIATE(trap_table)
+		trap_table = &trapbase_cpu3;
+		INSTANTIATE(trap_table)
+	}
+#endif
+#undef INSTANTIATE
+	/*
+	 * XXX The correct thing would be to flush only the I- and D-cache
+	 * lines which contain the handler in question. But as of the time
+	 * of writing we have no CPU-neutral interface for fine-grained flushes.
+	 */
+	flush_cache_all();
+	return 0;
+}
+EXPORT_SYMBOL(sparc_floppy_request_irq);
+
+/*
+ * These variables are used to access state from the assembler
+ * interrupt handler, floppy_hardint, so we cannot put these in
+ * the floppy driver image because that would not work in the
+ * modular case.
+ */
+volatile unsigned char *fdc_status;
+EXPORT_SYMBOL(fdc_status);
+
+char *pdma_vaddr;
+EXPORT_SYMBOL(pdma_vaddr);
+
+unsigned long pdma_size;
+EXPORT_SYMBOL(pdma_size);
+
+volatile int doing_pdma;
+EXPORT_SYMBOL(doing_pdma);
+
+char *pdma_base;
+EXPORT_SYMBOL(pdma_base);
+
+unsigned long pdma_areasize;
+EXPORT_SYMBOL(pdma_areasize);
+
+/* Use the generic irq support to call floppy_interrupt
+ * which was set up using request_irq() in sparc_floppy_request_irq().
+ * We only have one floppy interrupt so we do not need to check
+ * for additional handlers being wired up by irq_link().
+ */
+void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+	generic_handle_irq(floppy_irq);
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+#endif
+
+/* djhr
+ * This could probably be made indirect too and assigned in the CPU
+ * bits of the code. That would be much nicer I think and would also
+ * fit in with the idea of being able to tune your kernel for your machine
+ * by removing unrequired machine and device support.
+ *
+ */
+
+void __init init_IRQ(void)
+{
+	switch (sparc_cpu_model) {
+	case sun4m:
+		pcic_probe();
+		if (pcic_present())
+			sun4m_pci_init_IRQ();
+		else
+			sun4m_init_IRQ();
+		break;
+
+	case sun4d:
+		sun4d_init_IRQ();
+		break;
+
+	case sparc_leon:
+		leon_init_IRQ();
+		break;
+
+	default:
+		prom_printf("Cannot initialize IRQs on this Sun machine...");
+		break;
+	}
+}
+
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
new file mode 100644
index 0000000..713670e
--- /dev/null
+++ b/arch/sparc/kernel/irq_64.c
@@ -0,0 +1,1156 @@
+// SPDX-License-Identifier: GPL-2.0
+/* irq.c: UltraSparc IRQ handling/init/registry.
+ *
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998  Eddie C. Dost    (ecd@skynet.be)
+ * Copyright (C) 1998  Jakub Jelinek    (jj@ultra.linux.cz)
+ */
+
+#include <linux/sched.h>
+#include <linux/linkage.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/ftrace.h>
+#include <linux/irq.h>
+
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <linux/atomic.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/upa.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/timer.h>
+#include <asm/smp.h>
+#include <asm/starfire.h>
+#include <linux/uaccess.h>
+#include <asm/cache.h>
+#include <asm/cpudata.h>
+#include <asm/auxio.h>
+#include <asm/head.h>
+#include <asm/hypervisor.h>
+#include <asm/cacheflush.h>
+
+#include "entry.h"
+#include "cpumap.h"
+#include "kstack.h"
+
+struct ino_bucket *ivector_table;
+unsigned long ivector_table_pa;
+
+/* On several sun4u processors, it is illegal to mix bypass and
+ * non-bypass accesses.  Therefore we access all INO buckets
+ * using bypass accesses only.
+ */
+static unsigned long bucket_get_chain_pa(unsigned long bucket_pa)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__("ldxa	[%1] %2, %0"
+			     : "=&r" (ret)
+			     : "r" (bucket_pa +
+				    offsetof(struct ino_bucket,
+					     __irq_chain_pa)),
+			       "i" (ASI_PHYS_USE_EC));
+
+	return ret;
+}
+
+static void bucket_clear_chain_pa(unsigned long bucket_pa)
+{
+	__asm__ __volatile__("stxa	%%g0, [%0] %1"
+			     : /* no outputs */
+			     : "r" (bucket_pa +
+				    offsetof(struct ino_bucket,
+					     __irq_chain_pa)),
+			       "i" (ASI_PHYS_USE_EC));
+}
+
+static unsigned int bucket_get_irq(unsigned long bucket_pa)
+{
+	unsigned int ret;
+
+	__asm__ __volatile__("lduwa	[%1] %2, %0"
+			     : "=&r" (ret)
+			     : "r" (bucket_pa +
+				    offsetof(struct ino_bucket,
+					     __irq)),
+			       "i" (ASI_PHYS_USE_EC));
+
+	return ret;
+}
+
+static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq)
+{
+	__asm__ __volatile__("stwa	%0, [%1] %2"
+			     : /* no outputs */
+			     : "r" (irq),
+			       "r" (bucket_pa +
+				    offsetof(struct ino_bucket,
+					     __irq)),
+			       "i" (ASI_PHYS_USE_EC));
+}
+
+#define irq_work_pa(__cpu)	&(trap_block[(__cpu)].irq_worklist_pa)
+
+static unsigned long hvirq_major __initdata;
+static int __init early_hvirq_major(char *p)
+{
+	int rc = kstrtoul(p, 10, &hvirq_major);
+
+	return rc;
+}
+early_param("hvirq", early_hvirq_major);
+
+static int hv_irq_version;
+
+/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie
+ * based interfaces, but:
+ *
+ * 1) Several OSs, Solaris and Linux included, use them even when only
+ *    negotiating version 1.0 (or failing to negotiate at all).  So the
+ *    hypervisor has a workaround that provides the VIRQ interfaces even
+ *    when only version 1.0 of the API is in use.
+ *
+ * 2) Second, and more importantly, with major version 2.0 these VIRQ
+ *    interfaces were only actually hooked up for LDC interrupts, even
+ *    though the Hypervisor specification clearly stated:
+ *
+ *	The new interrupt API functions will be available to a guest
+ *	when it negotiates version 2.0 in the interrupt API group 0x2. When
+ *	a guest negotiates version 2.0, all interrupt sources will only
+ *	support using the cookie interface, and any attempt to use the
+ *	version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the
+ *	ENOTSUPPORTED error being returned.
+ *
+ *   with an emphasis on "all interrupt sources".
+ *
+ * To correct this, major version 3.0 was created which does actually
+ * support VIRQs for all interrupt sources (not just LDC devices).  So
+ * if we want to move completely over to the cookie based VIRQs we must
+ * negotiate major version 3.0 or later of HV_GRP_INTR.
+ */
+static bool sun4v_cookie_only_virqs(void)
+{
+	if (hv_irq_version >= 3)
+		return true;
+	return false;
+}
+
+static void __init irq_init_hv(void)
+{
+	unsigned long hv_error, major, minor = 0;
+
+	if (tlb_type != hypervisor)
+		return;
+
+	if (hvirq_major)
+		major = hvirq_major;
+	else
+		major = 3;
+
+	hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor);
+	if (!hv_error)
+		hv_irq_version = major;
+	else
+		hv_irq_version = 1;
+
+	pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n",
+		hv_irq_version,
+		sun4v_cookie_only_virqs() ? "enabled" : "disabled");
+}
+
+/* This function is for the timer interrupt. */
+int __init arch_probe_nr_irqs(void)
+{
+	return 1;
+}
+
+#define DEFAULT_NUM_IVECS	(0xfffU)
+static unsigned int nr_ivec = DEFAULT_NUM_IVECS;
+#define NUM_IVECS (nr_ivec)
+
+static unsigned int __init size_nr_ivec(void)
+{
+	if (tlb_type == hypervisor) {
+		switch (sun4v_chip_type) {
+		/* Athena's devhandle|devino is large. */
+		case SUN4V_CHIP_SPARC64X:
+			nr_ivec = 0xffff;
+			break;
+		}
+	}
+	return nr_ivec;
+}
+
+struct irq_handler_data {
+	union {
+		struct {
+			unsigned int dev_handle;
+			unsigned int dev_ino;
+		};
+		unsigned long sysino;
+	};
+	struct ino_bucket bucket;
+	unsigned long	iclr;
+	unsigned long	imap;
+};
+
+static inline unsigned int irq_data_to_handle(struct irq_data *data)
+{
+	struct irq_handler_data *ihd = irq_data_get_irq_handler_data(data);
+
+	return ihd->dev_handle;
+}
+
+static inline unsigned int irq_data_to_ino(struct irq_data *data)
+{
+	struct irq_handler_data *ihd = irq_data_get_irq_handler_data(data);
+
+	return ihd->dev_ino;
+}
+
+static inline unsigned long irq_data_to_sysino(struct irq_data *data)
+{
+	struct irq_handler_data *ihd = irq_data_get_irq_handler_data(data);
+
+	return ihd->sysino;
+}
+
+void irq_free(unsigned int irq)
+{
+	void *data = irq_get_handler_data(irq);
+
+	kfree(data);
+	irq_set_handler_data(irq, NULL);
+	irq_free_descs(irq, 1);
+}
+
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+{
+	int irq;
+
+	irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL, NULL);
+	if (irq <= 0)
+		goto out;
+
+	return irq;
+out:
+	return 0;
+}
+
+static unsigned int cookie_exists(u32 devhandle, unsigned int devino)
+{
+	unsigned long hv_err, cookie;
+	struct ino_bucket *bucket;
+	unsigned int irq = 0U;
+
+	hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie);
+	if (hv_err) {
+		pr_err("HV get cookie failed hv_err = %ld\n", hv_err);
+		goto out;
+	}
+
+	if (cookie & ((1UL << 63UL))) {
+		cookie = ~cookie;
+		bucket = (struct ino_bucket *) __va(cookie);
+		irq = bucket->__irq;
+	}
+out:
+	return irq;
+}
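+
+/* Decoding note for the test above: a cookie with bit 63 set is the
+ * bitwise complement of an ino_bucket's physical address, so ~cookie
+ * recovers the PA and __va() the kernel pointer; real physical
+ * addresses never have bit 63 set, which makes the check cheap.
+ */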
+
+static unsigned int sysino_exists(u32 devhandle, unsigned int devino)
+{
+	unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+	struct ino_bucket *bucket;
+	unsigned int irq;
+
+	bucket = &ivector_table[sysino];
+	irq = bucket_get_irq(__pa(bucket));
+
+	return irq;
+}
+
+void ack_bad_irq(unsigned int irq)
+{
+	pr_crit("BAD IRQ ack %d\n", irq);
+}
+
+void irq_install_pre_handler(int irq,
+			     void (*func)(unsigned int, void *, void *),
+			     void *arg1, void *arg2)
+{
+	pr_warn("IRQ pre handler NOT supported.\n");
+}
+
+/*
+ * /proc/interrupts printing:
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", cpu_data(j).__nmi_count);
+	seq_printf(p, "     Non-maskable interrupts\n");
+	return 0;
+}
+
+static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
+{
+	unsigned int tid;
+
+	if (this_is_starfire) {
+		tid = starfire_translate(imap, cpuid);
+		tid <<= IMAP_TID_SHIFT;
+		tid &= IMAP_TID_UPA;
+	} else {
+		if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+			unsigned long ver;
+
+			__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+			if ((ver >> 32UL) == __JALAPENO_ID ||
+			    (ver >> 32UL) == __SERRANO_ID) {
+				tid = cpuid << IMAP_TID_SHIFT;
+				tid &= IMAP_TID_JBUS;
+			} else {
+				unsigned int a = cpuid & 0x1f;
+				unsigned int n = (cpuid >> 5) & 0x1f;
+
+				tid = ((a << IMAP_AID_SHIFT) |
+				       (n << IMAP_NID_SHIFT));
+				tid &= (IMAP_AID_SAFARI |
+					IMAP_NID_SAFARI);
+			}
+		} else {
+			tid = cpuid << IMAP_TID_SHIFT;
+			tid &= IMAP_TID_UPA;
+		}
+	}
+
+	return tid;
+}
+
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
+{
+	cpumask_t mask;
+	int cpuid;
+
+	cpumask_copy(&mask, affinity);
+	if (cpumask_equal(&mask, cpu_online_mask)) {
+		cpuid = map_to_cpu(irq);
+	} else {
+		cpumask_t tmp;
+
+		cpumask_and(&tmp, cpu_online_mask, &mask);
+		cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
+	}
+
+	return cpuid;
+}
+#else
+#define irq_choose_cpu(irq, affinity)	\
+	real_hard_smp_processor_id()
+#endif
+
+static void sun4u_irq_enable(struct irq_data *data)
+{
+	struct irq_handler_data *handler_data;
+
+	handler_data = irq_data_get_irq_handler_data(data);
+	if (likely(handler_data)) {
+		unsigned long cpuid, imap, val;
+		unsigned int tid;
+
+		cpuid = irq_choose_cpu(data->irq,
+				       irq_data_get_affinity_mask(data));
+		imap = handler_data->imap;
+
+		tid = sun4u_compute_tid(imap, cpuid);
+
+		val = upa_readq(imap);
+		val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
+			 IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+		val |= tid | IMAP_VALID;
+		upa_writeq(val, imap);
+		upa_writeq(ICLR_IDLE, handler_data->iclr);
+	}
+}
+
+static int sun4u_set_affinity(struct irq_data *data,
+			       const struct cpumask *mask, bool force)
+{
+	struct irq_handler_data *handler_data;
+
+	handler_data = irq_data_get_irq_handler_data(data);
+	if (likely(handler_data)) {
+		unsigned long cpuid, imap, val;
+		unsigned int tid;
+
+		cpuid = irq_choose_cpu(data->irq, mask);
+		imap = handler_data->imap;
+
+		tid = sun4u_compute_tid(imap, cpuid);
+
+		val = upa_readq(imap);
+		val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
+			 IMAP_AID_SAFARI | IMAP_NID_SAFARI);
+		val |= tid | IMAP_VALID;
+		upa_writeq(val, imap);
+		upa_writeq(ICLR_IDLE, handler_data->iclr);
+	}
+
+	return 0;
+}
+
+/* Don't do anything.  The desc->status check for IRQ_DISABLED in
+ * handler_irq() will skip the handler call and that will leave the
+ * interrupt in the sent state.  The next ->enable() call will hit the
+ * ICLR register to reset the state machine.
+ *
+ * This scheme is necessary, instead of clearing the Valid bit in the
+ * IMAP register, to handle the case of IMAP registers being shared by
+ * multiple INOs (and thus ICLR registers).  Since we use a different
+ * virtual IRQ for each shared IMAP instance, the generic code thinks
+ * there is only one user so it prematurely calls ->disable() on
+ * free_irq().
+ *
+ * We have to provide an explicit ->disable() method instead of using
+ * NULL to get the default.  The reason is that if the generic code
+ * sees that, it also hooks up a default ->shutdown method which
+ * invokes ->mask() which we do not want.  See irq_chip_set_defaults().
+ */
+static void sun4u_irq_disable(struct irq_data *data)
+{
+}
+
+static void sun4u_irq_eoi(struct irq_data *data)
+{
+	struct irq_handler_data *handler_data;
+
+	handler_data = irq_data_get_irq_handler_data(data);
+	if (likely(handler_data))
+		upa_writeq(ICLR_IDLE, handler_data->iclr);
+}
+
+static void sun4v_irq_enable(struct irq_data *data)
+{
+	unsigned long cpuid = irq_choose_cpu(data->irq,
+					     irq_data_get_affinity_mask(data));
+	unsigned int ino = irq_data_to_sysino(data);
+	int err;
+
+	err = sun4v_intr_settarget(ino, cpuid);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+		       "err(%d)\n", ino, cpuid, err);
+	err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_intr_setstate(%x): "
+		       "err(%d)\n", ino, err);
+	err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
+		       ino, err);
+}
+
+static int sun4v_set_affinity(struct irq_data *data,
+			       const struct cpumask *mask, bool force)
+{
+	unsigned long cpuid = irq_choose_cpu(data->irq, mask);
+	unsigned int ino = irq_data_to_sysino(data);
+	int err;
+
+	err = sun4v_intr_settarget(ino, cpuid);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
+		       "err(%d)\n", ino, cpuid, err);
+
+	return 0;
+}
+
+static void sun4v_irq_disable(struct irq_data *data)
+{
+	unsigned int ino = irq_data_to_sysino(data);
+	int err;
+
+	err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_intr_setenabled(%x): "
+		       "err(%d)\n", ino, err);
+}
+
+static void sun4v_irq_eoi(struct irq_data *data)
+{
+	unsigned int ino = irq_data_to_sysino(data);
+	int err;
+
+	err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_intr_setstate(%x): "
+		       "err(%d)\n", ino, err);
+}
+
+static void sun4v_virq_enable(struct irq_data *data)
+{
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
+	unsigned long cpuid;
+	int err;
+
+	cpuid = irq_choose_cpu(data->irq, irq_data_get_affinity_mask(data));
+
+	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+		       "err(%d)\n",
+		       dev_handle, dev_ino, cpuid, err);
+	err = sun4v_vintr_set_state(dev_handle, dev_ino,
+				    HV_INTR_STATE_IDLE);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+		       "HV_INTR_STATE_IDLE): err(%d)\n",
+		       dev_handle, dev_ino, err);
+	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+				    HV_INTR_ENABLED);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+		       "HV_INTR_ENABLED): err(%d)\n",
+		       dev_handle, dev_ino, err);
+}
+
+static int sun4v_virt_set_affinity(struct irq_data *data,
+				    const struct cpumask *mask, bool force)
+{
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
+	unsigned long cpuid;
+	int err;
+
+	cpuid = irq_choose_cpu(data->irq, mask);
+
+	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
+		       "err(%d)\n",
+		       dev_handle, dev_ino, cpuid, err);
+
+	return 0;
+}
+
+static void sun4v_virq_disable(struct irq_data *data)
+{
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
+	int err;
+
+	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
+				    HV_INTR_DISABLED);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+		       "HV_INTR_DISABLED): err(%d)\n",
+		       dev_handle, dev_ino, err);
+}
+
+static void sun4v_virq_eoi(struct irq_data *data)
+{
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
+	int err;
+
+	err = sun4v_vintr_set_state(dev_handle, dev_ino,
+				    HV_INTR_STATE_IDLE);
+	if (err != HV_EOK)
+		printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
+		       "HV_INTR_STATE_IDLE): err(%d)\n",
+		       dev_handle, dev_ino, err);
+}
+
+static struct irq_chip sun4u_irq = {
+	.name			= "sun4u",
+	.irq_enable		= sun4u_irq_enable,
+	.irq_disable		= sun4u_irq_disable,
+	.irq_eoi		= sun4u_irq_eoi,
+	.irq_set_affinity	= sun4u_set_affinity,
+	.flags			= IRQCHIP_EOI_IF_HANDLED,
+};
+
+static struct irq_chip sun4v_irq = {
+	.name			= "sun4v",
+	.irq_enable		= sun4v_irq_enable,
+	.irq_disable		= sun4v_irq_disable,
+	.irq_eoi		= sun4v_irq_eoi,
+	.irq_set_affinity	= sun4v_set_affinity,
+	.flags			= IRQCHIP_EOI_IF_HANDLED,
+};
+
+static struct irq_chip sun4v_virq = {
+	.name			= "vsun4v",
+	.irq_enable		= sun4v_virq_enable,
+	.irq_disable		= sun4v_virq_disable,
+	.irq_eoi		= sun4v_virq_eoi,
+	.irq_set_affinity	= sun4v_virt_set_affinity,
+	.flags			= IRQCHIP_EOI_IF_HANDLED,
+};
+
+unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
+{
+	struct irq_handler_data *handler_data;
+	struct ino_bucket *bucket;
+	unsigned int irq;
+	int ino;
+
+	BUG_ON(tlb_type == hypervisor);
+
+	ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
+	bucket = &ivector_table[ino];
+	irq = bucket_get_irq(__pa(bucket));
+	if (!irq) {
+		irq = irq_alloc(0, ino);
+		bucket_set_irq(__pa(bucket), irq);
+		irq_set_chip_and_handler_name(irq, &sun4u_irq,
+					      handle_fasteoi_irq, "IVEC");
+	}
+
+	handler_data = irq_get_handler_data(irq);
+	if (unlikely(handler_data))
+		goto out;
+
+	handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+	if (unlikely(!handler_data)) {
+		prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
+		prom_halt();
+	}
+	irq_set_handler_data(irq, handler_data);
+
+	handler_data->imap  = imap;
+	handler_data->iclr  = iclr;
+
+out:
+	return irq;
+}
+
+static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino,
+		void (*handler_data_init)(struct irq_handler_data *data,
+		u32 devhandle, unsigned int devino),
+		struct irq_chip *chip)
+{
+	struct irq_handler_data *data;
+	unsigned int irq;
+
+	irq = irq_alloc(devhandle, devino);
+	if (!irq)
+		goto out;
+
+	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		pr_err("IRQ handler data allocation failed.\n");
+		irq_free(irq);
+		irq = 0;
+		goto out;
+	}
+
+	irq_set_handler_data(irq, data);
+	handler_data_init(data, devhandle, devino);
+	irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC");
+	data->imap = ~0UL;
+	data->iclr = ~0UL;
+out:
+	return irq;
+}
+
+static unsigned long cookie_assign(unsigned int irq, u32 devhandle,
+		unsigned int devino)
+{
+	struct irq_handler_data *ihd = irq_get_handler_data(irq);
+	unsigned long hv_error, cookie;
+
+	/* handler_irq() needs to be able to find the irq.  The cookie is
+	 * stored with its sign bit set so that sun4v_dev_mondo can tell
+	 * it apart from an ivector_table delivery.
+	 */
+	ihd->bucket.__irq = irq;
+	cookie = ~__pa(&ihd->bucket);
+
+	hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+	if (hv_error)
+		pr_err("HV vintr set cookie failed = %ld\n", hv_error);
+
+	return hv_error;
+}
+
+static void cookie_handler_data(struct irq_handler_data *data,
+				u32 devhandle, unsigned int devino)
+{
+	data->dev_handle = devhandle;
+	data->dev_ino = devino;
+}
+
+static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino,
+				     struct irq_chip *chip)
+{
+	unsigned long hv_error;
+	unsigned int irq;
+
+	irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip);
+
+	hv_error = cookie_assign(irq, devhandle, devino);
+	if (hv_error) {
+		irq_free(irq);
+		irq = 0;
+	}
+
+	return irq;
+}
+
+static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino)
+{
+	unsigned int irq;
+
+	irq = cookie_exists(devhandle, devino);
+	if (irq)
+		goto out;
+
+	irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+
+out:
+	return irq;
+}
+
+static void sysino_set_bucket(unsigned int irq)
+{
+	struct irq_handler_data *ihd = irq_get_handler_data(irq);
+	struct ino_bucket *bucket;
+	unsigned long sysino;
+
+	sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino);
+	BUG_ON(sysino >= nr_ivec);
+	bucket = &ivector_table[sysino];
+	bucket_set_irq(__pa(bucket), irq);
+}
+
+static void sysino_handler_data(struct irq_handler_data *data,
+				u32 devhandle, unsigned int devino)
+{
+	unsigned long sysino;
+
+	sysino = sun4v_devino_to_sysino(devhandle, devino);
+	data->sysino = sysino;
+}
+
+static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino,
+				     struct irq_chip *chip)
+{
+	unsigned int irq;
+
+	irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip);
+	if (!irq)
+		goto out;
+
+	sysino_set_bucket(irq);
+out:
+	return irq;
+}
+
+static int sun4v_build_sysino(u32 devhandle, unsigned int devino)
+{
+	int irq;
+
+	irq = sysino_exists(devhandle, devino);
+	if (irq)
+		goto out;
+
+	irq = sysino_build_irq(devhandle, devino, &sun4v_irq);
+out:
+	return irq;
+}
+
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+{
+	unsigned int irq;
+
+	if (sun4v_cookie_only_virqs())
+		irq = sun4v_build_cookie(devhandle, devino);
+	else
+		irq = sun4v_build_sysino(devhandle, devino);
+
+	return irq;
+}
+
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+{
+	int irq;
+
+	irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+	if (!irq)
+		goto out;
+
+	/* Carried over from the original implementation: the virq must
+	 * not be enabled automatically when it is requested.
+	 */
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+out:
+	return irq;
+}
+
+void *hardirq_stack[NR_CPUS];
+void *softirq_stack[NR_CPUS];
+
+void __irq_entry handler_irq(int pil, struct pt_regs *regs)
+{
+	unsigned long pstate, bucket_pa;
+	struct pt_regs *old_regs;
+	void *orig_sp;
+
+	clear_softint(1 << pil);
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+
+	/* Grab an atomic snapshot of the pending IVECs.  */
+	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
+			     "wrpr	%0, %3, %%pstate\n\t"
+			     "ldx	[%2], %1\n\t"
+			     "stx	%%g0, [%2]\n\t"
+			     "wrpr	%0, 0x0, %%pstate\n\t"
+			     : "=&r" (pstate), "=&r" (bucket_pa)
+			     : "r" (irq_work_pa(smp_processor_id())),
+			       "i" (PSTATE_IE)
+			     : "memory");
+
+	orig_sp = set_hardirq_stack();
+
+	while (bucket_pa) {
+		unsigned long next_pa;
+		unsigned int irq;
+
+		next_pa = bucket_get_chain_pa(bucket_pa);
+		irq = bucket_get_irq(bucket_pa);
+		bucket_clear_chain_pa(bucket_pa);
+
+		generic_handle_irq(irq);
+
+		bucket_pa = next_pa;
+	}
+
+	restore_hardirq_stack(orig_sp);
+
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+void do_softirq_own_stack(void)
+{
+	void *orig_sp, *sp = softirq_stack[smp_processor_id()];
+
+	sp += THREAD_SIZE - 192 - STACK_BIAS;
+
+	__asm__ __volatile__("mov %%sp, %0\n\t"
+			     "mov %1, %%sp"
+			     : "=&r" (orig_sp)
+			     : "r" (sp));
+	__do_softirq();
+	__asm__ __volatile__("mov %0, %%sp"
+			     : : "r" (orig_sp));
+}
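+
+/* For illustration (assuming the usual sparc64 values, THREAD_SIZE == 16K
+ * and STACK_BIAS == 2047): the switch above sets
+ *
+ *	%sp = stack_base + 16384 - 192 - 2047
+ *
+ * which leaves 192 bytes at the top of the softirq stack for the callee's
+ * register window save area, and subtracts the 2047-byte bias that V9
+ * applies to every %sp.  set_hardirq_stack() in kstack.h performs the
+ * same arithmetic for the hardirq stack.
+ */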
+
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(void)
+{
+	unsigned int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_to_desc(irq);
+		struct irq_data *data;
+		unsigned long flags;
+
+		if (!desc)
+			continue;
+		data = irq_desc_get_irq_data(desc);
+		raw_spin_lock_irqsave(&desc->lock, flags);
+		if (desc->action && !irqd_is_per_cpu(data)) {
+			if (data->chip->irq_set_affinity)
+				data->chip->irq_set_affinity(data,
+					irq_data_get_affinity_mask(data),
+					false);
+		}
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+	}
+
+	tick_ops->disable_irq();
+}
+#endif
+
+struct sun5_timer {
+	u64	count0;
+	u64	limit0;
+	u64	count1;
+	u64	limit1;
+};
+
+static struct sun5_timer *prom_timers;
+static u64 prom_limit0, prom_limit1;
+
+static void map_prom_timers(void)
+{
+	struct device_node *dp;
+	const unsigned int *addr;
+
+	/* PROM timer node hangs out in the top level of device siblings... */
+	dp = of_find_node_by_path("/");
+	dp = dp->child;
+	while (dp) {
+		if (!strcmp(dp->name, "counter-timer"))
+			break;
+		dp = dp->sibling;
+	}
+
+	/* Assume that if the node is not present, the PROM uses a different
+	 * tick mechanism which we need not care about.
+	 */
+	if (!dp) {
+		prom_timers = NULL;
+		return;
+	}
+
+	/* If the PROM is really using this, it must have mapped it itself. */
+	addr = of_get_property(dp, "address", NULL);
+	if (!addr) {
+		prom_printf("PROM does not have timer mapped, trying to continue.\n");
+		prom_timers = NULL;
+		return;
+	}
+	prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]);
+}
+
+static void kill_prom_timer(void)
+{
+	if (!prom_timers)
+		return;
+
+	/* Save them away for later. */
+	prom_limit0 = prom_timers->limit0;
+	prom_limit1 = prom_timers->limit1;
+
+	/* Just as on sun4c, the PROM uses a timer which ticks at IRQ 14.
+	 * We turn both off here just to be paranoid.
+	 */
+	prom_timers->limit0 = 0;
+	prom_timers->limit1 = 0;
+
+	/* Wheee, eat the interrupt packet too... */
+	__asm__ __volatile__(
+"	mov	0x40, %%g2\n"
+"	ldxa	[%%g0] %0, %%g1\n"
+"	ldxa	[%%g2] %1, %%g1\n"
+"	stxa	%%g0, [%%g0] %0\n"
+"	membar	#Sync\n"
+	: /* no outputs */
+	: "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R)
+	: "g1", "g2");
+}
+
+void notrace init_irqwork_curcpu(void)
+{
+	int cpu = hard_smp_processor_id();
+
+	trap_block[cpu].irq_worklist_pa = 0UL;
+}
+
+/* Please be very careful with register_one_mondo() and
+ * sun4v_register_mondo_queues().
+ *
+ * On SMP this gets invoked from the CPU trampoline before
+ * the cpu has fully taken over the trap table from OBP,
+ * and its kernel stack + %g6 thread register state is
+ * not fully cooked yet.
+ *
+ * Therefore you cannot make any OBP calls, not even prom_printf,
+ * from these two routines.
+ */
+static void notrace register_one_mondo(unsigned long paddr, unsigned long type,
+				       unsigned long qmask)
+{
+	unsigned long num_entries = (qmask + 1) / 64;
+	unsigned long status;
+
+	status = sun4v_cpu_qconf(type, paddr, num_entries);
+	if (status != HV_EOK) {
+		prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
+			    "err %lu\n", type, paddr, num_entries, status);
+		prom_halt();
+	}
+}
+
+void notrace sun4v_register_mondo_queues(int this_cpu)
+{
+	struct trap_per_cpu *tb = &trap_block[this_cpu];
+
+	register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
+			   tb->cpu_mondo_qmask);
+	register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
+			   tb->dev_mondo_qmask);
+	register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
+			   tb->resum_qmask);
+	register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
+			   tb->nonresum_qmask);
+}
+
+/* Each queue region must be a power of 2 multiple of 64 bytes in
+ * size.  The base real address must be aligned to the size of the
+ * region.  Thus, an 8KB queue must be 8KB aligned, for example.
+ */
+static void __init alloc_one_queue(unsigned long *pa_ptr, unsigned long qmask)
+{
+	unsigned long size = PAGE_ALIGN(qmask + 1);
+	unsigned long order = get_order(size);
+	unsigned long p;
+
+	p = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!p) {
+		prom_printf("SUN4V: Error, cannot allocate queue.\n");
+		prom_halt();
+	}
+
+	*pa_ptr = __pa(p);
+}
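+
+/* Worked example (illustrative): a queue with qmask == 0x3fff describes
+ * a 16KB region, since the mask covers byte offsets 0..qmask:
+ *
+ *	size  = PAGE_ALIGN(0x3fff + 1)	== 16384
+ *	order = get_order(16384)	== 1 with 8KB pages
+ *
+ * __get_free_pages() returns blocks naturally aligned to 2^order pages,
+ * which satisfies the requirement above that the base real address be
+ * aligned to the region size.
+ */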
+
+static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
+{
+#ifdef CONFIG_SMP
+	unsigned long page;
+	void *mondo, *p;
+
+	BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE);
+
+	/* Make sure the mondo block is 64-byte aligned */
+	p = kzalloc(127, GFP_KERNEL);
+	if (!p) {
+		prom_printf("SUN4V: Error, cannot allocate mondo block.\n");
+		prom_halt();
+	}
+	mondo = (void *)(((unsigned long)p + 63) & ~0x3f);
+	tb->cpu_mondo_block_pa = __pa(mondo);
+
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page) {
+		prom_printf("SUN4V: Error, cannot allocate cpu list page.\n");
+		prom_halt();
+	}
+
+	tb->cpu_list_pa = __pa(page);
+#endif
+}
+
+/* Allocate mondo and error queues for all possible cpus.  */
+static void __init sun4v_init_mondo_queues(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct trap_per_cpu *tb = &trap_block[cpu];
+
+		alloc_one_queue(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
+		alloc_one_queue(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
+		alloc_one_queue(&tb->resum_mondo_pa, tb->resum_qmask);
+		alloc_one_queue(&tb->resum_kernel_buf_pa, tb->resum_qmask);
+		alloc_one_queue(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
+		alloc_one_queue(&tb->nonresum_kernel_buf_pa,
+				tb->nonresum_qmask);
+	}
+}
+
+static void __init init_send_mondo_info(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct trap_per_cpu *tb = &trap_block[cpu];
+
+		init_cpu_send_mondo_info(tb);
+	}
+}
+
+static struct irqaction timer_irq_action = {
+	.name = "timer",
+};
+
+static void __init irq_ivector_init(void)
+{
+	unsigned long size, order;
+	unsigned int ivecs;
+
+	/* If we are doing cookie-only VIRQs then we do not need the ivector
+	 * table to process interrupts.
+	 */
+	if (sun4v_cookie_only_virqs())
+		return;
+
+	ivecs = size_nr_ivec();
+	size = sizeof(struct ino_bucket) * ivecs;
+	order = get_order(size);
+	ivector_table = (struct ino_bucket *)
+		__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!ivector_table) {
+		prom_printf("Fatal error, cannot allocate ivector_table\n");
+		prom_halt();
+	}
+	__flush_dcache_range((unsigned long) ivector_table,
+			     ((unsigned long) ivector_table) + size);
+
+	ivector_table_pa = __pa(ivector_table);
+}
+
+/* Only invoked on the boot processor. */
+void __init init_IRQ(void)
+{
+	irq_init_hv();
+	irq_ivector_init();
+	map_prom_timers();
+	kill_prom_timer();
+
+	if (tlb_type == hypervisor)
+		sun4v_init_mondo_queues();
+
+	init_send_mondo_info();
+
+	if (tlb_type == hypervisor) {
+		/* Load up the boot cpu's entries.  */
+		sun4v_register_mondo_queues(hard_smp_processor_id());
+	}
+
+	/* We need to clear any IRQ's pending in the soft interrupt
+	 * registers, a spurious one could be left around from the
+	 * PROM timer which we just disabled.
+	 */
+	clear_softint(get_softint());
+
+	/* Now that ivector table is initialized, it is safe
+	 * to receive IRQ vector traps.  We will normally take
+	 * one or two right now, in case some device PROM used
+	 * to boot us wants to speak to us.  We just ignore them.
+	 */
+	__asm__ __volatile__("rdpr	%%pstate, %%g1\n\t"
+			     "or	%%g1, %0, %%g1\n\t"
+			     "wrpr	%%g1, 0x0, %%pstate"
+			     : /* No outputs */
+			     : "i" (PSTATE_IE)
+			     : "g1");
+
+	irq_to_desc(0)->action = &timer_irq_action;
+}
diff --git a/arch/sparc/kernel/itlb_miss.S b/arch/sparc/kernel/itlb_miss.S
new file mode 100644
index 0000000..5a5d924
--- /dev/null
+++ b/arch/sparc/kernel/itlb_miss.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ITLB ** ICACHE line 1: Context 0 check and TSB load	*/
+	ldxa	[%g0] ASI_IMMU_TSB_8KB_PTR, %g1	! Get TSB 8K pointer
+	ldxa	[%g0] ASI_IMMU, %g6		! Get TAG TARGET
+	srlx	%g6, 48, %g5			! Get context
+	sllx	%g6, 22, %g6			! Zero out context
+	brz,pn	%g5, kvmap_itlb			! Context 0 processing
+	 srlx	%g6, 22, %g6			! Delay slot
+	TSB_LOAD_QUAD(%g1, %g4)			! Load TSB entry
+	cmp	%g4, %g6			! Compare TAG
+
+/* ITLB ** ICACHE line 2: TSB compare and TLB load	*/
+	bne,pn	%xcc, tsb_miss_itlb		! Miss
+	 mov	FAULT_CODE_ITLB, %g3
+	sethi	%hi(_PAGE_EXEC_4U), %g4
+	andcc	%g5, %g4, %g0			! Executable?
+	be,pn	%xcc, tsb_do_fault
+	 nop					! Delay slot, fill me
+	stxa	%g5, [%g0] ASI_ITLB_DATA_IN	! Load TLB
+	retry					! Trap done
+
+/* ITLB ** ICACHE line 3: 				*/
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+/* ITLB ** ICACHE line 4: 				*/
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
diff --git a/arch/sparc/kernel/ivec.S b/arch/sparc/kernel/ivec.S
new file mode 100644
index 0000000..94ba2c3
--- /dev/null
+++ b/arch/sparc/kernel/ivec.S
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	/* The registers for cross calls will be:
+	 *
+	 * DATA 0: [low 32-bits]  Address of function to call, jmp to this
+	 *         [high 32-bits] MMU Context Argument 0, place in %g5
+	 * DATA 1: Address Argument 1, place in %g1
+	 * DATA 2: Address Argument 2, place in %g7
+	 *
+	 * With this method we can do most of the cross-call tlb/cache
+	 * flushing very quickly.
+	 */
+	.align		32
+	.globl		do_ivec
+	.type		do_ivec,#function
+do_ivec:
+	mov		0x40, %g3
+	ldxa		[%g3 + %g0] ASI_INTR_R, %g3
+	sethi		%hi(KERNBASE), %g4
+	cmp		%g3, %g4
+	bgeu,pn		%xcc, do_ivec_xcall
+	 srlx		%g3, 32, %g5
+	stxa		%g0, [%g0] ASI_INTR_RECEIVE
+	membar		#Sync
+
+	sethi		%hi(ivector_table_pa), %g2
+	ldx		[%g2 + %lo(ivector_table_pa)], %g2
+	sllx		%g3, 4, %g3
+	add		%g2, %g3, %g3
+
+	TRAP_LOAD_IRQ_WORK_PA(%g6, %g1)
+
+	ldx		[%g6], %g5
+	stxa		%g5, [%g3] ASI_PHYS_USE_EC
+	stx		%g3, [%g6]
+	wr		%g0, 1 << PIL_DEVICE_IRQ, %set_softint
+	retry
+do_ivec_xcall:
+	mov		0x50, %g1
+	ldxa		[%g1 + %g0] ASI_INTR_R, %g1
+	srl		%g3, 0, %g3
+
+	mov		0x60, %g7
+	ldxa		[%g7 + %g0] ASI_INTR_R, %g7
+	stxa		%g0, [%g0] ASI_INTR_RECEIVE
+	membar		#Sync
+	ba,pt		%xcc, 1f
+	 nop
+
+	.align		32
+1:	jmpl		%g3, %g0
+	 nop
+	.size		do_ivec,.-do_ivec
diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c
new file mode 100644
index 0000000..7f8eac5
--- /dev/null
+++ b/arch/sparc/kernel/jump_label.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+
+#include <linux/jump_label.h>
+#include <linux/memory.h>
+
+#include <asm/cacheflush.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+void arch_jump_label_transform(struct jump_entry *entry,
+			       enum jump_label_type type)
+{
+	u32 *insn = (u32 *) (unsigned long) entry->code;
+	u32 val;
+
+	if (type == JUMP_LABEL_JMP) {
+		s32 off = (s32)entry->target - (s32)entry->code;
+		bool use_v9_branch = false;
+
+		BUG_ON(off & 3);
+
+#ifdef CONFIG_SPARC64
+		if (off <= 0xfffff && off >= -0x100000)
+			use_v9_branch = true;
+#endif
+		if (use_v9_branch) {
+			/* WDISP19 - target is . + immed << 2 */
+			/* ba,pt %xcc, . + off */
+			val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
+		} else {
+			/* WDISP22 - target is . + immed << 2 */
+			BUG_ON(off > 0x7fffff);
+			BUG_ON(off < -0x800000);
+			/* ba . + off */
+			val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
+		}
+	} else {
+		val = 0x01000000;
+	}
+
+	mutex_lock(&text_mutex);
+	*insn = val;
+	flushi(insn);
+	mutex_unlock(&text_mutex);
+}
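+
+/* For illustration, how the branch words above are assembled.  With an
+ * offset of, say, off == 0x100 (64 instructions forward):
+ *
+ *	WDISP19: 0x10680000 | ((0x100 >> 2) & 0x7ffff)  == 0x10680040
+ *	WDISP22: 0x10800000 | ((0x100 >> 2) & 0x3fffff) == 0x10800040
+ *
+ * and 0x01000000 ("sethi 0, %g0") is the canonical nop.  Displacements
+ * count instructions, hence the >> 2.  The v9 "ba,pt %xcc" form is
+ * preferred on sparc64 when the +-1MB WDISP19 range suffices because it
+ * carries a static branch prediction hint.
+ */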
+
+#endif
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
new file mode 100644
index 0000000..ddffd36
--- /dev/null
+++ b/arch/sparc/kernel/kernel.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SPARC_KERNEL_H
+#define __SPARC_KERNEL_H
+
+#include <linux/interrupt.h>
+#include <linux/ftrace.h>
+
+#include <asm/traps.h>
+#include <asm/head.h>
+#include <asm/io.h>
+
+/* cpu.c */
+extern const char *sparc_pmu_type;
+extern unsigned int fsr_storage;
+extern int ncpus_probed;
+
+#ifdef CONFIG_SPARC64
+/* setup_64.c */
+struct seq_file;
+void cpucap_info(struct seq_file *);
+
+static inline unsigned long kimage_addr_to_ra(const void *p)
+{
+	unsigned long val = (unsigned long) p;
+
+	return kern_base + (val - KERNBASE);
+}
+
+/* sys_sparc_64.c */
+asmlinkage long sys_kern_features(void);
+
+/* unaligned_64.c */
+asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn);
+int handle_popc(u32 insn, struct pt_regs *regs);
+void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr);
+void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr);
+
+/* smp_64.c */
+void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
+void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs);
+void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
+void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
+
+/* kgdb_64.c */
+void __irq_entry smp_kgdb_capture_client(int irq, struct pt_regs *regs);
+
+/* pci.c */
+int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask);
+
+/* signal32.c */
+void do_sigreturn32(struct pt_regs *regs);
+asmlinkage void do_rt_sigreturn32(struct pt_regs *regs);
+void do_signal32(struct pt_regs * regs);
+asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp);
+
+/* time_64.c */
+void __init time_init_early(void);
+
+/* compat_audit.c */
+extern unsigned int sparc32_dir_class[];
+extern unsigned int sparc32_chattr_class[];
+extern unsigned int sparc32_write_class[];
+extern unsigned int sparc32_read_class[];
+extern unsigned int sparc32_signal_class[];
+int sparc32_classify_syscall(unsigned int syscall);
+#endif
+
+#ifdef CONFIG_SPARC32
+/* setup_32.c */
+struct linux_romvec;
+void sparc32_start_kernel(struct linux_romvec *rp);
+
+/* cpu.c */
+void cpu_probe(void);
+
+/* traps_32.c */
+void handle_hw_divzero(struct pt_regs *regs, unsigned long pc,
+                       unsigned long npc, unsigned long psr);
+/* irq_32.c */
+extern struct irqaction static_irqaction[];
+extern int static_irq_count;
+extern spinlock_t irq_action_lock;
+
+void unexpected_irq(int irq, void *dev_id, struct pt_regs * regs);
+void init_IRQ(void);
+
+/* sun4m_irq.c */
+void sun4m_init_IRQ(void);
+void sun4m_unmask_profile_irq(void);
+void sun4m_clear_profile_irq(int cpu);
+
+/* sun4m_smp.c */
+void sun4m_cpu_pre_starting(void *arg);
+void sun4m_cpu_pre_online(void *arg);
+void __init smp4m_boot_cpus(void);
+int smp4m_boot_one_cpu(int i, struct task_struct *idle);
+void __init smp4m_smp_done(void);
+void smp4m_cross_call_irq(void);
+void smp4m_percpu_timer_interrupt(struct pt_regs *regs);
+
+/* sun4d_irq.c */
+extern spinlock_t sun4d_imsk_lock;
+
+void sun4d_init_IRQ(void);
+int sun4d_request_irq(unsigned int irq,
+                      irq_handler_t handler,
+                      unsigned long irqflags,
+                      const char *devname, void *dev_id);
+int show_sun4d_interrupts(struct seq_file *, void *);
+void sun4d_distribute_irqs(void);
+void sun4d_free_irq(unsigned int irq, void *dev_id);
+
+/* sun4d_smp.c */
+void sun4d_cpu_pre_starting(void *arg);
+void sun4d_cpu_pre_online(void *arg);
+void __init smp4d_boot_cpus(void);
+int smp4d_boot_one_cpu(int i, struct task_struct *idle);
+void __init smp4d_smp_done(void);
+void smp4d_cross_call_irq(void);
+void smp4d_percpu_timer_interrupt(struct pt_regs *regs);
+
+/* leon_smp.c */
+void leon_cpu_pre_starting(void *arg);
+void leon_cpu_pre_online(void *arg);
+void leonsmp_ipi_interrupt(void);
+void leon_cross_call_irq(void);
+
+/* head_32.S */
+extern unsigned int t_nmi[];
+extern unsigned int linux_trap_ipi15_sun4d[];
+extern unsigned int linux_trap_ipi15_sun4m[];
+
+extern struct tt_entry trapbase;
+extern struct tt_entry trapbase_cpu1;
+extern struct tt_entry trapbase_cpu2;
+extern struct tt_entry trapbase_cpu3;
+
+extern char cputypval[];
+
+/* entry.S */
+extern unsigned long lvl14_save[4];
+extern unsigned int real_irq_entry[];
+extern unsigned int smp4d_ticker[];
+extern unsigned int patchme_maybe_smp_msg[];
+
+void floppy_hardint(void);
+
+/* trampoline_32.S */
+extern unsigned long sun4m_cpu_startup;
+extern unsigned long sun4d_cpu_startup;
+
+/* process_32.c */
+asmlinkage int sparc_do_fork(unsigned long clone_flags,
+                             unsigned long stack_start,
+                             struct pt_regs *regs,
+                             unsigned long stack_size);
+
+/* signal_32.c */
+asmlinkage void do_sigreturn(struct pt_regs *regs);
+asmlinkage void do_rt_sigreturn(struct pt_regs *regs);
+void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0,
+                      unsigned long thread_info_flags);
+asmlinkage int do_sys_sigstack(struct sigstack __user *ssptr,
+                               struct sigstack __user *ossptr,
+                               unsigned long sp);
+
+/* ptrace_32.c */
+asmlinkage int syscall_trace(struct pt_regs *regs, int syscall_exit_p);
+
+/* unaligned_32.c */
+asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn);
+asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn);
+
+/* windows.c */
+void try_to_clear_window_buffer(struct pt_regs *regs, int who);
+
+/* auxio_32.c */
+void __init auxio_probe(void);
+void __init auxio_power_probe(void);
+
+/* pcic.c */
+extern void __iomem *pcic_regs;
+void pcic_nmi(unsigned int pend, struct pt_regs *regs);
+
+/* time_32.c */
+void __init time_init(void);
+
+#endif /* CONFIG_SPARC32 */
+#endif /* !(__SPARC_KERNEL_H) */
diff --git a/arch/sparc/kernel/kgdb_32.c b/arch/sparc/kernel/kgdb_32.c
new file mode 100644
index 0000000..639c8e5
--- /dev/null
+++ b/arch/sparc/kernel/kgdb_32.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/* kgdb.c: KGDB support for 32-bit sparc.
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kgdb.h>
+#include <linux/kdebug.h>
+#include <linux/sched.h>
+
+#include <asm/kdebug.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+#include <asm/cacheflush.h>
+
+#include "kernel.h"
+#include "entry.h"
+
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	struct reg_window32 *win;
+	int i;
+
+	gdb_regs[GDB_G0] = 0;
+	for (i = 0; i < 15; i++)
+		gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i];
+
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_L0 + i] = win->locals[i];
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_I0 + i] = win->ins[i];
+
+	for (i = GDB_F0; i <= GDB_F31; i++)
+		gdb_regs[i] = 0;
+
+	gdb_regs[GDB_Y] = regs->y;
+	gdb_regs[GDB_PSR] = regs->psr;
+	gdb_regs[GDB_WIM] = 0;
+	gdb_regs[GDB_TBR] = (unsigned long) &trapbase;
+	gdb_regs[GDB_PC] = regs->pc;
+	gdb_regs[GDB_NPC] = regs->npc;
+	gdb_regs[GDB_FSR] = 0;
+	gdb_regs[GDB_CSR] = 0;
+}
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+	struct thread_info *t = task_thread_info(p);
+	struct reg_window32 *win;
+	int i;
+
+	for (i = GDB_G0; i < GDB_G6; i++)
+		gdb_regs[i] = 0;
+	gdb_regs[GDB_G6] = (unsigned long) t;
+	gdb_regs[GDB_G7] = 0;
+	for (i = GDB_O0; i < GDB_SP; i++)
+		gdb_regs[i] = 0;
+	gdb_regs[GDB_SP] = t->ksp;
+	gdb_regs[GDB_O7] = 0;
+
+	win = (struct reg_window32 *) t->ksp;
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_L0 + i] = win->locals[i];
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_I0 + i] = win->ins[i];
+
+	for (i = GDB_F0; i <= GDB_F31; i++)
+		gdb_regs[i] = 0;
+
+	gdb_regs[GDB_Y] = 0;
+
+	gdb_regs[GDB_PSR] = t->kpsr;
+	gdb_regs[GDB_WIM] = t->kwim;
+	gdb_regs[GDB_TBR] = (unsigned long) &trapbase;
+	gdb_regs[GDB_PC] = t->kpc;
+	gdb_regs[GDB_NPC] = t->kpc + 4;
+	gdb_regs[GDB_FSR] = 0;
+	gdb_regs[GDB_CSR] = 0;
+}
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	struct reg_window32 *win;
+	int i;
+
+	for (i = 0; i < 15; i++)
+		regs->u_regs[UREG_G1 + i] = gdb_regs[GDB_G1 + i];
+
+	/* If the PSR register is changing, we have to preserve
+	 * the CWP field, otherwise window save/restore explodes.
+	 */
+	if (regs->psr != gdb_regs[GDB_PSR]) {
+		unsigned long cwp = regs->psr & PSR_CWP;
+
+		regs->psr = (gdb_regs[GDB_PSR] & ~PSR_CWP) | cwp;
+	}
+
+	regs->pc = gdb_regs[GDB_PC];
+	regs->npc = gdb_regs[GDB_NPC];
+	regs->y = gdb_regs[GDB_Y];
+
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
+	for (i = 0; i < 8; i++)
+		win->locals[i] = gdb_regs[GDB_L0 + i];
+	for (i = 0; i < 8; i++)
+		win->ins[i] = gdb_regs[GDB_I0 + i];
+}
+
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+			       char *remcomInBuffer, char *remcomOutBuffer,
+			       struct pt_regs *linux_regs)
+{
+	unsigned long addr;
+	char *ptr;
+
+	switch (remcomInBuffer[0]) {
+	case 'c':
+		/* try to read optional parameter, pc unchanged if no param */
+		ptr = &remcomInBuffer[1];
+		if (kgdb_hex2long(&ptr, &addr)) {
+			linux_regs->pc = addr;
+			linux_regs->npc = addr + 4;
+		}
+		/* fall through */
+
+	case 'D':
+	case 'k':
+		if (linux_regs->pc == (unsigned long) arch_kgdb_breakpoint) {
+			linux_regs->pc = linux_regs->npc;
+			linux_regs->npc += 4;
+		}
+		return 0;
+	}
+	return -1;
+}
+
+asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	if (user_mode(regs)) {
+		do_hw_interrupt(regs, trap_level);
+		return;
+	}
+
+	flushw_all();
+
+	local_irq_save(flags);
+	kgdb_handle_exception(trap_level, SIGTRAP, 0, regs);
+	local_irq_restore(flags);
+}
+
+int kgdb_arch_init(void)
+{
+	return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->pc = ip;
+	regs->npc = regs->pc + 4;
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+	/* Breakpoint instruction: ta 0x7d */
+	.gdb_bpt_instr		= { 0x91, 0xd0, 0x20, 0x7d },
+};
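+
+/* For illustration: the byte sequence {0x91, 0xd0, 0x20, 0x7d} is the
+ * big-endian instruction word 0x91d0207d, i.e. "ta 0x7d" -- a
+ * trap-always with software trap number 0x7d, which the sparc32 trap
+ * table routes to kgdb_trap() above.  The 64-bit port uses "ta 0x72"
+ * the same way.
+ */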
diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c
new file mode 100644
index 0000000..a68bbdd
--- /dev/null
+++ b/arch/sparc/kernel/kgdb_64.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/* kgdb.c: KGDB support for 64-bit sparc.
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kgdb.h>
+#include <linux/kdebug.h>
+#include <linux/ftrace.h>
+#include <linux/context_tracking.h>
+
+#include <asm/cacheflush.h>
+#include <asm/kdebug.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+#include "kernel.h"
+
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	struct reg_window *win;
+	int i;
+
+	gdb_regs[GDB_G0] = 0;
+	for (i = 0; i < 15; i++)
+		gdb_regs[GDB_G1 + i] = regs->u_regs[UREG_G1 + i];
+
+	win = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS);
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_L0 + i] = win->locals[i];
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_I0 + i] = win->ins[i];
+
+	for (i = GDB_F0; i <= GDB_F62; i++)
+		gdb_regs[i] = 0;
+
+	gdb_regs[GDB_PC] = regs->tpc;
+	gdb_regs[GDB_NPC] = regs->tnpc;
+	gdb_regs[GDB_STATE] = regs->tstate;
+	gdb_regs[GDB_FSR] = 0;
+	gdb_regs[GDB_FPRS] = 0;
+	gdb_regs[GDB_Y] = regs->y;
+}
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+	struct thread_info *t = task_thread_info(p);
+	extern unsigned int switch_to_pc;
+	extern unsigned int ret_from_fork;
+	struct reg_window *win;
+	unsigned long pc, cwp;
+	int i;
+
+	for (i = GDB_G0; i < GDB_G6; i++)
+		gdb_regs[i] = 0;
+	gdb_regs[GDB_G6] = (unsigned long) t;
+	gdb_regs[GDB_G7] = (unsigned long) p;
+	for (i = GDB_O0; i < GDB_SP; i++)
+		gdb_regs[i] = 0;
+	gdb_regs[GDB_SP] = t->ksp;
+	gdb_regs[GDB_O7] = 0;
+
+	win = (struct reg_window *) (t->ksp + STACK_BIAS);
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_L0 + i] = win->locals[i];
+	for (i = 0; i < 8; i++)
+		gdb_regs[GDB_I0 + i] = win->ins[i];
+
+	for (i = GDB_F0; i <= GDB_F62; i++)
+		gdb_regs[i] = 0;
+
+	if (t->new_child)
+		pc = (unsigned long) &ret_from_fork;
+	else
+		pc = (unsigned long) &switch_to_pc;
+
+	gdb_regs[GDB_PC] = pc;
+	gdb_regs[GDB_NPC] = pc + 4;
+
+	cwp = __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP];
+
+	gdb_regs[GDB_STATE] = (TSTATE_PRIV | TSTATE_IE | cwp);
+	gdb_regs[GDB_FSR] = 0;
+	gdb_regs[GDB_FPRS] = 0;
+	gdb_regs[GDB_Y] = 0;
+}
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+	struct reg_window *win;
+	int i;
+
+	for (i = 0; i < 15; i++)
+		regs->u_regs[UREG_G1 + i] = gdb_regs[GDB_G1 + i];
+
+	/* If the TSTATE register is changing, we have to preserve
+	 * the CWP field, otherwise window save/restore explodes.
+	 */
+	if (regs->tstate != gdb_regs[GDB_STATE]) {
+		unsigned long cwp = regs->tstate & TSTATE_CWP;
+
+		regs->tstate = (gdb_regs[GDB_STATE] & ~TSTATE_CWP) | cwp;
+	}
+
+	regs->tpc = gdb_regs[GDB_PC];
+	regs->tnpc = gdb_regs[GDB_NPC];
+	regs->y = gdb_regs[GDB_Y];
+
+	win = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS);
+	for (i = 0; i < 8; i++)
+		win->locals[i] = gdb_regs[GDB_L0 + i];
+	for (i = 0; i < 8; i++)
+		win->ins[i] = gdb_regs[GDB_I0 + i];
+}
+
+#ifdef CONFIG_SMP
+void __irq_entry smp_kgdb_capture_client(int irq, struct pt_regs *regs)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__("rdpr      %%pstate, %0\n\t"
+			     "wrpr      %0, %1, %%pstate"
+			     : "=r" (flags)
+			     : "i" (PSTATE_IE));
+
+	flushw_all();
+
+	if (atomic_read(&kgdb_active) != -1)
+		kgdb_nmicallback(raw_smp_processor_id(), regs);
+
+	__asm__ __volatile__("wrpr	%0, 0, %%pstate"
+			     : : "r" (flags));
+}
+#endif
+
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+			       char *remcomInBuffer, char *remcomOutBuffer,
+			       struct pt_regs *linux_regs)
+{
+	unsigned long addr;
+	char *ptr;
+
+	switch (remcomInBuffer[0]) {
+	case 'c':
+		/* try to read optional parameter, pc unchanged if no param */
+		ptr = &remcomInBuffer[1];
+		if (kgdb_hex2long(&ptr, &addr)) {
+			linux_regs->tpc = addr;
+			linux_regs->tnpc = addr + 4;
+		}
+		/* fall through */
+
+	case 'D':
+	case 'k':
+		if (linux_regs->tpc == (unsigned long) arch_kgdb_breakpoint) {
+			linux_regs->tpc = linux_regs->tnpc;
+			linux_regs->tnpc += 4;
+		}
+		return 0;
+	}
+	return -1;
+}
+
+asmlinkage void kgdb_trap(unsigned long trap_level, struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+	unsigned long flags;
+
+	if (user_mode(regs)) {
+		bad_trap(regs, trap_level);
+		goto out;
+	}
+
+	flushw_all();
+
+	local_irq_save(flags);
+	kgdb_handle_exception(0x172, SIGTRAP, 0, regs);
+	local_irq_restore(flags);
+out:
+	exception_exit(prev_state);
+}
+
+int kgdb_arch_init(void)
+{
+	return 0;
+}
+
+void kgdb_arch_exit(void)
+{
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->tpc = ip;
+	regs->tnpc = regs->tpc + 4;
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+	/* Breakpoint instruction: ta 0x72 */
+	.gdb_bpt_instr		= { 0x91, 0xd0, 0x20, 0x72 },
+};
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
new file mode 100644
index 0000000..dfbca24
--- /dev/null
+++ b/arch/sparc/kernel/kprobes.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+/* arch/sparc64/kernel/kprobes.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/extable.h>
+#include <linux/kdebug.h>
+#include <linux/slab.h>
+#include <linux/context_tracking.h>
+#include <asm/signal.h>
+#include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+
+/* We do not have hardware single-stepping on sparc64.
+ * So we implement software single-stepping with breakpoint
+ * traps.  The top-level scheme is similar to that used
+ * in the x86 kprobes implementation.
+ *
+ * In the kprobe->ainsn.insn[] array we store the original
+ * instruction at index zero and a break instruction at
+ * index one.
+ *
+ * When we hit a kprobe we:
+ * - Run the pre-handler
+ * - Remember "regs->tnpc" and interrupt level stored in
+ *   "regs->tstate" so we can restore them later
+ * - Disable PIL interrupts
+ * - Set regs->tpc to point to kprobe->ainsn.insn[0]
+ * - Set regs->tnpc to point to kprobe->ainsn.insn[1]
+ * - Mark that we are actively in a kprobe
+ *
+ * At this point we wait for the second breakpoint at
+ * kprobe->ainsn.insn[1] to hit.  When it does we:
+ * - Run the post-handler
+ * - Set regs->tpc to "remembered" regs->tnpc stored above,
+ *   restore the PIL interrupt level in "regs->tstate" as well
+ * - Make any adjustments necessary to regs->tnpc in order
+ *   to handle relative branches correctly.  See below.
+ * - Mark that we are no longer actively in a kprobe.
+ */
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+	if ((unsigned long) p->addr & 0x3UL)
+		return -EILSEQ;
+
+	p->ainsn.insn[0] = *p->addr;
+	flushi(&p->ainsn.insn[0]);
+
+	p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2;
+	flushi(&p->ainsn.insn[1]);
+
+	p->opcode = *p->addr;
+	return 0;
+}
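+
+/* For illustration, the slot layout this sets up, matching the scheme
+ * described at the top of this file:
+ *
+ *	p->ainsn.insn[0] = original instruction		(stepped copy)
+ *	p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2	(post-step trap)
+ *	*p->addr         = BREAKPOINT_INSTRUCTION	(written by arch_arm_kprobe)
+ *
+ * The 0x3UL check rejects addresses that are not 4-byte aligned, since
+ * every sparc instruction is one 32-bit word.
+ */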
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flushi(p->addr);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+	*p->addr = p->opcode;
+	flushi(p->addr);
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	kcb->prev_kprobe.kp = kprobe_running();
+	kcb->prev_kprobe.status = kcb->kprobe_status;
+	kcb->prev_kprobe.orig_tnpc = kcb->kprobe_orig_tnpc;
+	kcb->prev_kprobe.orig_tstate_pil = kcb->kprobe_orig_tstate_pil;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+	kcb->kprobe_status = kcb->prev_kprobe.status;
+	kcb->kprobe_orig_tnpc = kcb->prev_kprobe.orig_tnpc;
+	kcb->kprobe_orig_tstate_pil = kcb->prev_kprobe.orig_tstate_pil;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+				struct kprobe_ctlblk *kcb)
+{
+	__this_cpu_write(current_kprobe, p);
+	kcb->kprobe_orig_tnpc = regs->tnpc;
+	kcb->kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL);
+}
+
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs,
+			struct kprobe_ctlblk *kcb)
+{
+	regs->tstate |= TSTATE_PIL;
+
+	/* Single step inline if it is a breakpoint instruction. */
+	if (p->opcode == BREAKPOINT_INSTRUCTION) {
+		regs->tpc = (unsigned long) p->addr;
+		regs->tnpc = kcb->kprobe_orig_tnpc;
+	} else {
+		regs->tpc = (unsigned long) &p->ainsn.insn[0];
+		regs->tnpc = (unsigned long) &p->ainsn.insn[1];
+	}
+}
+
+static int __kprobes kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *p;
+	void *addr = (void *) regs->tpc;
+	int ret = 0;
+	struct kprobe_ctlblk *kcb;
+
+	/*
+	 * We don't want to be preempted for the entire
+	 * duration of kprobe processing
+	 */
+	preempt_disable();
+	kcb = get_kprobe_ctlblk();
+
+	if (kprobe_running()) {
+		p = get_kprobe(addr);
+		if (p) {
+			if (kcb->kprobe_status == KPROBE_HIT_SS) {
+				regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+					kcb->kprobe_orig_tstate_pil);
+				goto no_kprobe;
+			}
+			/* We have reentered the kprobe_handler(), since
+			 * another probe was hit while within the handler.
+			 * Here we save the original kprobes variables and
+			 * just single step on the instruction of the new probe
+			 * without calling any user handlers.
+			 */
+			save_previous_kprobe(kcb);
+			set_current_kprobe(p, regs, kcb);
+			kprobes_inc_nmissed_count(p);
+			kcb->kprobe_status = KPROBE_REENTER;
+			prepare_singlestep(p, regs, kcb);
+			return 1;
+		} else if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
+			/* The breakpoint instruction was removed by
+			 * another cpu right after we hit, no further
+			 * handling of this interrupt is appropriate
+			 */
+			ret = 1;
+		}
+		goto no_kprobe;
+	}
+
+	p = get_kprobe(addr);
+	if (!p) {
+		if (*(u32 *)addr != BREAKPOINT_INSTRUCTION) {
+			/*
+			 * The breakpoint instruction was removed right
+			 * after we hit it.  Another cpu has removed
+			 * either a probepoint or a debugger breakpoint
+			 * at this address.  In either case, no further
+			 * handling of this interrupt is appropriate.
+			 */
+			ret = 1;
+		}
+		/* Not one of ours: let kernel handle it */
+		goto no_kprobe;
+	}
+
+	set_current_kprobe(p, regs, kcb);
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+	if (p->pre_handler && p->pre_handler(p, regs)) {
+		reset_current_kprobe();
+		preempt_enable_no_resched();
+		return 1;
+	}
+
+	prepare_singlestep(p, regs, kcb);
+	kcb->kprobe_status = KPROBE_HIT_SS;
+	return 1;
+
+no_kprobe:
+	preempt_enable_no_resched();
+	return ret;
+}
+
+/* If INSN is a relative control transfer instruction,
+ * return the corrected branch destination value.
+ *
+ * regs->tpc and regs->tnpc still hold the values of the
+ * program counters at the time of the trap caused by executing the
+ * BREAKPOINT_INSTRUCTION_2 at p->ainsn.insn[1].
+ */
+static unsigned long __kprobes relbranch_fixup(u32 insn, struct kprobe *p,
+					       struct pt_regs *regs)
+{
+	unsigned long real_pc = (unsigned long) p->addr;
+
+	/* Branch not taken, no mods necessary.  */
+	if (regs->tnpc == regs->tpc + 0x4UL)
+		return real_pc + 0x8UL;
+
+	/* The three cases are call, branch w/prediction,
+	 * and traditional branch.
+	 */
+	if ((insn & 0xc0000000) == 0x40000000 ||
+	    (insn & 0xc1c00000) == 0x00400000 ||
+	    (insn & 0xc1c00000) == 0x00800000) {
+		unsigned long ainsn_addr;
+
+		ainsn_addr = (unsigned long) &p->ainsn.insn[0];
+
+		/* The instruction did all the work for us
+		 * already, just apply the offset to the correct
+		 * instruction location.
+		 */
+		return (real_pc + (regs->tnpc - ainsn_addr));
+	}
+
+	/* It is jmpl or some other absolute PC modification instruction,
+	 * leave NPC as-is.
+	 */
+	return regs->tnpc;
+}
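+
+/* Worked example (illustrative): suppose a probed branch at real_pc is
+ * taken with a +0x40 displacement.  The stepped copy at
+ * &p->ainsn.insn[0] computes tnpc = ainsn_addr + 0x40, so the fixup
+ * above returns real_pc + (tnpc - ainsn_addr) == real_pc + 0x40; the
+ * displacement is simply rebased from the copy back to the original
+ * location.  The not-taken case returns real_pc + 8, stepping over the
+ * branch and its delay slot.
+ */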
+
+/* If INSN is an instruction which writes its PC location
+ * into a destination register, fix that up.
+ */
+static void __kprobes retpc_fixup(struct pt_regs *regs, u32 insn,
+				  unsigned long real_pc)
+{
+	unsigned long *slot = NULL;
+
+	/* Simplest case is 'call', which always uses %o7 */
+	if ((insn & 0xc0000000) == 0x40000000) {
+		slot = &regs->u_regs[UREG_I7];
+	}
+
+	/* 'jmpl' encodes the register inside of the opcode */
+	if ((insn & 0xc1f80000) == 0x81c00000) {
+		unsigned long rd = ((insn >> 25) & 0x1f);
+
+		if (rd <= 15) {
+			slot = &regs->u_regs[rd];
+		} else {
+			/* Hard case, it goes onto the stack. */
+			flushw_all();
+
+			rd -= 16;
+			slot = (unsigned long *)
+				(regs->u_regs[UREG_FP] + STACK_BIAS);
+			slot += rd;
+		}
+	}
+	if (slot != NULL)
+		*slot = real_pc;
+}
+
+/*
+ * Called after single-stepping.  p->addr is the address of the
+ * instruction which has been replaced by the breakpoint
+ * instruction.  To avoid the SMP problems that can occur when we
+ * temporarily put back the original opcode to single-step, we
+ * single-stepped a copy of the instruction.  The address of this
+ * copy is &p->ainsn.insn[0].
+ *
+ * This function prepares to return from the post-single-step
+ * breakpoint trap.
+ */
+static void __kprobes resume_execution(struct kprobe *p,
+		struct pt_regs *regs, struct kprobe_ctlblk *kcb)
+{
+	u32 insn = p->ainsn.insn[0];
+
+	regs->tnpc = relbranch_fixup(insn, p, regs);
+
+	/* This assignment must occur after relbranch_fixup() */
+	regs->tpc = kcb->kprobe_orig_tnpc;
+
+	retpc_fixup(regs, insn, (unsigned long) p->addr);
+
+	regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+			kcb->kprobe_orig_tstate_pil);
+}
+
+static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+	if (!cur)
+		return 0;
+
+	if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		cur->post_handler(cur, regs, 0);
+	}
+
+	resume_execution(cur, regs, kcb);
+
+	/* Restore the original saved kprobes variables and continue. */
+	if (kcb->kprobe_status == KPROBE_REENTER) {
+		restore_previous_kprobe(kcb);
+		goto out;
+	}
+	reset_current_kprobe();
+out:
+	preempt_enable_no_resched();
+
+	return 1;
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+	struct kprobe *cur = kprobe_running();
+	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	const struct exception_table_entry *entry;
+
+	switch (kcb->kprobe_status) {
+	case KPROBE_HIT_SS:
+	case KPROBE_REENTER:
+		/*
+		 * We are here because the instruction being single
+		 * stepped caused a page fault. We reset the current
+		 * kprobe and the tpc points back to the probe address
+		 * and allow the page fault handler to continue as a
+		 * normal page fault.
+		 */
+		regs->tpc = (unsigned long)cur->addr;
+		regs->tnpc = kcb->kprobe_orig_tnpc;
+		regs->tstate = ((regs->tstate & ~TSTATE_PIL) |
+				kcb->kprobe_orig_tstate_pil);
+		if (kcb->kprobe_status == KPROBE_REENTER)
+			restore_previous_kprobe(kcb);
+		else
+			reset_current_kprobe();
+		preempt_enable_no_resched();
+		break;
+	case KPROBE_HIT_ACTIVE:
+	case KPROBE_HIT_SSDONE:
+		/*
+		 * We increment the nmissed count for accounting,
+		 * we can also use npre/npostfault count for accounting
+		 * these specific fault cases.
+		 */
+		kprobes_inc_nmissed_count(cur);
+
+		/*
+		 * We come here because instructions in the pre/post
+		 * handler caused the page_fault, this could happen
+		 * if handler tries to access user space by
+		 * copy_from_user(), get_user() etc. Let the
+		 * user-specified handler try to fix it first.
+		 */
+		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+			return 1;
+
+		/*
+		 * In case the user-specified fault handler returned
+		 * zero, try to fix up.
+		 */
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return 1;
+		}
+
+		/*
+		 * fixup_exception() could not handle it,
+		 * Let do_page_fault() fix it.
+		 */
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Wrapper routine for handling exceptions.
+ */
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+				       unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	if (args->regs && user_mode(args->regs))
+		return ret;
+
+	switch (val) {
+	case DIE_DEBUG:
+		if (kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	case DIE_DEBUG_2:
+		if (post_kprobe_handler(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+asmlinkage void __kprobes kprobe_trap(unsigned long trap_level,
+				      struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	BUG_ON(trap_level != 0x170 && trap_level != 0x171);
+
+	if (user_mode(regs)) {
+		local_irq_enable();
+		bad_trap(regs, trap_level);
+		goto out;
+	}
+
+	/* trap_level == 0x170 --> ta 0x70
+	 * trap_level == 0x171 --> ta 0x71
+	 */
+	if (notify_die((trap_level == 0x170) ? DIE_DEBUG : DIE_DEBUG_2,
+		       (trap_level == 0x170) ? "debug" : "debug_2",
+		       regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP)
+		bad_trap(regs, trap_level);
+out:
+	exception_exit(prev_state);
+}
+
+/* The value stored in the return address register is actually 2
+ * instructions before where the callee will return to.
+ * Sequences usually look something like this
+ *
+ *		call	some_function	<--- return register points here
+ *		 nop			<--- call delay slot
+ *		whatever		<--- where callee returns to
+ *
+ * To keep trampoline_probe_handler logic simpler, we normalize the
+ * value kept in ri->ret_addr so we don't need to keep adjusting it
+ * back and forth.
+ */
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				      struct pt_regs *regs)
+{
+	ri->ret_addr = (kprobe_opcode_t *)(regs->u_regs[UREG_RETPC] + 8);
+
+	/* Replace the return addr with trampoline addr */
+	regs->u_regs[UREG_RETPC] =
+		((unsigned long)kretprobe_trampoline) - 8;
+}
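+
+/* For illustration: %o7 (UREG_RETPC) holds the address of the call
+ * instruction itself, and the callee returns to %o7 + 8 (the call plus
+ * its delay slot).  Storing ret_addr = %o7 + 8 and trampoline - 8 keeps
+ * both values in the same convention, so when the trampoline is hit,
+ * trampoline_probe_handler() can install the saved ret_addr into
+ * regs->tpc directly, with no further adjustment.
+ */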
+
+/*
+ * Called when the probe at kretprobe trampoline is hit
+ */
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+					      struct pt_regs *regs)
+{
+	struct kretprobe_instance *ri = NULL;
+	struct hlist_head *head, empty_rp;
+	struct hlist_node *tmp;
+	unsigned long flags, orig_ret_address = 0;
+	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+
+	INIT_HLIST_HEAD(&empty_rp);
+	kretprobe_hash_lock(current, &head, &flags);
+
+	/*
+	 * It is possible to have multiple instances associated with a given
+	 * task, either because multiple functions in the call path have a
+	 * return probe installed on them, and/or because more than one
+	 * return probe was registered for a target function.
+	 *
+	 * We can handle this because:
+	 *     - instances are always inserted at the head of the list
+	 *     - when multiple return probes are registered for the same
+	 *       function, the first instance's ret_addr will point to the
+	 *       real return address, and all the rest will point to
+	 *       kretprobe_trampoline
+	 */
+	hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		if (ri->rp && ri->rp->handler)
+			ri->rp->handler(ri, regs);
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
+	kretprobe_assert(ri, orig_ret_address, trampoline_address);
+	regs->tpc = orig_ret_address;
+	regs->tnpc = orig_ret_address + 4;
+
+	kretprobe_hash_unlock(current, &flags);
+
+	hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+		hlist_del(&ri->hlist);
+		kfree(ri);
+	}
+	/*
+	 * By returning a non-zero value, we are telling
+	 * kprobe_handler() that we don't want the post_handler
+	 * to run (and have re-enabled preemption)
+	 */
+	return 1;
+}
+
+static void __used kretprobe_trampoline_holder(void)
+{
+	asm volatile(".global kretprobe_trampoline\n"
+		     "kretprobe_trampoline:\n"
+		     "\tnop\n"
+		     "\tnop\n");
+}
+
+static struct kprobe trampoline_p = {
+	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.pre_handler = trampoline_probe_handler
+};
+
+int __init arch_init_kprobes(void)
+{
+	return register_kprobe(&trampoline_p);
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+		return 1;
+
+	return 0;
+}
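+
+/* Minimal usage sketch for the machinery above (illustrative only;
+ * the probed symbol and the module boilerplate are assumptions, not
+ * part of this file):
+ *
+ *	static int ret_handler(struct kretprobe_instance *ri,
+ *			       struct pt_regs *regs)
+ *	{
+ *		pr_info("retval=%lu\n", regs_return_value(regs));
+ *		return 0;
+ *	}
+ *
+ *	static struct kretprobe my_rp = {
+ *		.handler	= ret_handler,
+ *		.kp.symbol_name	= "do_fork",
+ *		.maxactive	= 20,
+ *	};
+ *
+ *	register_kretprobe(&my_rp);	(from module init)
+ *	unregister_kretprobe(&my_rp);	(from module exit)
+ *
+ * arch_prepare_kretprobe() diverts each probed return through
+ * kretprobe_trampoline, and trampoline_probe_handler() restores the
+ * real return address recorded in ri->ret_addr.
+ */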
diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h
new file mode 100644
index 0000000..b3c5e8f
--- /dev/null
+++ b/arch/sparc/kernel/kstack.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KSTACK_H
+#define _KSTACK_H
+
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+/* SP must be STACK_BIAS adjusted already.  */
+static inline bool kstack_valid(struct thread_info *tp, unsigned long sp)
+{
+	unsigned long base = (unsigned long) tp;
+
+	/* Stack pointer must be 16-byte aligned.  */
+	if (sp & (16UL - 1))
+		return false;
+
+	if (sp >= (base + sizeof(struct thread_info)) &&
+	    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		return true;
+
+	if (hardirq_stack[tp->cpu]) {
+		base = (unsigned long) hardirq_stack[tp->cpu];
+		if (sp >= base &&
+		    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+			return true;
+		base = (unsigned long) softirq_stack[tp->cpu];
+		if (sp >= base &&
+		    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+			return true;
+	}
+	return false;
+}
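+
+/* Illustration (assuming sparc64's 16K THREAD_SIZE): for a thread_info
+ * at base B, the window accepted above is
+ *
+ *	B + sizeof(struct thread_info) <= sp <=
+ *	B + THREAD_SIZE - sizeof(struct sparc_stackf)
+ *
+ * i.e. anywhere in the stack region that still leaves room for one
+ * full register save frame, with the same window applied to the
+ * per-cpu hardirq and softirq stacks.
+ */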
+
+/* Does "regs" point to a valid pt_regs trap frame?  */
+static inline bool kstack_is_trap_frame(struct thread_info *tp, struct pt_regs *regs)
+{
+	unsigned long base = (unsigned long) tp;
+	unsigned long addr = (unsigned long) regs;
+
+	if (addr >= base &&
+	    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+		goto check_magic;
+
+	if (hardirq_stack[tp->cpu]) {
+		base = (unsigned long) hardirq_stack[tp->cpu];
+		if (addr >= base &&
+		    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+			goto check_magic;
+		base = (unsigned long) softirq_stack[tp->cpu];
+		if (addr >= base &&
+		    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+			goto check_magic;
+	}
+	return false;
+
+check_magic:
+	if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC)
+		return true;
+	return false;
+}
+
+static inline __attribute__((always_inline)) void *set_hardirq_stack(void)
+{
+	void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
+
+	__asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
+	if (orig_sp < sp ||
+	    orig_sp > (sp + THREAD_SIZE)) {
+		sp += THREAD_SIZE - 192 - STACK_BIAS;
+		__asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
+	}
+
+	return orig_sp;
+}
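+
+/* A note on the arithmetic above: 192 equals sizeof(struct
+ * sparc_stackf), so the new %sp lands THREAD_SIZE - 192 bytes into the
+ * IRQ stack, leaving space for one full save frame at the top, while
+ * subtracting STACK_BIAS re-applies the standard sparc64 stack-pointer
+ * bias.
+ */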
+
+static inline __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
+{
+	__asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
+}
+
+#endif /* _KSTACK_H */
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
new file mode 100644
index 0000000..1cf91c0
--- /dev/null
+++ b/arch/sparc/kernel/ktlb.S
@@ -0,0 +1,270 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* arch/sparc64/kernel/ktlb.S: Kernel mapping TLB miss handling.
+ *
+ * Copyright (C) 1995, 1997, 2005, 2008 David S. Miller <davem@davemloft.net>
+ * Copyright (C) 1996 Eddie C. Dost        (ecd@brainaid.de)
+ * Copyright (C) 1996 Miguel de Icaza      (miguel@nuclecu.unam.mx)
+ * Copyright (C) 1996,98,99 Jakub Jelinek  (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/tsb.h>
+
+	.text
+	.align		32
+
+kvmap_itlb:
+	/* g6: TAG TARGET */
+	mov		TLB_TAG_ACCESS, %g4
+	ldxa		[%g4] ASI_IMMU, %g4
+
+	/* The kernel executes in context zero, therefore we do not
+	 * need to clear the context ID bits out of %g4 here.
+	 */
+
+	/* sun4v_itlb_miss branches here with the missing virtual
+	 * address already loaded into %g4
+	 */
+kvmap_itlb_4v:
+
+	/* Catch kernel NULL pointer calls.  */
+	sethi		%hi(PAGE_SIZE), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_itlb_longpath
+	 nop
+
+	KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_itlb_load)
+
+kvmap_itlb_tsb_miss:
+	sethi		%hi(LOW_OBP_ADDRESS), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_itlb_vmalloc_addr
+	 mov		0x1, %g5
+	sllx		%g5, 32, %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_itlb_obp
+	 nop
+
+kvmap_itlb_vmalloc_addr:
+	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
+
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_WRITE(%g1, %g5, %g6)
+
+	/* fallthrough to TLB load */
+
+kvmap_itlb_load:
+
+661:	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
+	retry
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	/* For sun4v the ASI_ITLB_DATA_IN store and the retry
+	 * instruction get nop'd out and we get here to branch
+	 * to the sun4v tlb load code.  The registers are set up
+	 * as follows:
+	 *
+	 * %g4: vaddr
+	 * %g5: PTE
+	 * %g6:	TAG
+	 *
+	 * The sun4v TLB load wants the PTE in %g3 so we fix that
+	 * up here.
+	 */
+	ba,pt		%xcc, sun4v_itlb_load
+	 mov		%g5, %g3
+
+kvmap_itlb_longpath:
+
+661:	rdpr	%pstate, %g5
+	wrpr	%g5, PSTATE_AG | PSTATE_MG, %pstate
+	.section .sun4v_2insn_patch, "ax"
+	.word	661b
+	SET_GL(1)
+	nop
+	.previous
+
+	rdpr	%tpc, %g5
+	ba,pt	%xcc, sparc64_realfault_common
+	 mov	FAULT_CODE_ITLB, %g4
+
+kvmap_itlb_obp:
+	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
+
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+
+	TSB_WRITE(%g1, %g5, %g6)
+
+	ba,pt		%xcc, kvmap_itlb_load
+	 nop
+
+kvmap_dtlb_obp:
+	OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
+
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+
+	TSB_WRITE(%g1, %g5, %g6)
+
+	ba,pt		%xcc, kvmap_dtlb_load
+	 nop
+
+kvmap_linear_early:
+	sethi		%hi(kern_linear_pte_xor), %g7
+	ldx		[%g7 + %lo(kern_linear_pte_xor)], %g2
+	ba,pt		%xcc, kvmap_dtlb_tsb4m_load
+	 xor		%g2, %g4, %g5
+
+	.align		32
+kvmap_dtlb_tsb4m_load:
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_WRITE(%g1, %g5, %g6)
+	ba,pt		%xcc, kvmap_dtlb_load
+	 nop
+
+kvmap_dtlb:
+	/* %g6: TAG TARGET */
+	mov		TLB_TAG_ACCESS, %g4
+	ldxa		[%g4] ASI_DMMU, %g4
+
+	/* The kernel executes in context zero, therefore we do not
+	 * need to clear the context ID bits out of %g4 here.
+	 */
+
+	/* sun4v_dtlb_miss branches here with the missing virtual
+	 * address already loaded into %g4
+	 */
+kvmap_dtlb_4v:
+	brgez,pn	%g4, kvmap_dtlb_nonlinear
+	 nop
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/* Index through the base page size TSB even for linear
+	 * mappings when using page allocation debugging.
+	 */
+	KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+#else
+	/* Correct TAG_TARGET is already in %g6, check 4mb TSB.  */
+	KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+#endif
+	/* Linear mapping TSB lookup failed.  Fallthrough to kernel
+	 * page table based lookup.
+	 */
+	.globl		kvmap_linear_patch
+kvmap_linear_patch:
+	ba,a,pt		%xcc, kvmap_linear_early
+
+kvmap_dtlb_vmalloc_addr:
+	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_WRITE(%g1, %g5, %g6)
+
+	/* fallthrough to TLB load */
+
+kvmap_dtlb_load:
+
+661:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
+	retry
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	/* For sun4v the ASI_DTLB_DATA_IN store and the retry
+	 * instruction get nop'd out and we get here to branch
+	 * to the sun4v tlb load code.  The registers are set up
+	 * as follows:
+	 *
+	 * %g4: vaddr
+	 * %g5: PTE
+	 * %g6:	TAG
+	 *
+	 * The sun4v TLB load wants the PTE in %g3 so we fix that
+	 * up here.
+	 */
+	ba,pt		%xcc, sun4v_dtlb_load
+	 mov		%g5, %g3
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+kvmap_vmemmap:
+	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+	ba,a,pt		%xcc, kvmap_dtlb_load
+#endif
+
+kvmap_dtlb_nonlinear:
+	/* Catch kernel NULL pointer derefs.  */
+	sethi		%hi(PAGE_SIZE), %g5
+	cmp		%g4, %g5
+	bleu,pn		%xcc, kvmap_dtlb_longpath
+	 nop
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/* Do not use the TSB for vmemmap.  */
+	sethi		%hi(VMEMMAP_BASE), %g5
+	ldx		[%g5 + %lo(VMEMMAP_BASE)], %g5
+	cmp		%g4, %g5
+	bgeu,pn		%xcc, kvmap_vmemmap
+	 nop
+#endif
+
+	KERN_TSB_LOOKUP_TL1(%g4, %g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
+
+kvmap_dtlb_tsbmiss:
+	sethi		%hi(MODULES_VADDR), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_dtlb_longpath
+	 sethi		%hi(VMALLOC_END), %g5
+	ldx		[%g5 + %lo(VMALLOC_END)], %g5
+	cmp		%g4, %g5
+	bgeu,pn		%xcc, kvmap_dtlb_longpath
+	 nop
+
+kvmap_check_obp:
+	sethi		%hi(LOW_OBP_ADDRESS), %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_dtlb_vmalloc_addr
+	 mov		0x1, %g5
+	sllx		%g5, 32, %g5
+	cmp		%g4, %g5
+	blu,pn		%xcc, kvmap_dtlb_obp
+	 nop
+	ba,pt		%xcc, kvmap_dtlb_vmalloc_addr
+	 nop
+
+kvmap_dtlb_longpath:
+
+661:	rdpr	%pstate, %g5
+	wrpr	%g5, PSTATE_AG | PSTATE_MG, %pstate
+	.section .sun4v_2insn_patch, "ax"
+	.word	661b
+	SET_GL(1)
+	ldxa		[%g0] ASI_SCRATCHPAD, %g5
+	.previous
+
+	rdpr	%tl, %g3
+	cmp	%g3, 1
+
+661:	mov	TLB_TAG_ACCESS, %g4
+	ldxa	[%g4] ASI_DMMU, %g5
+	.section .sun4v_2insn_patch, "ax"
+	.word	661b
+	ldx	[%g5 + HV_FAULT_D_ADDR_OFFSET], %g5
+	nop
+	.previous
+
+	/* The kernel executes in context zero, therefore we do not
+	 * need to clear the context ID bits out of %g5 here.
+	 */
+
+	be,pt	%xcc, sparc64_realfault_common
+	 mov	FAULT_CODE_DTLB, %g4
+	ba,pt	%xcc, winfix_trampoline
+	 nop
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
new file mode 100644
index 0000000..c0fa3ef
--- /dev/null
+++ b/arch/sparc/kernel/ldc.c
@@ -0,0 +1,2428 @@
+// SPDX-License-Identifier: GPL-2.0
+/* ldc.c: Logical Domain Channel link-layer protocol driver.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/scatterlist.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/bitmap.h>
+#include <asm/iommu-common.h>
+
+#include <asm/hypervisor.h>
+#include <asm/iommu.h>
+#include <asm/page.h>
+#include <asm/ldc.h>
+#include <asm/mdesc.h>
+
+#define DRV_MODULE_NAME		"ldc"
+#define PFX DRV_MODULE_NAME	": "
+#define DRV_MODULE_VERSION	"1.1"
+#define DRV_MODULE_RELDATE	"July 22, 2008"
+
+#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
+#define COOKIE_PGSZ_CODE_SHIFT	60ULL
+
+static char version[] =
+	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+
+/* Packet header layout for unreliable and reliable mode frames.
+ * When in RAW mode, packets are simply straight 64-byte payloads
+ * with no headers.
+ */
+struct ldc_packet {
+	u8			type;
+#define LDC_CTRL		0x01
+#define LDC_DATA		0x02
+#define LDC_ERR			0x10
+
+	u8			stype;
+#define LDC_INFO		0x01
+#define LDC_ACK			0x02
+#define LDC_NACK		0x04
+
+	u8			ctrl;
+#define LDC_VERS		0x01 /* Link Version		*/
+#define LDC_RTS			0x02 /* Request To Send		*/
+#define LDC_RTR			0x03 /* Ready To Receive	*/
+#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
+#define LDC_CTRL_MSK		0x0f
+
+	u8			env;
+#define LDC_LEN			0x3f
+#define LDC_FRAG_MASK		0xc0
+#define LDC_START		0x40
+#define LDC_STOP		0x80
+
+	u32			seqid;
+
+	union {
+		u8		u_data[LDC_PACKET_SIZE - 8];
+		struct {
+			u32	pad;
+			u32	ackid;
+			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
+		} r;
+	} u;
+};
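+
+/* Size sketch derived from the layout above (LDC_PACKET_SIZE is the
+ * 64-byte sun4v LDC frame):
+ *
+ *	RAW:		64-byte payload, no header
+ *	UNRELIABLE:	 8-byte header + 56 bytes of u_data
+ *	STREAM:		16-byte header + 48 bytes of r_data
+ *
+ * which is exactly where the per-mode "mss" values chosen in
+ * ldc_alloc() come from.
+ */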
+
+struct ldc_version {
+	u16 major;
+	u16 minor;
+};
+
+/* Ordered from highest major to lowest.  */
+static struct ldc_version ver_arr[] = {
+	{ .major = 1, .minor = 0 },
+};
+
+#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
+#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)
+
+struct ldc_channel;
+
+struct ldc_mode_ops {
+	int (*write)(struct ldc_channel *, const void *, unsigned int);
+	int (*read)(struct ldc_channel *, void *, unsigned int);
+};
+
+static const struct ldc_mode_ops raw_ops;
+static const struct ldc_mode_ops nonraw_ops;
+static const struct ldc_mode_ops stream_ops;
+
+int ldom_domaining_enabled;
+
+struct ldc_iommu {
+	/* Protects ldc_unmap.  */
+	spinlock_t			lock;
+	struct ldc_mtable_entry		*page_table;
+	struct iommu_map_table		iommu_map_table;
+};
+
+struct ldc_channel {
+	/* Protects all operations that depend upon channel state.  */
+	spinlock_t			lock;
+
+	unsigned long			id;
+
+	u8				*mssbuf;
+	u32				mssbuf_len;
+	u32				mssbuf_off;
+
+	struct ldc_packet		*tx_base;
+	unsigned long			tx_head;
+	unsigned long			tx_tail;
+	unsigned long			tx_num_entries;
+	unsigned long			tx_ra;
+
+	unsigned long			tx_acked;
+
+	struct ldc_packet		*rx_base;
+	unsigned long			rx_head;
+	unsigned long			rx_tail;
+	unsigned long			rx_num_entries;
+	unsigned long			rx_ra;
+
+	u32				rcv_nxt;
+	u32				snd_nxt;
+
+	unsigned long			chan_state;
+
+	struct ldc_channel_config	cfg;
+	void				*event_arg;
+
+	const struct ldc_mode_ops	*mops;
+
+	struct ldc_iommu		iommu;
+
+	struct ldc_version		ver;
+
+	u8				hs_state;
+#define LDC_HS_CLOSED			0x00
+#define LDC_HS_OPEN			0x01
+#define LDC_HS_GOTVERS			0x02
+#define LDC_HS_SENTRTR			0x03
+#define LDC_HS_GOTRTR			0x04
+#define LDC_HS_COMPLETE			0x10
+
+	u8				flags;
+#define LDC_FLAG_ALLOCED_QUEUES		0x01
+#define LDC_FLAG_REGISTERED_QUEUES	0x02
+#define LDC_FLAG_REGISTERED_IRQS	0x04
+#define LDC_FLAG_RESET			0x10
+
+	u8				mss;
+	u8				state;
+
+#define LDC_IRQ_NAME_MAX		32
+	char				rx_irq_name[LDC_IRQ_NAME_MAX];
+	char				tx_irq_name[LDC_IRQ_NAME_MAX];
+
+	struct hlist_head		mh_list;
+
+	struct hlist_node		list;
+};
+
+#define ldcdbg(TYPE, f, a...) \
+do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
+		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
+} while (0)
+
+#define	LDC_ABORT(lp)	ldc_abort((lp), __func__)
+
+static const char *state_to_str(u8 state)
+{
+	switch (state) {
+	case LDC_STATE_INVALID:
+		return "INVALID";
+	case LDC_STATE_INIT:
+		return "INIT";
+	case LDC_STATE_BOUND:
+		return "BOUND";
+	case LDC_STATE_READY:
+		return "READY";
+	case LDC_STATE_CONNECTED:
+		return "CONNECTED";
+	default:
+		return "<UNKNOWN>";
+	}
+}
+
+static unsigned long __advance(unsigned long off, unsigned long num_entries)
+{
+	off += LDC_PACKET_SIZE;
+	if (off == (num_entries * LDC_PACKET_SIZE))
+		off = 0;
+
+	return off;
+}
+
+static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
+{
+	return __advance(off, lp->rx_num_entries);
+}
+
+static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
+{
+	return __advance(off, lp->tx_num_entries);
+}
+
+static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
+						  unsigned long *new_tail)
+{
+	struct ldc_packet *p;
+	unsigned long t;
+
+	t = tx_advance(lp, lp->tx_tail);
+	if (t == lp->tx_head)
+		return NULL;
+
+	*new_tail = t;
+
+	p = lp->tx_base;
+	return p + (lp->tx_tail / LDC_PACKET_SIZE);
+}
+
+/* When we are in reliable or stream mode, we have to track the next packet
+ * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
+ * to be careful not to stomp over the queue past that point.  During
+ * the handshake, we don't have TX data packets pending in the queue
+ * and that's why handshake_get_tx_packet() need not be mindful of
+ * lp->tx_acked.
+ */
+static unsigned long head_for_data(struct ldc_channel *lp)
+{
+	if (lp->cfg.mode == LDC_MODE_STREAM)
+		return lp->tx_acked;
+	return lp->tx_head;
+}
+
+static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
+{
+	unsigned long limit, tail, new_tail, diff;
+	unsigned int mss;
+
+	limit = head_for_data(lp);
+	tail = lp->tx_tail;
+	new_tail = tx_advance(lp, tail);
+	if (new_tail == limit)
+		return 0;
+
+	if (limit > new_tail)
+		diff = limit - new_tail;
+	else
+		diff = (limit +
+			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
+	diff /= LDC_PACKET_SIZE;
+	mss = lp->mss;
+
+	if (diff * mss < size)
+		return 0;
+
+	return 1;
+}
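+
+/* Worked example with illustrative numbers: with 64 TX entries,
+ * limit = head = entry 0 and tail = entry 61, new_tail advances to
+ * entry 62 and diff = (64*64 - 62*64) / 64 = 2 free packets, so any
+ * request with size <= 2 * mss fits.  One slot is always kept unused
+ * (new_tail == limit fails) so that head == tail can mean "empty".
+ */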
+
+static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
+					     unsigned long *new_tail)
+{
+	struct ldc_packet *p;
+	unsigned long h, t;
+
+	h = head_for_data(lp);
+	t = tx_advance(lp, lp->tx_tail);
+	if (t == h)
+		return NULL;
+
+	*new_tail = t;
+
+	p = lp->tx_base;
+	return p + (lp->tx_tail / LDC_PACKET_SIZE);
+}
+
+static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
+{
+	unsigned long orig_tail = lp->tx_tail;
+	int limit = 1000;
+
+	lp->tx_tail = tail;
+	while (limit-- > 0) {
+		unsigned long err;
+
+		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
+		if (!err)
+			return 0;
+
+		if (err != HV_EWOULDBLOCK) {
+			lp->tx_tail = orig_tail;
+			return -EINVAL;
+		}
+		udelay(1);
+	}
+
+	lp->tx_tail = orig_tail;
+	return -EBUSY;
+}
+
+/* This just updates the head value in the hypervisor using
+ * a polling loop with a timeout.  The caller takes care of
+ * updating software state representing the head change, if any.
+ */
+static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
+{
+	int limit = 1000;
+
+	while (limit-- > 0) {
+		unsigned long err;
+
+		err = sun4v_ldc_rx_set_qhead(lp->id, head);
+		if (!err)
+			return 0;
+
+		if (err != HV_EWOULDBLOCK)
+			return -EINVAL;
+
+		udelay(1);
+	}
+
+	return -EBUSY;
+}
+
+static int send_tx_packet(struct ldc_channel *lp,
+			  struct ldc_packet *p,
+			  unsigned long new_tail)
+{
+	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
+
+	return set_tx_tail(lp, new_tail);
+}
+
+static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
+						 u8 stype, u8 ctrl,
+						 void *data, int dlen,
+						 unsigned long *new_tail)
+{
+	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
+
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		p->type = LDC_CTRL;
+		p->stype = stype;
+		p->ctrl = ctrl;
+		if (data)
+			memcpy(p->u.u_data, data, dlen);
+	}
+	return p;
+}
+
+static int start_handshake(struct ldc_channel *lp)
+{
+	struct ldc_packet *p;
+	struct ldc_version *ver;
+	unsigned long new_tail;
+
+	ver = &ver_arr[0];
+
+	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
+	       ver->major, ver->minor);
+
+	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+				   ver, sizeof(*ver), &new_tail);
+	if (p) {
+		int err = send_tx_packet(lp, p, new_tail);
+		if (!err)
+			lp->flags &= ~LDC_FLAG_RESET;
+		return err;
+	}
+	return -EBUSY;
+}
+
+static int send_version_nack(struct ldc_channel *lp,
+			     u16 major, u16 minor)
+{
+	struct ldc_packet *p;
+	struct ldc_version ver;
+	unsigned long new_tail;
+
+	ver.major = major;
+	ver.minor = minor;
+
+	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
+				   &ver, sizeof(ver), &new_tail);
+	if (p) {
+		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
+		       ver.major, ver.minor);
+
+		return send_tx_packet(lp, p, new_tail);
+	}
+	return -EBUSY;
+}
+
+static int send_version_ack(struct ldc_channel *lp,
+			    struct ldc_version *vp)
+{
+	struct ldc_packet *p;
+	unsigned long new_tail;
+
+	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
+				   vp, sizeof(*vp), &new_tail);
+	if (p) {
+		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
+		       vp->major, vp->minor);
+
+		return send_tx_packet(lp, p, new_tail);
+	}
+	return -EBUSY;
+}
+
+static int send_rts(struct ldc_channel *lp)
+{
+	struct ldc_packet *p;
+	unsigned long new_tail;
+
+	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
+				   &new_tail);
+	if (p) {
+		p->env = lp->cfg.mode;
+		p->seqid = 0;
+		lp->rcv_nxt = 0;
+
+		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
+		       p->env, p->seqid);
+
+		return send_tx_packet(lp, p, new_tail);
+	}
+	return -EBUSY;
+}
+
+static int send_rtr(struct ldc_channel *lp)
+{
+	struct ldc_packet *p;
+	unsigned long new_tail;
+
+	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
+				   &new_tail);
+	if (p) {
+		p->env = lp->cfg.mode;
+		p->seqid = 0;
+
+		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
+		       p->env, p->seqid);
+
+		return send_tx_packet(lp, p, new_tail);
+	}
+	return -EBUSY;
+}
+
+static int send_rdx(struct ldc_channel *lp)
+{
+	struct ldc_packet *p;
+	unsigned long new_tail;
+
+	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
+				   &new_tail);
+	if (p) {
+		p->env = 0;
+		p->seqid = ++lp->snd_nxt;
+		p->u.r.ackid = lp->rcv_nxt;
+
+		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
+		       p->env, p->seqid, p->u.r.ackid);
+
+		return send_tx_packet(lp, p, new_tail);
+	}
+	return -EBUSY;
+}
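+
+/* Taken together, the senders above implement the non-RAW handshake
+ * ladder (a summary of the code, not additional protocol):
+ *
+ *	VER INFO  ->  peer ACKs a common version   (LDC_HS_GOTVERS)
+ *	RTS       ->  peer replies with RTR        (LDC_HS_SENTRTR)
+ *	RDX       ->  LDC_HS_COMPLETE and LDC_STATE_CONNECTED
+ *
+ * RAW mode channels skip all of this: ldc_bind() marks them
+ * LDC_HS_COMPLETE immediately.
+ */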
+
+static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
+{
+	struct ldc_packet *p;
+	unsigned long new_tail;
+	int err;
+
+	p = data_get_tx_packet(lp, &new_tail);
+	if (!p)
+		return -EBUSY;
+	memset(p, 0, sizeof(*p));
+	p->type = data_pkt->type;
+	p->stype = LDC_NACK;
+	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
+	p->seqid = lp->snd_nxt + 1;
+	p->u.r.ackid = lp->rcv_nxt;
+
+	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
+	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
+
+	err = send_tx_packet(lp, p, new_tail);
+	if (!err)
+		lp->snd_nxt++;
+
+	return err;
+}
+
+static int ldc_abort(struct ldc_channel *lp, const char *msg)
+{
+	unsigned long hv_err;
+
+	ldcdbg(STATE, "ABORT[%s]\n", msg);
+	ldc_print(lp);
+
+	/* We report but do not act upon the hypervisor errors because
+	 * there really isn't much we can do if they fail at this point.
+	 */
+	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+	if (hv_err)
+		printk(KERN_ERR PFX "ldc_abort: "
+		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
+
+	hv_err = sun4v_ldc_tx_get_state(lp->id,
+					&lp->tx_head,
+					&lp->tx_tail,
+					&lp->chan_state);
+	if (hv_err)
+		printk(KERN_ERR PFX "ldc_abort: "
+		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
+		       lp->id, hv_err);
+
+	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+	if (hv_err)
+		printk(KERN_ERR PFX "ldc_abort: "
+		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
+		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
+
+	/* Refetch the RX queue state as well, because we could be invoked
+	 * here in the queue processing context.
+	 */
+	hv_err = sun4v_ldc_rx_get_state(lp->id,
+					&lp->rx_head,
+					&lp->rx_tail,
+					&lp->chan_state);
+	if (hv_err)
+		printk(KERN_ERR PFX "ldc_abort: "
+		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
+		       lp->id, hv_err);
+
+	return -ECONNRESET;
+}
+
+static struct ldc_version *find_by_major(u16 major)
+{
+	struct ldc_version *ret = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
+		struct ldc_version *v = &ver_arr[i];
+		if (v->major <= major) {
+			ret = v;
+			break;
+		}
+	}
+	return ret;
+}
+
+static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
+{
+	struct ldc_version *vap;
+	int err;
+
+	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
+	       vp->major, vp->minor);
+
+	if (lp->hs_state == LDC_HS_GOTVERS) {
+		lp->hs_state = LDC_HS_OPEN;
+		memset(&lp->ver, 0, sizeof(lp->ver));
+	}
+
+	vap = find_by_major(vp->major);
+	if (!vap) {
+		err = send_version_nack(lp, 0, 0);
+	} else if (vap->major != vp->major) {
+		err = send_version_nack(lp, vap->major, vap->minor);
+	} else {
+		struct ldc_version ver = *vp;
+		if (ver.minor > vap->minor)
+			ver.minor = vap->minor;
+		err = send_version_ack(lp, &ver);
+		if (!err) {
+			lp->ver = ver;
+			lp->hs_state = LDC_HS_GOTVERS;
+		}
+	}
+	if (err)
+		return LDC_ABORT(lp);
+
+	return 0;
+}
+
+static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
+	       vp->major, vp->minor);
+
+	if (lp->hs_state == LDC_HS_GOTVERS) {
+		if (lp->ver.major != vp->major ||
+		    lp->ver.minor != vp->minor)
+			return LDC_ABORT(lp);
+	} else {
+		lp->ver = *vp;
+		lp->hs_state = LDC_HS_GOTVERS;
+	}
+	if (send_rts(lp))
+		return LDC_ABORT(lp);
+	return 0;
+}
+
+static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
+{
+	struct ldc_version *vap;
+	struct ldc_packet *p;
+	unsigned long new_tail;
+
+	if (vp->major == 0 && vp->minor == 0)
+		return LDC_ABORT(lp);
+
+	vap = find_by_major(vp->major);
+	if (!vap)
+		return LDC_ABORT(lp);
+
+	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
+				   vap, sizeof(*vap), &new_tail);
+	if (!p)
+		return LDC_ABORT(lp);
+
+	return send_tx_packet(lp, p, new_tail);
+}
+
+static int process_version(struct ldc_channel *lp,
+			   struct ldc_packet *p)
+{
+	struct ldc_version *vp;
+
+	vp = (struct ldc_version *) p->u.u_data;
+
+	switch (p->stype) {
+	case LDC_INFO:
+		return process_ver_info(lp, vp);
+
+	case LDC_ACK:
+		return process_ver_ack(lp, vp);
+
+	case LDC_NACK:
+		return process_ver_nack(lp, vp);
+
+	default:
+		return LDC_ABORT(lp);
+	}
+}
+
+static int process_rts(struct ldc_channel *lp,
+		       struct ldc_packet *p)
+{
+	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
+	       p->stype, p->seqid, p->env);
+
+	if (p->stype     != LDC_INFO	   ||
+	    lp->hs_state != LDC_HS_GOTVERS ||
+	    p->env       != lp->cfg.mode)
+		return LDC_ABORT(lp);
+
+	lp->snd_nxt = p->seqid;
+	lp->rcv_nxt = p->seqid;
+	lp->hs_state = LDC_HS_SENTRTR;
+	if (send_rtr(lp))
+		return LDC_ABORT(lp);
+
+	return 0;
+}
+
+static int process_rtr(struct ldc_channel *lp,
+		       struct ldc_packet *p)
+{
+	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
+	       p->stype, p->seqid, p->env);
+
+	if (p->stype     != LDC_INFO ||
+	    p->env       != lp->cfg.mode)
+		return LDC_ABORT(lp);
+
+	lp->snd_nxt = p->seqid;
+	lp->hs_state = LDC_HS_COMPLETE;
+	ldc_set_state(lp, LDC_STATE_CONNECTED);
+	send_rdx(lp);
+
+	return LDC_EVENT_UP;
+}
+
+static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
+{
+	return lp->rcv_nxt + 1 == seqid;
+}
+
+static int process_rdx(struct ldc_channel *lp,
+		       struct ldc_packet *p)
+{
+	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
+	       p->stype, p->seqid, p->env, p->u.r.ackid);
+
+	if (p->stype != LDC_INFO ||
+	    !(rx_seq_ok(lp, p->seqid)))
+		return LDC_ABORT(lp);
+
+	lp->rcv_nxt = p->seqid;
+
+	lp->hs_state = LDC_HS_COMPLETE;
+	ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+	return LDC_EVENT_UP;
+}
+
+static int process_control_frame(struct ldc_channel *lp,
+				 struct ldc_packet *p)
+{
+	switch (p->ctrl) {
+	case LDC_VERS:
+		return process_version(lp, p);
+
+	case LDC_RTS:
+		return process_rts(lp, p);
+
+	case LDC_RTR:
+		return process_rtr(lp, p);
+
+	case LDC_RDX:
+		return process_rdx(lp, p);
+
+	default:
+		return LDC_ABORT(lp);
+	}
+}
+
+static int process_error_frame(struct ldc_channel *lp,
+			       struct ldc_packet *p)
+{
+	return LDC_ABORT(lp);
+}
+
+static int process_data_ack(struct ldc_channel *lp,
+			    struct ldc_packet *ack)
+{
+	unsigned long head = lp->tx_acked;
+	u32 ackid = ack->u.r.ackid;
+
+	while (1) {
+		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
+
+		head = tx_advance(lp, head);
+
+		if (p->seqid == ackid) {
+			lp->tx_acked = head;
+			return 0;
+		}
+		if (head == lp->tx_tail)
+			return LDC_ABORT(lp);
+	}
+
+	return 0;
+}
+
+static void send_events(struct ldc_channel *lp, unsigned int event_mask)
+{
+	if (event_mask & LDC_EVENT_RESET)
+		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
+	if (event_mask & LDC_EVENT_UP)
+		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
+	if (event_mask & LDC_EVENT_DATA_READY)
+		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
+}
+
+static irqreturn_t ldc_rx(int irq, void *dev_id)
+{
+	struct ldc_channel *lp = dev_id;
+	unsigned long orig_state, flags;
+	unsigned int event_mask;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	orig_state = lp->chan_state;
+
+	/* We should probably check for hypervisor errors here and
+	 * reset the LDC channel if we get one.
+	 */
+	sun4v_ldc_rx_get_state(lp->id,
+			       &lp->rx_head,
+			       &lp->rx_tail,
+			       &lp->chan_state);
+
+	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
+
+	event_mask = 0;
+
+	if (lp->cfg.mode == LDC_MODE_RAW &&
+	    lp->chan_state == LDC_CHANNEL_UP) {
+		lp->hs_state = LDC_HS_COMPLETE;
+		ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+		/*
+		 * Generate an LDC_EVENT_UP event if the channel
+		 * was not already up.
+		 */
+		if (orig_state != LDC_CHANNEL_UP) {
+			event_mask |= LDC_EVENT_UP;
+			orig_state = lp->chan_state;
+		}
+	}
+
+	/* If we are in reset state, flush the RX queue and ignore
+	 * everything.
+	 */
+	if (lp->flags & LDC_FLAG_RESET) {
+		(void) ldc_rx_reset(lp);
+		goto out;
+	}
+
+	/* Once we finish the handshake, we let the ldc_read()
+	 * paths do all of the control frame and state management.
+	 * Just trigger the callback.
+	 */
+	if (lp->hs_state == LDC_HS_COMPLETE) {
+handshake_complete:
+		if (lp->chan_state != orig_state) {
+			unsigned int event = LDC_EVENT_RESET;
+
+			if (lp->chan_state == LDC_CHANNEL_UP)
+				event = LDC_EVENT_UP;
+
+			event_mask |= event;
+		}
+		if (lp->rx_head != lp->rx_tail)
+			event_mask |= LDC_EVENT_DATA_READY;
+
+		goto out;
+	}
+
+	if (lp->chan_state != orig_state)
+		goto out;
+
+	while (lp->rx_head != lp->rx_tail) {
+		struct ldc_packet *p;
+		unsigned long new;
+		int err;
+
+		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+
+		switch (p->type) {
+		case LDC_CTRL:
+			err = process_control_frame(lp, p);
+			if (err > 0)
+				event_mask |= err;
+			break;
+
+		case LDC_DATA:
+			event_mask |= LDC_EVENT_DATA_READY;
+			err = 0;
+			break;
+
+		case LDC_ERR:
+			err = process_error_frame(lp, p);
+			break;
+
+		default:
+			err = LDC_ABORT(lp);
+			break;
+		}
+
+		if (err < 0)
+			break;
+
+		new = lp->rx_head;
+		new += LDC_PACKET_SIZE;
+		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
+			new = 0;
+		lp->rx_head = new;
+
+		err = __set_rx_head(lp, new);
+		if (err < 0) {
+			(void) LDC_ABORT(lp);
+			break;
+		}
+		if (lp->hs_state == LDC_HS_COMPLETE)
+			goto handshake_complete;
+	}
+
+out:
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	send_events(lp, event_mask);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ldc_tx(int irq, void *dev_id)
+{
+	struct ldc_channel *lp = dev_id;
+	unsigned long flags, orig_state;
+	unsigned int event_mask = 0;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	orig_state = lp->chan_state;
+
+	/* We should probably check for hypervisor errors here and
+	 * reset the LDC channel if we get one.
+	 */
+	sun4v_ldc_tx_get_state(lp->id,
+			       &lp->tx_head,
+			       &lp->tx_tail,
+			       &lp->chan_state);
+
+	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
+	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
+
+	if (lp->cfg.mode == LDC_MODE_RAW &&
+	    lp->chan_state == LDC_CHANNEL_UP) {
+		lp->hs_state = LDC_HS_COMPLETE;
+		ldc_set_state(lp, LDC_STATE_CONNECTED);
+
+		/*
+		 * Generate an LDC_EVENT_UP event if the channel
+		 * was not already up.
+		 */
+		if (orig_state != LDC_CHANNEL_UP) {
+			event_mask |= LDC_EVENT_UP;
+			orig_state = lp->chan_state;
+		}
+	}
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	send_events(lp, event_mask);
+
+	return IRQ_HANDLED;
+}
+
+/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
+ * XXX that addition and removal from the ldc_channel_list has
+ * XXX atomicity, otherwise the __ldc_channel_exists() check is
+ * XXX totally pointless as another thread can slip into ldc_alloc()
+ * XXX and add a channel with the same ID.  There also needs to be
+ * XXX a spinlock for ldc_channel_list.
+ */
+static HLIST_HEAD(ldc_channel_list);
+
+static int __ldc_channel_exists(unsigned long id)
+{
+	struct ldc_channel *lp;
+
+	hlist_for_each_entry(lp, &ldc_channel_list, list) {
+		if (lp->id == id)
+			return 1;
+	}
+	return 0;
+}
+
+static int alloc_queue(const char *name, unsigned long num_entries,
+		       struct ldc_packet **base, unsigned long *ra)
+{
+	unsigned long size, order;
+	void *q;
+
+	size = num_entries * LDC_PACKET_SIZE;
+	order = get_order(size);
+
+	q = (void *) __get_free_pages(GFP_KERNEL, order);
+	if (!q) {
+		printk(KERN_ERR PFX "Alloc of %s queue failed with "
+		       "size=%lu order=%lu\n", name, size, order);
+		return -ENOMEM;
+	}
+
+	memset(q, 0, PAGE_SIZE << order);
+
+	*base = q;
+	*ra = __pa(q);
+
+	return 0;
+}
+
+static void free_queue(unsigned long num_entries, struct ldc_packet *q)
+{
+	unsigned long size, order;
+
+	if (!q)
+		return;
+
+	size = num_entries * LDC_PACKET_SIZE;
+	order = get_order(size);
+
+	free_pages((unsigned long)q, order);
+}
+
+static unsigned long ldc_cookie_to_index(u64 cookie, void *arg)
+{
+	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
+	/* struct ldc_iommu *ldc_iommu = (struct ldc_iommu *)arg; */
+
+	cookie &= ~COOKIE_PGSZ_CODE;
+
+	return (cookie >> (13ULL + (szcode * 3ULL)));
+}
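+
+/* For instance (assuming the default 8K kernel page size, szcode 0):
+ * a cookie of (5 << 13) | 0x10 decodes with shift 13 + 0 * 3, so the
+ * function returns index 5 and the sub-page offset bits fall away in
+ * the shift.  Each larger page-size code adds 3 to the shift, matching
+ * the 8x page-size steps in pagesize_code() below.
+ */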
+
+static void ldc_demap(struct ldc_iommu *iommu, unsigned long id, u64 cookie,
+		      unsigned long entry, unsigned long npages)
+{
+	struct ldc_mtable_entry *base;
+	unsigned long i, shift;
+
+	shift = (cookie >> COOKIE_PGSZ_CODE_SHIFT) * 3;
+	base = iommu->page_table + entry;
+	for (i = 0; i < npages; i++) {
+		if (base->cookie)
+			sun4v_ldc_revoke(id, cookie + (i << shift),
+					 base->cookie);
+		base->mte = 0;
+	}
+}
+
+/* XXX Make this configurable... XXX */
+#define LDC_IOTABLE_SIZE	(8 * 1024)
+
+static int ldc_iommu_init(const char *name, struct ldc_channel *lp)
+{
+	unsigned long sz, num_tsb_entries, tsbsize, order;
+	struct ldc_iommu *ldc_iommu = &lp->iommu;
+	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
+	struct ldc_mtable_entry *table;
+	unsigned long hv_err;
+	int err;
+
+	num_tsb_entries = LDC_IOTABLE_SIZE;
+	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
+	spin_lock_init(&ldc_iommu->lock);
+
+	sz = num_tsb_entries / 8;
+	sz = (sz + 7UL) & ~7UL;
+	iommu->map = kzalloc(sz, GFP_KERNEL);
+	if (!iommu->map) {
+		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
+		return -ENOMEM;
+	}
+	iommu_tbl_pool_init(iommu, num_tsb_entries, PAGE_SHIFT,
+			    NULL, false /* no large pool */,
+			    1 /* npools */,
+			    true /* skip span boundary check */);
+
+	order = get_order(tsbsize);
+
+	table = (struct ldc_mtable_entry *)
+		__get_free_pages(GFP_KERNEL, order);
+	err = -ENOMEM;
+	if (!table) {
+		printk(KERN_ERR PFX "Alloc of MTE table failed, "
+		       "size=%lu order=%lu\n", tsbsize, order);
+		goto out_free_map;
+	}
+
+	memset(table, 0, PAGE_SIZE << order);
+
+	ldc_iommu->page_table = table;
+
+	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
+					 num_tsb_entries);
+	err = -EINVAL;
+	if (hv_err)
+		goto out_free_table;
+
+	return 0;
+
+out_free_table:
+	free_pages((unsigned long) table, order);
+	ldc_iommu->page_table = NULL;
+
+out_free_map:
+	kfree(iommu->map);
+	iommu->map = NULL;
+
+	return err;
+}
+
+static void ldc_iommu_release(struct ldc_channel *lp)
+{
+	struct ldc_iommu *ldc_iommu = &lp->iommu;
+	struct iommu_map_table *iommu = &ldc_iommu->iommu_map_table;
+	unsigned long num_tsb_entries, tsbsize, order;
+
+	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
+
+	num_tsb_entries = iommu->poolsize * iommu->nr_pools;
+	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
+	order = get_order(tsbsize);
+
+	free_pages((unsigned long) ldc_iommu->page_table, order);
+	ldc_iommu->page_table = NULL;
+
+	kfree(iommu->map);
+	iommu->map = NULL;
+}
+
+struct ldc_channel *ldc_alloc(unsigned long id,
+			      const struct ldc_channel_config *cfgp,
+			      void *event_arg,
+			      const char *name)
+{
+	struct ldc_channel *lp;
+	const struct ldc_mode_ops *mops;
+	unsigned long dummy1, dummy2, hv_err;
+	u8 mss, *mssbuf;
+	int err;
+
+	err = -ENODEV;
+	if (!ldom_domaining_enabled)
+		goto out_err;
+
+	err = -EINVAL;
+	if (!cfgp)
+		goto out_err;
+	if (!name)
+		goto out_err;
+
+	switch (cfgp->mode) {
+	case LDC_MODE_RAW:
+		mops = &raw_ops;
+		mss = LDC_PACKET_SIZE;
+		break;
+
+	case LDC_MODE_UNRELIABLE:
+		mops = &nonraw_ops;
+		mss = LDC_PACKET_SIZE - 8;
+		break;
+
+	case LDC_MODE_STREAM:
+		mops = &stream_ops;
+		mss = LDC_PACKET_SIZE - 8 - 8;
+		break;
+
+	default:
+		goto out_err;
+	}
+
+	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
+		goto out_err;
+
+	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
+	err = -ENODEV;
+	if (hv_err == HV_ECHANNEL)
+		goto out_err;
+
+	err = -EEXIST;
+	if (__ldc_channel_exists(id))
+		goto out_err;
+
+	mssbuf = NULL;
+
+	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!lp)
+		goto out_err;
+
+	spin_lock_init(&lp->lock);
+
+	lp->id = id;
+
+	err = ldc_iommu_init(name, lp);
+	if (err)
+		goto out_free_ldc;
+
+	lp->mops = mops;
+	lp->mss = mss;
+
+	lp->cfg = *cfgp;
+	if (!lp->cfg.mtu)
+		lp->cfg.mtu = LDC_DEFAULT_MTU;
+
+	if (lp->cfg.mode == LDC_MODE_STREAM) {
+		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
+		if (!mssbuf) {
+			err = -ENOMEM;
+			goto out_free_iommu;
+		}
+		lp->mssbuf = mssbuf;
+	}
+
+	lp->event_arg = event_arg;
+
+	/* XXX allow setting via ldc_channel_config to override defaults
+	 * XXX or use some formula based upon mtu
+	 */
+	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
+
+	err = alloc_queue("TX", lp->tx_num_entries,
+			  &lp->tx_base, &lp->tx_ra);
+	if (err)
+		goto out_free_mssbuf;
+
+	err = alloc_queue("RX", lp->rx_num_entries,
+			  &lp->rx_base, &lp->rx_ra);
+	if (err)
+		goto out_free_txq;
+
+	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
+
+	lp->hs_state = LDC_HS_CLOSED;
+	ldc_set_state(lp, LDC_STATE_INIT);
+
+	INIT_HLIST_NODE(&lp->list);
+	hlist_add_head(&lp->list, &ldc_channel_list);
+
+	INIT_HLIST_HEAD(&lp->mh_list);
+
+	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
+			  lp->rx_irq_name, lp);
+	if (err)
+		goto out_free_txq;
+
+	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
+			  lp->tx_irq_name, lp);
+	if (err) {
+		free_irq(lp->cfg.rx_irq, lp);
+		goto out_free_txq;
+	}
+
+	return lp;
+
+out_free_txq:
+	free_queue(lp->tx_num_entries, lp->tx_base);
+
+out_free_mssbuf:
+	kfree(mssbuf);
+
+out_free_iommu:
+	ldc_iommu_release(lp);
+
+out_free_ldc:
+	kfree(lp);
+
+out_err:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(ldc_alloc);
+
+void ldc_unbind(struct ldc_channel *lp)
+{
+	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
+		free_irq(lp->cfg.rx_irq, lp);
+		free_irq(lp->cfg.tx_irq, lp);
+		lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
+	}
+
+	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
+		sun4v_ldc_tx_qconf(lp->id, 0, 0);
+		sun4v_ldc_rx_qconf(lp->id, 0, 0);
+		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+	}
+	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
+		free_queue(lp->tx_num_entries, lp->tx_base);
+		free_queue(lp->rx_num_entries, lp->rx_base);
+		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
+	}
+
+	ldc_set_state(lp, LDC_STATE_INIT);
+}
+EXPORT_SYMBOL(ldc_unbind);
+
+void ldc_free(struct ldc_channel *lp)
+{
+	ldc_unbind(lp);
+	hlist_del(&lp->list);
+	kfree(lp->mssbuf);
+	ldc_iommu_release(lp);
+
+	kfree(lp);
+}
+EXPORT_SYMBOL(ldc_free);
+
+/* Bind the channel.  This registers the LDC queues with
+ * the hypervisor and puts the channel into a pseudo-listening
+ * state.  This does not initiate a handshake; ldc_connect() does
+ * that.
+ */
+int ldc_bind(struct ldc_channel *lp)
+{
+	unsigned long hv_err, flags;
+	int err = -EINVAL;
+
+	if (lp->state != LDC_STATE_INIT)
+		return -EINVAL;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	enable_irq(lp->cfg.rx_irq);
+	enable_irq(lp->cfg.tx_irq);
+
+	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
+
+	err = -ENODEV;
+	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+	if (hv_err)
+		goto out_free_irqs;
+
+	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+	if (hv_err)
+		goto out_free_irqs;
+
+	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+	if (hv_err)
+		goto out_unmap_tx;
+
+	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+	if (hv_err)
+		goto out_unmap_tx;
+
+	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
+
+	hv_err = sun4v_ldc_tx_get_state(lp->id,
+					&lp->tx_head,
+					&lp->tx_tail,
+					&lp->chan_state);
+	err = -EBUSY;
+	if (hv_err)
+		goto out_unmap_rx;
+
+	lp->tx_acked = lp->tx_head;
+
+	lp->hs_state = LDC_HS_OPEN;
+	ldc_set_state(lp, LDC_STATE_BOUND);
+
+	if (lp->cfg.mode == LDC_MODE_RAW) {
+		/*
+		 * There is no handshake in RAW mode, so handshake
+		 * is completed.
+		 */
+		lp->hs_state = LDC_HS_COMPLETE;
+	}
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return 0;
+
+out_unmap_rx:
+	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
+	sun4v_ldc_rx_qconf(lp->id, 0, 0);
+
+out_unmap_tx:
+	sun4v_ldc_tx_qconf(lp->id, 0, 0);
+
+out_free_irqs:
+	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
+	free_irq(lp->cfg.tx_irq, lp);
+	free_irq(lp->cfg.rx_irq, lp);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_bind);
+
+int ldc_connect(struct ldc_channel *lp)
+{
+	unsigned long flags;
+	int err;
+
+	if (lp->cfg.mode == LDC_MODE_RAW)
+		return -EINVAL;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
+	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
+	    lp->hs_state != LDC_HS_OPEN)
+		err = ((lp->hs_state > LDC_HS_OPEN) ? 0 : -EINVAL);
+	else
+		err = start_handshake(lp);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_connect);
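+
+/* Typical client flow for the API exported above (a sketch; the event
+ * callback and the config values are placeholders, not from this
+ * file):
+ *
+ *	struct ldc_channel_config cfg = {
+ *		.event	= my_event,	handshake/data callback
+ *		.mode	= LDC_MODE_STREAM,
+ *		.mtu	= 4096,
+ *		.rx_irq	= rx_ino,
+ *		.tx_irq	= tx_ino,
+ *	};
+ *	struct ldc_channel *lp = ldc_alloc(id, &cfg, priv, "MYDEV");
+ *
+ *	ldc_bind(lp);		register queues, pseudo-listen
+ *	ldc_connect(lp);	kick off the handshake
+ *	...my_event() sees LDC_EVENT_UP, then ldc_read()/ldc_write()...
+ *	ldc_free(lp);		tear everything down
+ */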
+
+int ldc_disconnect(struct ldc_channel *lp)
+{
+	unsigned long hv_err, flags;
+	int err;
+
+	if (lp->cfg.mode == LDC_MODE_RAW)
+		return -EINVAL;
+
+	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
+	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	err = -ENODEV;
+	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
+	if (hv_err)
+		goto out_err;
+
+	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
+	if (hv_err)
+		goto out_err;
+
+	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
+	if (hv_err)
+		goto out_err;
+
+	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
+	if (hv_err)
+		goto out_err;
+
+	ldc_set_state(lp, LDC_STATE_BOUND);
+	lp->hs_state = LDC_HS_OPEN;
+	lp->flags |= LDC_FLAG_RESET;
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return 0;
+
+out_err:
+	sun4v_ldc_tx_qconf(lp->id, 0, 0);
+	sun4v_ldc_rx_qconf(lp->id, 0, 0);
+	free_irq(lp->cfg.tx_irq, lp);
+	free_irq(lp->cfg.rx_irq, lp);
+	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
+		       LDC_FLAG_REGISTERED_QUEUES);
+	ldc_set_state(lp, LDC_STATE_INIT);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_disconnect);
+
+int ldc_state(struct ldc_channel *lp)
+{
+	return lp->state;
+}
+EXPORT_SYMBOL(ldc_state);
+
+void ldc_set_state(struct ldc_channel *lp, u8 state)
+{
+	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
+	       state_to_str(lp->state),
+	       state_to_str(state));
+
+	lp->state = state;
+}
+EXPORT_SYMBOL(ldc_set_state);
+
+int ldc_mode(struct ldc_channel *lp)
+{
+	return lp->cfg.mode;
+}
+EXPORT_SYMBOL(ldc_mode);
+
+int ldc_rx_reset(struct ldc_channel *lp)
+{
+	return __set_rx_head(lp, lp->rx_tail);
+}
+EXPORT_SYMBOL(ldc_rx_reset);
+
+void __ldc_print(struct ldc_channel *lp, const char *caller)
+{
+	pr_info("%s: id=0x%lx flags=0x%x state=%s cstate=0x%lx hsstate=0x%x\n"
+		"\trx_h=0x%lx rx_t=0x%lx rx_n=%lu\n"
+		"\ttx_h=0x%lx tx_t=0x%lx tx_n=%lu\n"
+		"\trcv_nxt=%u snd_nxt=%u\n",
+		caller, lp->id, lp->flags, state_to_str(lp->state),
+		lp->chan_state, lp->hs_state,
+		lp->rx_head, lp->rx_tail, lp->rx_num_entries,
+		lp->tx_head, lp->tx_tail, lp->tx_num_entries,
+		lp->rcv_nxt, lp->snd_nxt);
+}
+EXPORT_SYMBOL(__ldc_print);
+
+static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+	struct ldc_packet *p;
+	unsigned long new_tail, hv_err;
+	int err;
+
+	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
+					&lp->chan_state);
+	if (unlikely(hv_err))
+		return -EBUSY;
+
+	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
+		return LDC_ABORT(lp);
+
+	if (size > LDC_PACKET_SIZE)
+		return -EMSGSIZE;
+
+	p = data_get_tx_packet(lp, &new_tail);
+	if (!p)
+		return -EAGAIN;
+
+	memcpy(p, buf, size);
+
+	err = send_tx_packet(lp, p, new_tail);
+	if (!err)
+		err = size;
+
+	return err;
+}
+
+static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	struct ldc_packet *p;
+	unsigned long hv_err, new;
+	int err;
+
+	if (size < LDC_PACKET_SIZE)
+		return -EINVAL;
+
+	hv_err = sun4v_ldc_rx_get_state(lp->id,
+					&lp->rx_head,
+					&lp->rx_tail,
+					&lp->chan_state);
+	if (hv_err)
+		return LDC_ABORT(lp);
+
+	if (lp->chan_state == LDC_CHANNEL_DOWN ||
+	    lp->chan_state == LDC_CHANNEL_RESETTING)
+		return -ECONNRESET;
+
+	if (lp->rx_head == lp->rx_tail)
+		return 0;
+
+	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
+	memcpy(buf, p, LDC_PACKET_SIZE);
+
+	new = rx_advance(lp, lp->rx_head);
+	lp->rx_head = new;
+
+	err = __set_rx_head(lp, new);
+	if (err < 0)
+		err = -ECONNRESET;
+	else
+		err = LDC_PACKET_SIZE;
+
+	return err;
+}
+
+static const struct ldc_mode_ops raw_ops = {
+	.write		=	write_raw,
+	.read		=	read_raw,
+};
+
+static int write_nonraw(struct ldc_channel *lp, const void *buf,
+			unsigned int size)
+{
+	unsigned long hv_err, tail;
+	unsigned int copied;
+	u32 seq;
+	int err;
+
+	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
+					&lp->chan_state);
+	if (unlikely(hv_err))
+		return -EBUSY;
+
+	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
+		return LDC_ABORT(lp);
+
+	if (!tx_has_space_for(lp, size))
+		return -EAGAIN;
+
+	seq = lp->snd_nxt;
+	copied = 0;
+	tail = lp->tx_tail;
+	while (copied < size) {
+		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
+		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
+			    p->u.u_data :
+			    p->u.r.r_data);
+		int data_len;
+
+		p->type = LDC_DATA;
+		p->stype = LDC_INFO;
+		p->ctrl = 0;
+
+		data_len = size - copied;
+		if (data_len > lp->mss)
+			data_len = lp->mss;
+
+		BUG_ON(data_len > LDC_LEN);
+
+		p->env = (data_len |
+			  (copied == 0 ? LDC_START : 0) |
+			  (data_len == size - copied ? LDC_STOP : 0));
+
+		p->seqid = ++seq;
+
+		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
+		       p->type,
+		       p->stype,
+		       p->ctrl,
+		       p->env,
+		       p->seqid);
+
+		memcpy(data, buf, data_len);
+		buf += data_len;
+		copied += data_len;
+
+		tail = tx_advance(lp, tail);
+	}
+
+	err = set_tx_tail(lp, tail);
+	if (!err) {
+		lp->snd_nxt = seq;
+		err = size;
+	}
+
+	return err;
+}
+
+static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
+		      struct ldc_packet *first_frag)
+{
+	int err;
+
+	if (first_frag)
+		lp->rcv_nxt = first_frag->seqid - 1;
+
+	err = send_data_nack(lp, p);
+	if (err)
+		return err;
+
+	err = ldc_rx_reset(lp);
+	if (err < 0)
+		return LDC_ABORT(lp);
+
+	return 0;
+}
+
+static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
+{
+	if (p->stype & LDC_ACK) {
+		int err = process_data_ack(lp, p);
+		if (err)
+			return err;
+	}
+	if (p->stype & LDC_NACK)
+		return LDC_ABORT(lp);
+
+	return 0;
+}
+
+static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
+{
+	unsigned long dummy;
+	int limit = 1000;
+
+	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
+	       cur_head, lp->rx_head, lp->rx_tail);
+	while (limit-- > 0) {
+		unsigned long hv_err;
+
+		hv_err = sun4v_ldc_rx_get_state(lp->id,
+						&dummy,
+						&lp->rx_tail,
+						&lp->chan_state);
+		if (hv_err)
+			return LDC_ABORT(lp);
+
+		if (lp->chan_state == LDC_CHANNEL_DOWN ||
+		    lp->chan_state == LDC_CHANNEL_RESETTING)
+			return -ECONNRESET;
+
+		if (cur_head != lp->rx_tail) {
+			ldcdbg(DATA, "DATA WAIT DONE "
+			       "head[%lx] tail[%lx] chan_state[%lx]\n",
+			       dummy, lp->rx_tail, lp->chan_state);
+			return 0;
+		}
+
+		udelay(1);
+	}
+	return -EAGAIN;
+}
+
+static int rx_set_head(struct ldc_channel *lp, unsigned long head)
+{
+	int err = __set_rx_head(lp, head);
+
+	if (err < 0)
+		return LDC_ABORT(lp);
+
+	lp->rx_head = head;
+	return 0;
+}
+
+static void send_data_ack(struct ldc_channel *lp)
+{
+	unsigned long new_tail;
+	struct ldc_packet *p;
+
+	p = data_get_tx_packet(lp, &new_tail);
+	if (likely(p)) {
+		int err;
+
+		memset(p, 0, sizeof(*p));
+		p->type = LDC_DATA;
+		p->stype = LDC_ACK;
+		p->ctrl = 0;
+		p->seqid = lp->snd_nxt + 1;
+		p->u.r.ackid = lp->rcv_nxt;
+
+		err = send_tx_packet(lp, p, new_tail);
+		if (!err)
+			lp->snd_nxt++;
+	}
+}
+
+static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	struct ldc_packet *first_frag;
+	unsigned long hv_err, new;
+	int err, copied;
+
+	hv_err = sun4v_ldc_rx_get_state(lp->id,
+					&lp->rx_head,
+					&lp->rx_tail,
+					&lp->chan_state);
+	if (hv_err)
+		return LDC_ABORT(lp);
+
+	if (lp->chan_state == LDC_CHANNEL_DOWN ||
+	    lp->chan_state == LDC_CHANNEL_RESETTING)
+		return -ECONNRESET;
+
+	if (lp->rx_head == lp->rx_tail)
+		return 0;
+
+	first_frag = NULL;
+	copied = err = 0;
+	new = lp->rx_head;
+	while (1) {
+		struct ldc_packet *p;
+		int pkt_len;
+
+		BUG_ON(new == lp->rx_tail);
+		p = lp->rx_base + (new / LDC_PACKET_SIZE);
+
+		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
+		       "rcv_nxt[%08x]\n",
+		       p->type,
+		       p->stype,
+		       p->ctrl,
+		       p->env,
+		       p->seqid,
+		       p->u.r.ackid,
+		       lp->rcv_nxt);
+
+		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
+			err = rx_bad_seq(lp, p, first_frag);
+			copied = 0;
+			break;
+		}
+
+		if (p->type & LDC_CTRL) {
+			err = process_control_frame(lp, p);
+			if (err < 0)
+				break;
+			err = 0;
+		}
+
+		lp->rcv_nxt = p->seqid;
+
+		/*
+		 * If this is a control-only packet, there is nothing
+		 * else to do but advance the rx queue since the packet
+		 * was already processed above.
+		 */
+		if (!(p->type & LDC_DATA)) {
+			new = rx_advance(lp, new);
+			break;
+		}
+		if (p->stype & (LDC_ACK | LDC_NACK)) {
+			err = data_ack_nack(lp, p);
+			if (err)
+				break;
+		}
+		if (!(p->stype & LDC_INFO)) {
+			new = rx_advance(lp, new);
+			err = rx_set_head(lp, new);
+			if (err)
+				break;
+			goto no_data;
+		}
+
+		pkt_len = p->env & LDC_LEN;
+
+		/* Every initial packet starts with the START bit set.
+		 *
+		 * Singleton packets will have both START+STOP set.
+		 *
+		 * Fragments will have START set in the first frame, STOP
+		 * set in the last frame, and neither bit set in middle
+		 * frames of the packet.
+		 *
+		 * Therefore if we are at the beginning of a packet and
+		 * we don't see START, or we are in the middle of a fragmented
+		 * packet and do see START, we are unsynchronized and should
+		 * flush the RX queue.
+		 */
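+		/* E.g. (illustrative): a 120-byte write in UNRELIABLE
+		 * mode (mss 56) arrives as three frames whose env
+		 * fields are START|56, 56 and STOP|8; only the first
+		 * carries START.
+		 */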
+		if ((first_frag == NULL && !(p->env & LDC_START)) ||
+		    (first_frag != NULL &&  (p->env & LDC_START))) {
+			if (!first_frag)
+				new = rx_advance(lp, new);
+
+			err = rx_set_head(lp, new);
+			if (err)
+				break;
+
+			if (!first_frag)
+				goto no_data;
+		}
+		if (!first_frag)
+			first_frag = p;
+
+		if (pkt_len > size - copied) {
+			/* User didn't give us a big enough buffer,
+			 * what to do?  This is a pretty serious error.
+			 *
+			 * Since we haven't updated the RX ring head to
+			 * consume any of the packets, signal the error
+			 * to the user and just leave the RX ring alone.
+			 *
+			 * This seems the best behavior because this allows
+			 * a user of the LDC layer to start with a small
+			 * RX buffer for ldc_read() calls and use -EMSGSIZE
+			 * as a cue to enlarge its read buffer.
+			 */
+			err = -EMSGSIZE;
+			break;
+		}
+
+		/* Ok, we are gonna eat this one.  */
+		new = rx_advance(lp, new);
+
+		memcpy(buf,
+		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
+			p->u.u_data : p->u.r.r_data), pkt_len);
+		buf += pkt_len;
+		copied += pkt_len;
+
+		if (p->env & LDC_STOP)
+			break;
+
+no_data:
+		if (new == lp->rx_tail) {
+			err = rx_data_wait(lp, new);
+			if (err)
+				break;
+		}
+	}
+
+	if (!err)
+		err = rx_set_head(lp, new);
+
+	if (err && first_frag)
+		lp->rcv_nxt = first_frag->seqid - 1;
+
+	if (!err) {
+		err = copied;
+		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
+			send_data_ack(lp);
+	}
+
+	return err;
+}
+
+static const struct ldc_mode_ops nonraw_ops = {
+	.write		=	write_nonraw,
+	.read		=	read_nonraw,
+};
+
+static int write_stream(struct ldc_channel *lp, const void *buf,
+			unsigned int size)
+{
+	if (size > lp->cfg.mtu)
+		size = lp->cfg.mtu;
+	return write_nonraw(lp, buf, size);
+}
+
+static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	if (!lp->mssbuf_len) {
+		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
+		if (err < 0)
+			return err;
+
+		lp->mssbuf_len = err;
+		lp->mssbuf_off = 0;
+	}
+
+	if (size > lp->mssbuf_len)
+		size = lp->mssbuf_len;
+	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
+
+	lp->mssbuf_off += size;
+	lp->mssbuf_len -= size;
+
+	return size;
+}
+
+static const struct ldc_mode_ops stream_ops = {
+	.write		=	write_stream,
+	.read		=	read_stream,
+};
+
+int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
+{
+	unsigned long flags;
+	int err;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (!size)
+		return 0;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if (lp->hs_state != LDC_HS_COMPLETE)
+		err = -ENOTCONN;
+	else
+		err = lp->mops->write(lp, buf, size);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_write);
+
+int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
+{
+	unsigned long flags;
+	int err;
+
+	ldcdbg(RX, "%s: entered size=%d\n", __func__, size);
+
+	if (!buf)
+		return -EINVAL;
+
+	if (!size)
+		return 0;
+
+	spin_lock_irqsave(&lp->lock, flags);
+
+	if (lp->hs_state != LDC_HS_COMPLETE)
+		err = -ENOTCONN;
+	else
+		err = lp->mops->read(lp, buf, size);
+
+	spin_unlock_irqrestore(&lp->lock, flags);
+
+	ldcdbg(RX, "%s: mode=%d, head=%lu, tail=%lu rv=%d\n", __func__,
+	       lp->cfg.mode, lp->rx_head, lp->rx_tail, err);
+
+	return err;
+}
+EXPORT_SYMBOL(ldc_read);
+
+static u64 pagesize_code(void)
+{
+	switch (PAGE_SIZE) {
+	default:
+	case (8ULL * 1024ULL):
+		return 0;
+	case (64ULL * 1024ULL):
+		return 1;
+	case (512ULL * 1024ULL):
+		return 2;
+	case (4ULL * 1024ULL * 1024ULL):
+		return 3;
+	case (32ULL * 1024ULL * 1024ULL):
+		return 4;
+	case (256ULL * 1024ULL * 1024ULL):
+		return 5;
+	}
+}
+
+static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
+{
+	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
+		(index << PAGE_SHIFT) |
+		page_offset);
+}
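+
+/* make_cookie() is the inverse of ldc_cookie_to_index() above for the
+ * native page size: make_cookie(5, pagesize_code(), 0x10) yields
+ * (code << 60) | (5 << PAGE_SHIFT) | 0x10, and decoding shifts the
+ * index straight back out because the offset stays below PAGE_SHIFT
+ * bits.
+ */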
+
+static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
+					     unsigned long npages)
+{
+	long entry;
+
+	entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table,
+				      npages, NULL, (unsigned long)-1, 0);
+	if (unlikely(entry == IOMMU_ERROR_CODE))
+		return NULL;
+
+	return iommu->page_table + entry;
+}
+
+static u64 perm_to_mte(unsigned int map_perm)
+{
+	u64 mte_base;
+
+	mte_base = pagesize_code();
+
+	if (map_perm & LDC_MAP_SHADOW) {
+		if (map_perm & LDC_MAP_R)
+			mte_base |= LDC_MTE_COPY_R;
+		if (map_perm & LDC_MAP_W)
+			mte_base |= LDC_MTE_COPY_W;
+	}
+	if (map_perm & LDC_MAP_DIRECT) {
+		if (map_perm & LDC_MAP_R)
+			mte_base |= LDC_MTE_READ;
+		if (map_perm & LDC_MAP_W)
+			mte_base |= LDC_MTE_WRITE;
+		if (map_perm & LDC_MAP_X)
+			mte_base |= LDC_MTE_EXEC;
+	}
+	if (map_perm & LDC_MAP_IO) {
+		if (map_perm & LDC_MAP_R)
+			mte_base |= LDC_MTE_IOMMU_R;
+		if (map_perm & LDC_MAP_W)
+			mte_base |= LDC_MTE_IOMMU_W;
+	}
+
+	return mte_base;
+}
+
+static int pages_in_region(unsigned long base, long len)
+{
+	int count = 0;
+
+	do {
+		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
+
+		len -= (new - base);
+		base = new;
+		count++;
+	} while (len > 0);
+
+	return count;
+}
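+
+/* Example with illustrative numbers (8K pages): base = 0x1ff8 and
+ * len = 16 cross one page boundary, so the loop runs twice and the
+ * function returns 2; a page-aligned 16K region likewise counts 2.
+ */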
+
+struct cookie_state {
+	struct ldc_mtable_entry		*page_table;
+	struct ldc_trans_cookie		*cookies;
+	u64				mte_base;
+	u64				prev_cookie;
+	u32				pte_idx;
+	u32				nc;
+};
+
+static void fill_cookies(struct cookie_state *sp, unsigned long pa,
+			 unsigned long off, unsigned long len)
+{
+	do {
+		unsigned long tlen, new = pa + PAGE_SIZE;
+		u64 this_cookie;
+
+		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
+
+		tlen = PAGE_SIZE;
+		if (off)
+			tlen = PAGE_SIZE - off;
+		if (tlen > len)
+			tlen = len;
+
+		this_cookie = make_cookie(sp->pte_idx,
+					  pagesize_code(), off);
+
+		off = 0;
+
+		if (this_cookie == sp->prev_cookie) {
+			sp->cookies[sp->nc - 1].cookie_size += tlen;
+		} else {
+			sp->cookies[sp->nc].cookie_addr = this_cookie;
+			sp->cookies[sp->nc].cookie_size = tlen;
+			sp->nc++;
+		}
+		sp->prev_cookie = this_cookie + tlen;
+
+		sp->pte_idx++;
+
+		len -= tlen;
+		pa = new;
+	} while (len > 0);
+}
+
+static int sg_count_one(struct scatterlist *sg)
+{
+	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
+	long len = sg->length;
+
+	if ((sg->offset | len) & (8UL - 1))
+		return -EFAULT;
+
+	return pages_in_region(base + sg->offset, len);
+}
+
+static int sg_count_pages(struct scatterlist *sg, int num_sg)
+{
+	int count;
+	int i;
+
+	count = 0;
+	for (i = 0; i < num_sg; i++) {
+		int err = sg_count_one(sg + i);
+		if (err < 0)
+			return err;
+		count += err;
+	}
+
+	return count;
+}
+
+int ldc_map_sg(struct ldc_channel *lp,
+	       struct scatterlist *sg, int num_sg,
+	       struct ldc_trans_cookie *cookies, int ncookies,
+	       unsigned int map_perm)
+{
+	unsigned long i, npages;
+	struct ldc_mtable_entry *base;
+	struct cookie_state state;
+	struct ldc_iommu *iommu;
+	int err;
+	struct scatterlist *s;
+
+	if (map_perm & ~LDC_MAP_ALL)
+		return -EINVAL;
+
+	err = sg_count_pages(sg, num_sg);
+	if (err < 0)
+		return err;
+
+	npages = err;
+	if (err > ncookies)
+		return -EMSGSIZE;
+
+	iommu = &lp->iommu;
+
+	base = alloc_npages(iommu, npages);
+
+	if (!base)
+		return -ENOMEM;
+
+	state.page_table = iommu->page_table;
+	state.cookies = cookies;
+	state.mte_base = perm_to_mte(map_perm);
+	state.prev_cookie = ~(u64)0;
+	state.pte_idx = (base - iommu->page_table);
+	state.nc = 0;
+
+	for_each_sg(sg, s, num_sg, i) {
+		fill_cookies(&state, page_to_pfn(sg_page(s)) << PAGE_SHIFT,
+			     s->offset, s->length);
+	}
+
+	return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_sg);
+
+int ldc_map_single(struct ldc_channel *lp,
+		   void *buf, unsigned int len,
+		   struct ldc_trans_cookie *cookies, int ncookies,
+		   unsigned int map_perm)
+{
+	unsigned long npages, pa;
+	struct ldc_mtable_entry *base;
+	struct cookie_state state;
+	struct ldc_iommu *iommu;
+
+	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
+		return -EINVAL;
+
+	pa = __pa(buf);
+	if ((pa | len) & (8UL - 1))
+		return -EFAULT;
+
+	npages = pages_in_region(pa, len);
+
+	iommu = &lp->iommu;
+
+	base = alloc_npages(iommu, npages);
+
+	if (!base)
+		return -ENOMEM;
+
+	state.page_table = iommu->page_table;
+	state.cookies = cookies;
+	state.mte_base = perm_to_mte(map_perm);
+	state.prev_cookie = ~(u64)0;
+	state.pte_idx = (base - iommu->page_table);
+	state.nc = 0;
+	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
+	BUG_ON(state.nc > ncookies);
+
+	return state.nc;
+}
+EXPORT_SYMBOL(ldc_map_single);
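+
+/* Usage sketch (illustrative; "lp", "buf" and "len" are hypothetical, and
+ * buf/len must be 8-byte aligned): export a buffer to the peer and tear
+ * the mapping down again with ldc_unmap():
+ *
+ *	struct ldc_trans_cookie cookies[4];
+ *	int ncookies;
+ *
+ *	ncookies = ldc_map_single(lp, buf, len, cookies, 4,
+ *				  LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
+ *	if (ncookies < 0)
+ *		return ncookies;
+ *	...
+ *	ldc_unmap(lp, cookies, ncookies);
+ */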
+
+
+static void free_npages(unsigned long id, struct ldc_iommu *iommu,
+			u64 cookie, u64 size)
+{
+	unsigned long npages, entry;
+
+	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
+
+	entry = ldc_cookie_to_index(cookie, iommu);
+	ldc_demap(iommu, id, cookie, entry, npages);
+	iommu_tbl_range_free(&iommu->iommu_map_table, cookie, npages, entry);
+}
+
+void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
+	       int ncookies)
+{
+	struct ldc_iommu *iommu = &lp->iommu;
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+	for (i = 0; i < ncookies; i++) {
+		u64 addr = cookies[i].cookie_addr;
+		u64 size = cookies[i].cookie_size;
+
+		free_npages(lp->id, iommu, addr, size);
+	}
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+EXPORT_SYMBOL(ldc_unmap);
+
+int ldc_copy(struct ldc_channel *lp, int copy_dir,
+	     void *buf, unsigned int len, unsigned long offset,
+	     struct ldc_trans_cookie *cookies, int ncookies)
+{
+	unsigned int orig_len;
+	unsigned long ra;
+	int i;
+
+	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
+		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
+		       lp->id, copy_dir);
+		return -EINVAL;
+	}
+
+	ra = __pa(buf);
+	if ((ra | len | offset) & (8UL - 1)) {
+		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
+		       "ra[%lx] len[%x] offset[%lx]\n",
+		       lp->id, ra, len, offset);
+		return -EFAULT;
+	}
+
+	if (lp->hs_state != LDC_HS_COMPLETE ||
+	    (lp->flags & LDC_FLAG_RESET)) {
+		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
+		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
+		return -ECONNRESET;
+	}
+
+	orig_len = len;
+	for (i = 0; i < ncookies; i++) {
+		unsigned long cookie_raddr = cookies[i].cookie_addr;
+		unsigned long this_len = cookies[i].cookie_size;
+		unsigned long actual_len;
+
+		if (unlikely(offset)) {
+			unsigned long this_off = offset;
+
+			if (this_off > this_len)
+				this_off = this_len;
+
+			offset -= this_off;
+			this_len -= this_off;
+			if (!this_len)
+				continue;
+			cookie_raddr += this_off;
+		}
+
+		if (this_len > len)
+			this_len = len;
+
+		while (1) {
+			unsigned long hv_err;
+
+			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
+						cookie_raddr, ra,
+						this_len, &actual_len);
+			if (unlikely(hv_err)) {
+				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
+				       "HV error %lu\n",
+				       lp->id, hv_err);
+				if (lp->hs_state != LDC_HS_COMPLETE ||
+				    (lp->flags & LDC_FLAG_RESET))
+					return -ECONNRESET;
+				else
+					return -EFAULT;
+			}
+
+			cookie_raddr += actual_len;
+			ra += actual_len;
+			len -= actual_len;
+			if (actual_len == this_len)
+				break;
+
+			this_len -= actual_len;
+		}
+
+		if (!len)
+			break;
+	}
+
+	/* What to do about a short copy is caller policy.  For example,
+	 * a networking driver can declare the packet a runt and drop it.
+	 */
+
+	return orig_len - len;
+}
+EXPORT_SYMBOL(ldc_copy);
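+
+/* Sketch of the short-copy policy noted above (illustrative; "rx_buf" and
+ * "dlen" are hypothetical, with dlen 8-byte aligned): a networking driver
+ * pulling a packet out of the peer's exported buffer might treat anything
+ * shorter than expected as a runt:
+ *
+ *	err = ldc_copy(lp, LDC_COPY_IN, rx_buf, dlen, 0,
+ *		       cookies, ncookies);
+ *	if (err != dlen)
+ *		goto drop;	(runt packet or link error)
+ */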
+
+void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
+			  struct ldc_trans_cookie *cookies, int *ncookies,
+			  unsigned int map_perm)
+{
+	void *buf;
+	int err;
+
+	if (len & (8UL - 1))
+		return ERR_PTR(-EINVAL);
+
+	buf = kzalloc(len, GFP_ATOMIC);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
+	if (err < 0) {
+		kfree(buf);
+		return ERR_PTR(err);
+	}
+	*ncookies = err;
+
+	return buf;
+}
+EXPORT_SYMBOL(ldc_alloc_exp_dring);
+
+void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
+			struct ldc_trans_cookie *cookies, int ncookies)
+{
+	ldc_unmap(lp, cookies, ncookies);
+	kfree(buf);
+}
+EXPORT_SYMBOL(ldc_free_exp_dring);
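+
+/* Pairing sketch (illustrative; "cookies" and "ring_len" are hypothetical,
+ * with ring_len 8-byte aligned): a driver typically allocates its exported
+ * descriptor ring once at channel setup and releases it on teardown:
+ *
+ *	int ncookies = ARRAY_SIZE(cookies);
+ *	void *ring = ldc_alloc_exp_dring(lp, ring_len, cookies,
+ *					 &ncookies, LDC_MAP_ALL);
+ *	if (IS_ERR(ring))
+ *		return PTR_ERR(ring);
+ *	...
+ *	ldc_free_exp_dring(lp, ring, ring_len, cookies, ncookies);
+ */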
+
+static int __init ldc_init(void)
+{
+	unsigned long major, minor;
+	struct mdesc_handle *hp;
+	const u64 *v;
+	int err;
+	u64 mp;
+
+	hp = mdesc_grab();
+	if (!hp)
+		return -ENODEV;
+
+	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+	err = -ENODEV;
+	if (mp == MDESC_NODE_NULL)
+		goto out;
+
+	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
+	if (!v)
+		goto out;
+
+	major = 1;
+	minor = 0;
+	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
+		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
+		goto out;
+	}
+
+	printk(KERN_INFO "%s", version);
+
+	if (!*v) {
+		printk(KERN_INFO PFX "Domaining disabled.\n");
+		goto out;
+	}
+	ldom_domaining_enabled = 1;
+	err = 0;
+
+out:
+	mdesc_release(hp);
+	return err;
+}
+
+core_initcall(ldc_init);
diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c
new file mode 100644
index 0000000..519f5ba
--- /dev/null
+++ b/arch/sparc/kernel/led.c
@@ -0,0 +1,146 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/uaccess.h>
+#include <linux/sched/loadavg.h>
+
+#include <asm/auxio.h>
+
+#define LED_MAX_LENGTH 8 /* maximum chars written to proc file */
+
+static inline void led_toggle(void)
+{
+	unsigned char val = get_auxio();
+	unsigned char on, off;
+
+	if (val & AUXIO_LED) {
+		on = 0;
+		off = AUXIO_LED;
+	} else {
+		on = AUXIO_LED;
+		off = 0;
+	}
+
+	set_auxio(on, off);
+}
+
+static struct timer_list led_blink_timer;
+static unsigned long led_blink_timer_timeout;
+
+static void led_blink(struct timer_list *unused)
+{
+	unsigned long timeout = led_blink_timer_timeout;
+
+	led_toggle();
+
+	/* reschedule */
+	if (!timeout) { /* blink according to load */
+		led_blink_timer.expires = jiffies +
+			((1 + (avenrun[0] >> FSHIFT)) * HZ);
+	} else { /* blink at user specified interval */
+		led_blink_timer.expires = jiffies + (timeout * HZ);
+	}
+	add_timer(&led_blink_timer);
+}
+
+static int led_proc_show(struct seq_file *m, void *v)
+{
+	if (get_auxio() & AUXIO_LED)
+		seq_puts(m, "on\n");
+	else
+		seq_puts(m, "off\n");
+	return 0;
+}
+
+static int led_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, led_proc_show, NULL);
+}
+
+static ssize_t led_proc_write(struct file *file, const char __user *buffer,
+			      size_t count, loff_t *ppos)
+{
+	char *buf = NULL;
+
+	if (count > LED_MAX_LENGTH)
+		count = LED_MAX_LENGTH;
+
+	buf = memdup_user_nul(buffer, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	/* work around \n when echo'ing into proc */
+	if (buf[count - 1] == '\n')
+		buf[count - 1] = '\0';
+
+	/* Before we change anything we want to stop any running timers;
+	 * otherwise commands such as "on" would have no persistent effect.
+	 */
+	del_timer_sync(&led_blink_timer);
+
+	if (!strcmp(buf, "on")) {
+		auxio_set_led(AUXIO_LED_ON);
+	} else if (!strcmp(buf, "toggle")) {
+		led_toggle();
+	} else if ((*buf > '0') && (*buf <= '9')) {
+		led_blink_timer_timeout = simple_strtoul(buf, NULL, 10);
+		led_blink(&led_blink_timer);
+	} else if (!strcmp(buf, "load")) {
+		led_blink_timer_timeout = 0;
+		led_blink(&led_blink_timer);
+	} else {
+		auxio_set_led(AUXIO_LED_OFF);
+	}
+
+	kfree(buf);
+
+	return count;
+}
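+
+/* Interface summary (illustrative): per the handler above, writing "on"
+ * lights the LED, "toggle" flips it, "load" blinks it at a rate derived
+ * from the load average, a decimal number N (first digit 1..9) toggles it
+ * every N seconds, and anything else turns it off; for example,
+ * "echo 2 > /proc/led".
+ */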
+
+static const struct file_operations led_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= led_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= led_proc_write,
+};
+
+static struct proc_dir_entry *led;
+
+#define LED_VERSION	"0.1"
+
+static int __init led_init(void)
+{
+	timer_setup(&led_blink_timer, led_blink, 0);
+
+	led = proc_create("led", 0, NULL, &led_proc_fops);
+	if (!led)
+		return -ENOMEM;
+
+	printk(KERN_INFO
+	       "led: version %s, Lars Kotthoff <metalhead@metalhead.ws>\n",
+	       LED_VERSION);
+
+	return 0;
+}
+
+static void __exit led_exit(void)
+{
+	remove_proc_entry("led", NULL);
+	del_timer_sync(&led_blink_timer);
+}
+
+module_init(led_init);
+module_exit(led_exit);
+
+MODULE_AUTHOR("Lars Kotthoff <metalhead@metalhead.ws>");
+MODULE_DESCRIPTION("Provides control of the front LED on SPARC systems.");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(LED_VERSION);
diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c
new file mode 100644
index 0000000..84b2337
--- /dev/null
+++ b/arch/sparc/kernel/leon_kernel.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB
+ * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+
+#include <asm/oplib.h>
+#include <asm/timer.h>
+#include <asm/prom.h>
+#include <asm/leon.h>
+#include <asm/leon_amba.h>
+#include <asm/traps.h>
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
+
+#include "kernel.h"
+#include "prom.h"
+#include "irq.h"
+
+struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */
+struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */
+
+int leondebug_irq_disable;
+int leon_debug_irqout;
+static volatile u32 dummy_master_l10_counter;
+unsigned long amba_system_id;
+static DEFINE_SPINLOCK(leon_irq_lock);
+
+static unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
+static unsigned long leon3_gptimer_ackmask; /* For clearing pending bit */
+unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
+unsigned int sparc_leon_eirq;
+#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
+#define LEON_IACK (&leon3_irqctrl_regs->iclear)
+#define LEON_DO_ACK_HW 1
+
+/* Return the last IRQ ACKed by the Extended IRQ controller. It has already
+ * been (automatically) ACKed by the time the CPU takes the trap.
+ */
+static inline unsigned int leon_eirq_get(int cpu)
+{
+	return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f;
+}
+
+/* Handle one or multiple IRQs from the extended interrupt controller */
+static void leon_handle_ext_irq(struct irq_desc *desc)
+{
+	unsigned int eirq;
+	struct irq_bucket *p;
+	int cpu = sparc_leon3_cpuid();
+
+	eirq = leon_eirq_get(cpu);
+	p = irq_map[eirq];
+	if ((eirq & 0x10) && p && p->irq) /* bit4 tells if IRQ happened */
+		generic_handle_irq(p->irq);
+}
+
+/* The extended IRQ controller has been found; this function registers it. */
+static void leon_eirq_setup(unsigned int eirq)
+{
+	unsigned long mask, oldmask;
+	unsigned int veirq;
+
+	if (eirq < 1 || eirq > 0xf) {
+		printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq);
+		return;
+	}
+
+	veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0);
+
+	/*
+	 * Unmask the Extended IRQ; the IRQs routed through the Ext-IRQ
+	 * controller have a mask-bit of their own, so this is safe.
+	 */
+	irq_link(veirq);
+	mask = 1 << eirq;
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask));
+	sparc_leon_eirq = eirq;
+}
+
+unsigned long leon_get_irqmask(unsigned int irq)
+{
+	unsigned long mask;
+
+	if (!irq || ((irq > 0xf) && !sparc_leon_eirq)
+	    || ((irq > 0x1f) && sparc_leon_eirq)) {
+		printk(KERN_ERR
+		       "leon_get_irqmask: false irq number: %d\n", irq);
+		mask = 0;
+	} else {
+		mask = LEON_HARD_INT(irq);
+	}
+	return mask;
+}
+
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(const struct cpumask *affinity)
+{
+	cpumask_t mask;
+
+	cpumask_and(&mask, cpu_online_mask, affinity);
+	if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask))
+		return boot_cpu_id;
+	else
+		return cpumask_first(&mask);
+}
+#else
+#define irq_choose_cpu(affinity) boot_cpu_id
+#endif
+
+static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest,
+			     bool force)
+{
+	unsigned long mask, oldmask, flags;
+	int oldcpu, newcpu;
+
+	mask = (unsigned long)data->chip_data;
+	oldcpu = irq_choose_cpu(irq_data_get_affinity_mask(data));
+	newcpu = irq_choose_cpu(dest);
+
+	if (oldcpu == newcpu)
+		goto out;
+
+	/* mask the IRQ on the old CPU before unmasking it on the new CPU */
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask));
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+out:
+	return IRQ_SET_MASK_OK;
+}
+
+static void leon_unmask_irq(struct irq_data *data)
+{
+	unsigned long mask, oldmask, flags;
+	int cpu;
+
+	mask = (unsigned long)data->chip_data;
+	cpu = irq_choose_cpu(irq_data_get_affinity_mask(data));
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static void leon_mask_irq(struct irq_data *data)
+{
+	unsigned long mask, oldmask, flags;
+	int cpu;
+
+	mask = (unsigned long)data->chip_data;
+	cpu = irq_choose_cpu(irq_data_get_affinity_mask(data));
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+	LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static unsigned int leon_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	leon_unmask_irq(data);
+	return 0;
+}
+
+static void leon_shutdown_irq(struct irq_data *data)
+{
+	leon_mask_irq(data);
+	irq_unlink(data->irq);
+}
+
+/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */
+static void leon_eoi_irq(struct irq_data *data)
+{
+	unsigned long mask = (unsigned long)data->chip_data;
+
+	if (mask & LEON_DO_ACK_HW)
+		LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW);
+}
+
+static struct irq_chip leon_irq = {
+	.name			= "leon",
+	.irq_startup		= leon_startup_irq,
+	.irq_shutdown		= leon_shutdown_irq,
+	.irq_mask		= leon_mask_irq,
+	.irq_unmask		= leon_unmask_irq,
+	.irq_eoi		= leon_eoi_irq,
+	.irq_set_affinity	= leon_set_affinity,
+};
+
+/*
+ * Build a LEON IRQ for the edge triggered LEON IRQ controller:
+ *  Edge (normal) IRQ           - handle_simple_irq, ack=DON'T-CARE, never ack
+ *  Level IRQ (PCI|Level-GPIO)  - handle_fasteoi_irq, ack=1, ack after ISR
+ *  Per-CPU Edge                - handle_percpu_irq, ack=0
+ */
+unsigned int leon_build_device_irq(unsigned int real_irq,
+				    irq_flow_handler_t flow_handler,
+				    const char *name, int do_ack)
+{
+	unsigned int irq;
+	unsigned long mask;
+	struct irq_desc *desc;
+
+	irq = 0;
+	mask = leon_get_irqmask(real_irq);
+	if (mask == 0)
+		goto out;
+
+	irq = irq_alloc(real_irq, real_irq);
+	if (irq == 0)
+		goto out;
+
+	if (do_ack)
+		mask |= LEON_DO_ACK_HW;
+
+	desc = irq_to_desc(irq);
+	if (!desc || !desc->handle_irq || desc->handle_irq == handle_bad_irq) {
+		irq_set_chip_and_handler_name(irq, &leon_irq,
+					      flow_handler, name);
+		irq_set_chip_data(irq, (void *)mask);
+	}
+
+out:
+	return irq;
+}
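+
+/* Example (illustrative): per the table above, a level-triggered PCI-style
+ * interrupt would be built with EOI acking enabled and then requested as
+ * usual ("my_handler" and "dev" are hypothetical):
+ *
+ *	virq = leon_build_device_irq(real_irq, handle_fasteoi_irq,
+ *				     "level", 1);
+ *	err = request_irq(virq, my_handler, 0, "mydev", dev);
+ */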
+
+static unsigned int _leon_build_device_irq(struct platform_device *op,
+					   unsigned int real_irq)
+{
+	return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0);
+}
+
+void leon_update_virq_handling(unsigned int virq,
+			      irq_flow_handler_t flow_handler,
+			      const char *name, int do_ack)
+{
+	unsigned long mask = (unsigned long)irq_get_chip_data(virq);
+
+	mask &= ~LEON_DO_ACK_HW;
+	if (do_ack)
+		mask |= LEON_DO_ACK_HW;
+
+	irq_set_chip_and_handler_name(virq, &leon_irq,
+				      flow_handler, name);
+	irq_set_chip_data(virq, (void *)mask);
+}
+
+static u32 leon_cycles_offset(void)
+{
+	u32 rld, val, ctrl, off;
+
+	rld = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld);
+	val = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val);
+	ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+	if (LEON3_GPTIMER_CTRL_ISPENDING(ctrl)) {
+		val = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val);
+		off = 2 * rld - val;
+	} else {
+		off = rld - val;
+	}
+
+	return off;
+}
+
+#ifdef CONFIG_SMP
+
+/* smp clockevent irq */
+static irqreturn_t leon_percpu_timer_ce_interrupt(int irq, void *unused)
+{
+	struct clock_event_device *ce;
+	int cpu = smp_processor_id();
+
+	leon_clear_profile_irq(cpu);
+
+	if (cpu == boot_cpu_id)
+		timer_interrupt(irq, NULL);
+
+	ce = &per_cpu(sparc32_clockevent, cpu);
+
+	irq_enter();
+	if (ce->event_handler)
+		ce->event_handler(ce);
+	irq_exit();
+
+	return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_SMP */
+
+void __init leon_init_timers(void)
+{
+	int irq, eirq;
+	struct device_node *rootnp, *np, *nnp;
+	struct property *pp;
+	int len;
+	int icsel;
+	int ampopts;
+	int err;
+	u32 config;
+	u32 ctrl;
+
+	sparc_config.get_cycles_offset = leon_cycles_offset;
+	sparc_config.cs_period = 1000000 / HZ;
+	sparc_config.features |= FEAT_L10_CLOCKSOURCE;
+
+#ifndef CONFIG_SMP
+	sparc_config.features |= FEAT_L10_CLOCKEVENT;
+#endif
+
+	leondebug_irq_disable = 0;
+	leon_debug_irqout = 0;
+	master_l10_counter = (u32 __iomem *)&dummy_master_l10_counter;
+	dummy_master_l10_counter = 0;
+
+	rootnp = of_find_node_by_path("/ambapp0");
+	if (!rootnp)
+		goto bad;
+
+	/* Find System ID: GRLIB build ID and optional CHIP ID */
+	pp = of_find_property(rootnp, "systemid", &len);
+	if (pp)
+		amba_system_id = *(unsigned long *)pp->value;
+
+	/* Find IRQMP IRQ Controller registers base address, otherwise bail out */
+	np = of_find_node_by_name(rootnp, "GAISLER_IRQMP");
+	if (!np) {
+		np = of_find_node_by_name(rootnp, "01_00d");
+		if (!np)
+			goto bad;
+	}
+	pp = of_find_property(np, "reg", &len);
+	if (!pp)
+		goto bad;
+	leon3_irqctrl_regs = *(struct leon3_irqctrl_regs_map **)pp->value;
+
+	/* Find GPTIMER Timer registers base address, otherwise bail out. */
+	nnp = rootnp;
+
+retry:
+	np = of_find_node_by_name(nnp, "GAISLER_GPTIMER");
+	if (!np) {
+		np = of_find_node_by_name(nnp, "01_011");
+		if (!np)
+			goto bad;
+	}
+
+	ampopts = 0;
+	pp = of_find_property(np, "ampopts", &len);
+	if (pp) {
+		ampopts = *(int *)pp->value;
+		if (ampopts == 0) {
+			/* Skip this instance, the resource is already
+			 * allocated by another OS */
+			nnp = np;
+			goto retry;
+		}
+	}
+
+	/* Select Timer-Instance on Timer Core. Default is zero */
+	leon3_gptimer_idx = ampopts & 0x7;
+
+	pp = of_find_property(np, "reg", &len);
+	if (pp)
+		leon3_gptimer_regs = *(struct leon3_gptimer_regs_map **)
+					pp->value;
+	pp = of_find_property(np, "interrupts", &len);
+	if (pp)
+		leon3_gptimer_irq = *(unsigned int *)pp->value;
+
+	if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
+		goto bad;
+
+	ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+			      ctrl | LEON3_GPTIMER_CTRL_PENDING);
+	ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+
+	if ((ctrl & LEON3_GPTIMER_CTRL_PENDING) != 0)
+		leon3_gptimer_ackmask = ~LEON3_GPTIMER_CTRL_PENDING;
+	else
+		leon3_gptimer_ackmask = ~0;
+
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
+				(((1000000 / HZ) - 1)));
+	LEON3_BYPASS_STORE_PA(
+			&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0);
+
+	/*
+	 * The IRQ controller may (if implemented) consist of multiple
+	 * IRQ controllers, each mapped on a 4KB boundary.
+	 * Each CPU may be routed to a different IRQCTRL; however, we
+	 * assume that all CPUs (in an SMP system) are routed to the
+	 * same IRQ controller, and for non-SMP only one IRQCTRL is
+	 * accessed anyway.
+	 * In AMP systems, Linux must run on CPU0 for the time being.
+	 */
+	icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
+	icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
+	leon3_irqctrl_regs += icsel;
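+
+	/* Worked example (illustrative): for boot_cpu_id 2, the nibble at
+	 * bits 23:20 of icsel[0] is extracted; an icsel value of 1 then
+	 * advances leon3_irqctrl_regs by one 4KB controller instance.
+	 */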
+
+	/* Mask all IRQs on boot-cpu IRQ controller */
+	LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
+
+	/* Probe extended IRQ controller */
+	eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus)
+		>> 16) & 0xf;
+	if (eirq != 0)
+		leon_eirq_setup(eirq);
+
+#ifdef CONFIG_SMP
+	{
+		unsigned long flags;
+
+		/*
+		 * In SMP, sun4m adds an IPI handler to the IRQ trap handler
+		 * that LEON must never take; sun4d and LEON overwrite the
+		 * branch with a NOP.
+		 */
+		local_irq_save(flags);
+		patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
+		local_ops->cache_all();
+		local_irq_restore(flags);
+	}
+#endif
+
+	config = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config);
+	if (config & (1 << LEON3_GPTIMER_SEPIRQ))
+		leon3_gptimer_irq += leon3_gptimer_idx;
+	else if ((config & LEON3_GPTIMER_TIMERS) > 1)
+		pr_warn("GPTIMER uses shared irqs, using other timers of the same core will fail.\n");
+
+#ifdef CONFIG_SMP
+	/* Install per-cpu IRQ handler for broadcasted ticker */
+	irq = leon_build_device_irq(leon3_gptimer_irq, handle_percpu_irq,
+				    "per-cpu", 0);
+	err = request_irq(irq, leon_percpu_timer_ce_interrupt,
+			  IRQF_PERCPU | IRQF_TIMER, "timer", NULL);
+#else
+	irq = _leon_build_device_irq(NULL, leon3_gptimer_irq);
+	err = request_irq(irq, timer_interrupt, IRQF_TIMER, "timer", NULL);
+#endif
+	if (err) {
+		pr_err("Unable to attach timer IRQ%d\n", irq);
+		prom_halt();
+	}
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+			      LEON3_GPTIMER_EN |
+			      LEON3_GPTIMER_RL |
+			      LEON3_GPTIMER_LD |
+			      LEON3_GPTIMER_IRQEN);
+	return;
+bad:
+	printk(KERN_ERR "No Timer/irqctrl found\n");
+	BUG();
+	return;
+}
+
+static void leon_clear_clock_irq(void)
+{
+	u32 ctrl;
+
+	ctrl = LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl);
+	LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+			      ctrl & leon3_gptimer_ackmask);
+}
+
+static void leon_load_profile_irq(int cpu, unsigned int limit)
+{
+}
+
+void __init leon_trans_init(struct device_node *dp)
+{
+	if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) {
+		struct property *p;
+		p = of_find_property(dp, "mid", (void *)0);
+		if (p) {
+			int mid;
+			dp->name = prom_early_alloc(5 + 1);
+			memcpy(&mid, p->value, p->length);
+			sprintf((char *)dp->name, "cpu%.2d", mid);
+		}
+	}
+}
+
+#ifdef CONFIG_SMP
+void leon_clear_profile_irq(int cpu)
+{
+}
+
+void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu)
+{
+	unsigned long mask, flags, *addr;
+	mask = leon_get_irqmask(irq_nr);
+	spin_lock_irqsave(&leon_irq_lock, flags);
+	addr = (unsigned long *)LEON_IMASK(cpu);
+	LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask));
+	spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+#endif
+
+void __init leon_init_IRQ(void)
+{
+	sparc_config.init_timers      = leon_init_timers;
+	sparc_config.build_device_irq = _leon_build_device_irq;
+	sparc_config.clock_rate       = 1000000;
+	sparc_config.clear_clock_irq  = leon_clear_clock_irq;
+	sparc_config.load_profile_irq = leon_load_profile_irq;
+}
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
new file mode 100644
index 0000000..e5e5ff6
--- /dev/null
+++ b/arch/sparc/kernel/leon_pci.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * leon_pci.c: LEON Host PCI support
+ *
+ * Copyright (C) 2011 Aeroflex Gaisler AB, Daniel Hellstrom
+ *
+ * Code is partially derived from pcic.c
+ */
+
+#include <linux/of_device.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/export.h>
+#include <asm/leon.h>
+#include <asm/leon_pci.h>
+
+/* The LEON architecture does not rely on a BIOS or bootloader to set up
+ * PCI for us. The Linux generic routines are used to set up resources;
+ * reset values of configuration-space register settings are preserved.
+ *
+ * PCI Memory and Prefetchable Memory are direct-mapped. However, I/O Space
+ * is accessed through a window translated to the low 64KB of PCI space; the
+ * first 4KB is not used, so 60KB is available.
+ */
+void leon_pci_init(struct platform_device *ofdev, struct leon_pci_info *info)
+{
+	LIST_HEAD(resources);
+	struct pci_bus *root_bus;
+	struct pci_host_bridge *bridge;
+	int ret;
+
+	bridge = pci_alloc_host_bridge(0);
+	if (!bridge)
+		return;
+
+	pci_add_resource_offset(&resources, &info->io_space,
+				info->io_space.start - 0x1000);
+	pci_add_resource(&resources, &info->mem_space);
+	info->busn.flags = IORESOURCE_BUS;
+	pci_add_resource(&resources, &info->busn);
+
+	list_splice_init(&resources, &bridge->windows);
+	bridge->dev.parent = &ofdev->dev;
+	bridge->sysdata = info;
+	bridge->busnr = 0;
+	bridge->ops = info->ops;
+	bridge->swizzle_irq = pci_common_swizzle;
+	bridge->map_irq = info->map_irq;
+
+	ret = pci_scan_root_bus_bridge(bridge);
+	if (ret) {
+		pci_free_host_bridge(bridge);
+		return;
+	}
+
+	root_bus = bridge->bus;
+
+	/* Assign devices with resources */
+	pci_assign_unassigned_resources();
+	pci_bus_add_devices(root_bus);
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, oldcmd;
+	int i;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	oldcmd = cmd;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<i)))
+			continue;
+
+		if (res->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (res->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (cmd != oldcmd) {
+		pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
diff --git a/arch/sparc/kernel/leon_pci_grpci1.c b/arch/sparc/kernel/leon_pci_grpci1.c
new file mode 100644
index 0000000..e6935d0
--- /dev/null
+++ b/arch/sparc/kernel/leon_pci_grpci1.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * leon_pci_grpci1.c: GRPCI1 Host PCI driver
+ *
+ * Copyright (C) 2013 Aeroflex Gaisler AB
+ *
+ * This GRPCI1 driver does not support PCI interrupts taken from
+ * GPIO pins. Interrupt generation on PCI parity and system error
+ * detection is turned off by default, since some GRPCI1 cores do
+ * not support detection. It can be turned on from the bootloader
+ * using the all_pci_errors property.
+ *
+ * Contributors: Daniel Hellstrom <daniel@gaisler.com>
+ */
+
+#include <linux/of_device.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/of_irq.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+
+#include <asm/leon_pci.h>
+#include <asm/sections.h>
+#include <asm/vaddrs.h>
+#include <asm/leon.h>
+#include <asm/io.h>
+
+#include "irq.h"
+
+/* Enable/Disable Debugging Configuration Space Access */
+#undef GRPCI1_DEBUG_CFGACCESS
+
+/*
+ * GRPCI1 APB Register MAP
+ */
+struct grpci1_regs {
+	unsigned int cfg_stat;		/* 0x00 Configuration / Status */
+	unsigned int bar0;		/* 0x04 BAR0 (RO) */
+	unsigned int page0;		/* 0x08 PAGE0 (RO) */
+	unsigned int bar1;		/* 0x0C BAR1 (RO) */
+	unsigned int page1;		/* 0x10 PAGE1 */
+	unsigned int iomap;		/* 0x14 IO Map */
+	unsigned int stat_cmd;		/* 0x18 PCI Status & Command (RO) */
+	unsigned int irq;		/* 0x1C Interrupt register */
+};
+
+#define REGLOAD(a)	(be32_to_cpu(__raw_readl(&(a))))
+#define REGSTORE(a, v)	(__raw_writel(cpu_to_be32(v), &(a)))
+
+#define PAGE0_BTEN_BIT    0
+#define PAGE0_BTEN        (1 << PAGE0_BTEN_BIT)
+
+#define CFGSTAT_HOST_BIT  13
+#define CFGSTAT_CTO_BIT   8
+#define CFGSTAT_HOST      (1 << CFGSTAT_HOST_BIT)
+#define CFGSTAT_CTO       (1 << CFGSTAT_CTO_BIT)
+
+#define IRQ_DPE (1 << 9)
+#define IRQ_SSE (1 << 8)
+#define IRQ_RMA (1 << 7)
+#define IRQ_RTA (1 << 6)
+#define IRQ_STA (1 << 5)
+#define IRQ_DPED (1 << 4)
+#define IRQ_INTD (1 << 3)
+#define IRQ_INTC (1 << 2)
+#define IRQ_INTB (1 << 1)
+#define IRQ_INTA (1 << 0)
+#define IRQ_DEF_ERRORS (IRQ_RMA | IRQ_RTA | IRQ_STA)
+#define IRQ_ALL_ERRORS (IRQ_DPED | IRQ_DEF_ERRORS | IRQ_SSE | IRQ_DPE)
+#define IRQ_INTX (IRQ_INTA | IRQ_INTB | IRQ_INTC | IRQ_INTD)
+#define IRQ_MASK_BIT 16
+
+#define DEF_PCI_ERRORS (PCI_STATUS_SIG_TARGET_ABORT | \
+			PCI_STATUS_REC_TARGET_ABORT | \
+			PCI_STATUS_REC_MASTER_ABORT)
+#define ALL_PCI_ERRORS (PCI_STATUS_PARITY | PCI_STATUS_DETECTED_PARITY | \
+			PCI_STATUS_SIG_SYSTEM_ERROR | DEF_PCI_ERRORS)
+
+#define TGT 256
+
+struct grpci1_priv {
+	struct leon_pci_info	info; /* must be on top of this structure */
+	struct grpci1_regs __iomem *regs;		/* GRPCI register map */
+	struct device		*dev;
+	int			pci_err_mask;	/* STATUS register error mask */
+	int			irq;		/* LEON irqctrl GRPCI IRQ */
+	unsigned char		irq_map[4];	/* GRPCI nexus PCI INTX# IRQs */
+	unsigned int		irq_err;	/* GRPCI nexus Virt Error IRQ */
+
+	/* AHB PCI Windows */
+	unsigned long		pci_area;	/* MEMORY */
+	unsigned long		pci_area_end;
+	unsigned long		pci_io;		/* I/O */
+	unsigned long		pci_conf;	/* CONFIGURATION */
+	unsigned long		pci_conf_end;
+	unsigned long		pci_io_va;
+};
+
+static struct grpci1_priv *grpci1priv;
+
+static int grpci1_cfg_w32(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val);
+
+static int grpci1_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct grpci1_priv *priv = dev->bus->sysdata;
+	int irq_group;
+
+	/* Use default IRQ decoding on PCI BUS0 according to slot numbering */
+	irq_group = slot & 0x3;
+	pin = ((pin - 1) + irq_group) & 0x3;
+
+	return priv->irq_map[pin];
+}
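+
+/* Swizzle example (illustrative): slot 1, pin INTB# (pin=2) gives
+ * irq_group 1 and index ((2 - 1) + 1) & 0x3 = 2, so the device's INTB#
+ * is routed to the system IRQ registered for INTC#.
+ */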
+
+static int grpci1_cfg_r32(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 *val)
+{
+	u32 *pci_conf, tmp, cfg;
+
+	if (where & 0x3)
+		return -EINVAL;
+
+	if (bus == 0) {
+		devfn += (0x8 * 6); /* start at AD16=Device0 */
+	} else if (bus == TGT) {
+		bus = 0;
+		devfn = 0; /* special case: bridge controller itself */
+	}
+
+	/* Select bus */
+	cfg = REGLOAD(priv->regs->cfg_stat);
+	REGSTORE(priv->regs->cfg_stat, (cfg & ~(0xf << 23)) | (bus << 23));
+
+	/* do read access */
+	pci_conf = (u32 *) (priv->pci_conf | (devfn << 8) | (where & 0xfc));
+	tmp = LEON3_BYPASS_LOAD_PA(pci_conf);
+
+	/* check if master abort was received */
+	if (REGLOAD(priv->regs->cfg_stat) & CFGSTAT_CTO) {
+		*val = 0xffffffff;
+		/* Clear the Master Abort bit in PCI cfg space (it is set) */
+		tmp = REGLOAD(priv->regs->stat_cmd);
+		grpci1_cfg_w32(priv, TGT, 0, PCI_COMMAND, tmp);
+	} else {
+		/* Bus always little endian (unaffected by byte-swapping) */
+		*val = swab32(tmp);
+	}
+
+	return 0;
+}
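+
+/* Address sketch (illustrative): for bus 0, device 1, function 0,
+ * register 0x10, the devfn rebase above (devfn += 0x8 * 6, AD16 =
+ * Device0) yields devfn 0x38, so the access hits
+ * pci_conf | (0x38 << 8) | 0x10 in the configuration window.
+ */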
+
+static int grpci1_cfg_r16(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 *val)
+{
+	u32 v;
+	int ret;
+
+	if (where & 0x1)
+		return -EINVAL;
+	ret = grpci1_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	*val = 0xffff & (v >> (8 * (where & 0x3)));
+	return ret;
+}
+
+static int grpci1_cfg_r8(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 *val)
+{
+	u32 v;
+	int ret;
+
+	ret = grpci1_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	*val = 0xff & (v >> (8 * (where & 3)));
+
+	return ret;
+}
+
+static int grpci1_cfg_w32(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val)
+{
+	unsigned int *pci_conf;
+	u32 cfg;
+
+	if (where & 0x3)
+		return -EINVAL;
+
+	if (bus == 0) {
+		devfn += (0x8 * 6); /* start at AD16=Device0 */
+	} else if (bus == TGT) {
+		bus = 0;
+		devfn = 0; /* special case: bridge controller itself */
+	}
+
+	/* Select bus */
+	cfg = REGLOAD(priv->regs->cfg_stat);
+	REGSTORE(priv->regs->cfg_stat, (cfg & ~(0xf << 23)) | (bus << 23));
+
+	pci_conf = (unsigned int *) (priv->pci_conf |
+						(devfn << 8) | (where & 0xfc));
+	LEON3_BYPASS_STORE_PA(pci_conf, swab32(val));
+
+	return 0;
+}
+
+static int grpci1_cfg_w16(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val)
+{
+	int ret;
+	u32 v;
+
+	if (where & 0x1)
+		return -EINVAL;
+	ret = grpci1_cfg_r32(priv, bus, devfn, where&~3, &v);
+	if (ret)
+		return ret;
+	v = (v & ~(0xffff << (8 * (where & 0x3)))) |
+	    ((0xffff & val) << (8 * (where & 0x3)));
+	return grpci1_cfg_w32(priv, bus, devfn, where & ~0x3, v);
+}
+
+static int grpci1_cfg_w8(struct grpci1_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val)
+{
+	int ret;
+	u32 v;
+
+	ret = grpci1_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	if (ret != 0)
+		return ret;
+	v = (v & ~(0xff << (8 * (where & 0x3)))) |
+	    ((0xff & val) << (8 * (where & 0x3)));
+	return grpci1_cfg_w32(priv, bus, devfn, where & ~0x3, v);
+}
+
+/* Read from Configuration Space. When entering here the PCI layer has taken
+ * the pci_lock spinlock and IRQ is off.
+ */
+static int grpci1_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int where, int size, u32 *val)
+{
+	struct grpci1_priv *priv = grpci1priv;
+	unsigned int busno = bus->number;
+	int ret;
+
+	if (PCI_SLOT(devfn) > 15 || busno > 15) {
+		*val = ~0;
+		return 0;
+	}
+
+	switch (size) {
+	case 1:
+		ret = grpci1_cfg_r8(priv, busno, devfn, where, val);
+		break;
+	case 2:
+		ret = grpci1_cfg_r16(priv, busno, devfn, where, val);
+		break;
+	case 4:
+		ret = grpci1_cfg_r32(priv, busno, devfn, where, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+#ifdef GRPCI1_DEBUG_CFGACCESS
+	printk(KERN_INFO
+		"grpci1_read_config: [%02x:%02x:%x] ofs=%d val=%x size=%d\n",
+		busno, PCI_SLOT(devfn), PCI_FUNC(devfn), where, *val, size);
+#endif
+
+	return ret;
+}
+
+/* Write to Configuration Space. When entering here the PCI layer has taken
+ * the pci_lock spinlock and IRQ is off.
+ */
+static int grpci1_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 val)
+{
+	struct grpci1_priv *priv = grpci1priv;
+	unsigned int busno = bus->number;
+
+	if (PCI_SLOT(devfn) > 15 || busno > 15)
+		return 0;
+
+#ifdef GRPCI1_DEBUG_CFGACCESS
+	printk(KERN_INFO
+		"grpci1_write_config: [%02x:%02x:%x] ofs=%d size=%d val=%x\n",
+		busno, PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+#endif
+
+	switch (size) {
+	default:
+		return -EINVAL;
+	case 1:
+		return grpci1_cfg_w8(priv, busno, devfn, where, val);
+	case 2:
+		return grpci1_cfg_w16(priv, busno, devfn, where, val);
+	case 4:
+		return grpci1_cfg_w32(priv, busno, devfn, where, val);
+	}
+}
+
+static struct pci_ops grpci1_ops = {
+	.read =		grpci1_read_config,
+	.write =	grpci1_write_config,
+};
+
+/* GENIRQ IRQ chip implementation for grpci1 irqmode=0..2. In configuration
+ * 3, where all PCI interrupts have a separate IRQ on the system IRQ
+ * controller, this is not needed and the standard IRQ controller can be used.
+ */
+
+static void grpci1_mask_irq(struct irq_data *data)
+{
+	u32 irqidx;
+	struct grpci1_priv *priv = grpci1priv;
+
+	irqidx = (u32)data->chip_data - 1;
+	if (irqidx > 3) /* only mask PCI interrupts here */
+		return;
+	irqidx += IRQ_MASK_BIT;
+
+	REGSTORE(priv->regs->irq, REGLOAD(priv->regs->irq) & ~(1 << irqidx));
+}
+
+static void grpci1_unmask_irq(struct irq_data *data)
+{
+	u32 irqidx;
+	struct grpci1_priv *priv = grpci1priv;
+
+	irqidx = (u32)data->chip_data - 1;
+	if (irqidx > 3) /* only unmask PCI interrupts here */
+		return;
+	irqidx += IRQ_MASK_BIT;
+
+	REGSTORE(priv->regs->irq, REGLOAD(priv->regs->irq) | (1 << irqidx));
+}
+
+static unsigned int grpci1_startup_irq(struct irq_data *data)
+{
+	grpci1_unmask_irq(data);
+	return 0;
+}
+
+static void grpci1_shutdown_irq(struct irq_data *data)
+{
+	grpci1_mask_irq(data);
+}
+
+static struct irq_chip grpci1_irq = {
+	.name		= "grpci1",
+	.irq_startup	= grpci1_startup_irq,
+	.irq_shutdown	= grpci1_shutdown_irq,
+	.irq_mask	= grpci1_mask_irq,
+	.irq_unmask	= grpci1_unmask_irq,
+};
+
+/* Handle one or multiple IRQs from the PCI core */
+static void grpci1_pci_flow_irq(struct irq_desc *desc)
+{
+	struct grpci1_priv *priv = grpci1priv;
+	int i, ack = 0;
+	unsigned int irqreg;
+
+	irqreg = REGLOAD(priv->regs->irq);
+	irqreg = (irqreg >> IRQ_MASK_BIT) & irqreg;
+
+	/* Error Interrupt? */
+	if (irqreg & IRQ_ALL_ERRORS) {
+		generic_handle_irq(priv->irq_err);
+		ack = 1;
+	}
+
+	/* PCI Interrupt? */
+	if (irqreg & IRQ_INTX) {
+		/* Call respective PCI Interrupt handler */
+		for (i = 0; i < 4; i++) {
+			if (irqreg & (1 << i))
+				generic_handle_irq(priv->irq_map[i]);
+		}
+		ack = 1;
+	}
+
+	/*
+	 * Call "first level" IRQ chip end-of-irq handler. It will ACK LEON IRQ
+	 * Controller, this must be done after IRQ sources have been handled to
+	 * avoid double IRQ generation
+	 */
+	if (ack)
+		desc->irq_data.chip->irq_eoi(&desc->irq_data);
+}
+
+/* Create a virtual IRQ */
+static unsigned int grpci1_build_device_irq(unsigned int irq)
+{
+	unsigned int virq = 0, pil;
+
+	pil = 1 << 8;
+	virq = irq_alloc(irq, pil);
+	if (virq == 0)
+		goto out;
+
+	irq_set_chip_and_handler_name(virq, &grpci1_irq, handle_simple_irq,
+				      "pcilvl");
+	irq_set_chip_data(virq, (void *)irq);
+
+out:
+	return virq;
+}
+
+/*
+ * Initialize mappings AMBA<->PCI, clear IRQ state, setup PCI interface
+ *
+ * Target BARs:
+ *  BAR0: unused in this implementation
+ *  BAR1: peripheral DMA to host's memory (size at least 256MByte)
+ *  BAR2..BAR5: not implemented in hardware
+ */
+static void grpci1_hw_init(struct grpci1_priv *priv)
+{
+	u32 ahbadr, bar_sz, data, pciadr;
+	struct grpci1_regs __iomem *regs = priv->regs;
+
+	/* set 1:1 mapping between AHB -> PCI memory space */
+	REGSTORE(regs->cfg_stat, priv->pci_area & 0xf0000000);
+
+	/* map PCI accesses to target BAR1 to Linux kernel memory 1:1 */
+	ahbadr = 0xf0000000 & (u32)__pa(PAGE_ALIGN((unsigned long) &_end));
+	REGSTORE(regs->page1, ahbadr);
+
+	/* translate I/O accesses to 0, I/O Space always @ PCI low 64Kbytes */
+	REGSTORE(regs->iomap, REGLOAD(regs->iomap) & 0x0000ffff);
+
+	/* disable and clear pending interrupts */
+	REGSTORE(regs->irq, 0);
+
+	/* Setup BAR0 outside access range so that it does not conflict with
+	 * peripheral DMA. There is no need to set up the PAGE0 register.
+	 */
+	grpci1_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0, 0xffffffff);
+	grpci1_cfg_r32(priv, TGT, 0, PCI_BASE_ADDRESS_0, &bar_sz);
+	bar_sz = ~bar_sz + 1;
+	pciadr = priv->pci_area - bar_sz;
+	grpci1_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0, pciadr);
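+
+	/* Sizing example (illustrative): if the read-back value is
+	 * 0xf0000000, then bar_sz = ~0xf0000000 + 1 = 0x10000000, and
+	 * BAR0 is placed in the 256MB directly below pci_area.
+	 */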
+
+	/*
+	 * Setup the Host's PCI Target BAR1 for other peripherals to access,
+	 * and do DMA to the host's memory.
+	 */
+	grpci1_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_1, ahbadr);
+
+	/*
+	 * Set up the Latency Timer and cache line size. The default cache
+	 * line size results in poor performance (256-word fetches); 0xff
+	 * sets it according to the max size of the PCI FIFO.
+	 */
+	grpci1_cfg_w8(priv, TGT, 0, PCI_CACHE_LINE_SIZE, 0xff);
+	grpci1_cfg_w8(priv, TGT, 0, PCI_LATENCY_TIMER, 0x40);
+
+	/* set as bus master, enable pci memory responses, clear status bits */
+	grpci1_cfg_r32(priv, TGT, 0, PCI_COMMAND, &data);
+	data |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	grpci1_cfg_w32(priv, TGT, 0, PCI_COMMAND, data);
+}
+
+static irqreturn_t grpci1_jump_interrupt(int irq, void *arg)
+{
+	struct grpci1_priv *priv = arg;
+	dev_err(priv->dev, "Jump IRQ happened\n");
+	return IRQ_NONE;
+}
+
+/* Handle GRPCI1 Error Interrupt */
+static irqreturn_t grpci1_err_interrupt(int irq, void *arg)
+{
+	struct grpci1_priv *priv = arg;
+	u32 status;
+
+	grpci1_cfg_r16(priv, TGT, 0, PCI_STATUS, &status);
+	status &= priv->pci_err_mask;
+
+	if (status == 0)
+		return IRQ_NONE;
+
+	if (status & PCI_STATUS_PARITY)
+		dev_err(priv->dev, "Data Parity Error\n");
+
+	if (status & PCI_STATUS_SIG_TARGET_ABORT)
+		dev_err(priv->dev, "Signalled Target Abort\n");
+
+	if (status & PCI_STATUS_REC_TARGET_ABORT)
+		dev_err(priv->dev, "Received Target Abort\n");
+
+	if (status & PCI_STATUS_REC_MASTER_ABORT)
+		dev_err(priv->dev, "Received Master Abort\n");
+
+	if (status & PCI_STATUS_SIG_SYSTEM_ERROR)
+		dev_err(priv->dev, "Signalled System Error\n");
+
+	if (status & PCI_STATUS_DETECTED_PARITY)
+		dev_err(priv->dev, "Parity Error\n");
+
+	/* Clear handled INT TYPE IRQs */
+	grpci1_cfg_w16(priv, TGT, 0, PCI_STATUS, status);
+
+	return IRQ_HANDLED;
+}
+
+static int grpci1_of_probe(struct platform_device *ofdev)
+{
+	struct grpci1_regs __iomem *regs;
+	struct grpci1_priv *priv;
+	int err, len;
+	const int *tmp;
+	u32 cfg, size, err_mask;
+	struct resource *res;
+
+	if (grpci1priv) {
+		dev_err(&ofdev->dev, "only one GRPCI1 supported\n");
+		return -ENODEV;
+	}
+
+	if (ofdev->num_resources < 3) {
+		dev_err(&ofdev->dev, "not enough APB/AHB resources\n");
+		return -EIO;
+	}
+
+	priv = devm_kzalloc(&ofdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(&ofdev->dev, "memory allocation failed\n");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(ofdev, priv);
+	priv->dev = &ofdev->dev;
+
+	/* find device register base address */
+	res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+	regs = devm_ioremap_resource(&ofdev->dev, res);
+	if (IS_ERR(regs))
+		return PTR_ERR(regs);
+
+	/*
+	 * check that we're in the Host Slot and that we can act as a Host
+	 * Bridge and not only as a target/peripheral.
+	 */
+	cfg = REGLOAD(regs->cfg_stat);
+	if ((cfg & CFGSTAT_HOST) == 0) {
+		dev_err(&ofdev->dev, "not in host system slot\n");
+		return -EIO;
+	}
+
+	/* check that BAR1 supports 256 MByte so that we can map kernel space */
+	REGSTORE(regs->page1, 0xffffffff);
+	size = ~REGLOAD(regs->page1) + 1;
+	if (size < 0x10000000) {
+		dev_err(&ofdev->dev, "BAR1 must be at least 256MByte\n");
+		return -EIO;
+	}
+
+	/* hardware must support little-endian PCI (byte-twisting) */
+	if ((REGLOAD(regs->page0) & PAGE0_BTEN) == 0) {
+		dev_err(&ofdev->dev, "byte-twisting is required\n");
+		return -EIO;
+	}
+
+	priv->regs = regs;
+	priv->irq = irq_of_parse_and_map(ofdev->dev.of_node, 0);
+	dev_info(&ofdev->dev, "host found at 0x%p, irq%d\n", regs, priv->irq);
+
+	/* Find PCI Memory, I/O and Configuration Space Windows */
+	priv->pci_area = ofdev->resource[1].start;
+	priv->pci_area_end = ofdev->resource[1].end+1;
+	priv->pci_io = ofdev->resource[2].start;
+	priv->pci_conf = ofdev->resource[2].start + 0x10000;
+	priv->pci_conf_end = priv->pci_conf + 0x10000;
+	priv->pci_io_va = (unsigned long)ioremap(priv->pci_io, 0x10000);
+	if (!priv->pci_io_va) {
+		dev_err(&ofdev->dev, "unable to map PCI I/O area\n");
+		return -EIO;
+	}
+
+	printk(KERN_INFO
+		"GRPCI1: MEMORY SPACE [0x%08lx - 0x%08lx]\n"
+		"        I/O    SPACE [0x%08lx - 0x%08lx]\n"
+		"        CONFIG SPACE [0x%08lx - 0x%08lx]\n",
+		priv->pci_area, priv->pci_area_end-1,
+		priv->pci_io, priv->pci_conf-1,
+		priv->pci_conf, priv->pci_conf_end-1);
+
+	/*
+	 * I/O Space resources in the I/O Window are mapped into the virtual
+	 * address space. We never use the low 4KB because some devices seem
+	 * to have problems using address 0.
+	 */
+	priv->info.io_space.name = "GRPCI1 PCI I/O Space";
+	priv->info.io_space.start = priv->pci_io_va + 0x1000;
+	priv->info.io_space.end = priv->pci_io_va + 0x10000 - 1;
+	priv->info.io_space.flags = IORESOURCE_IO;
+
+	/*
+	 * grpci1 has no prefetchable memory, map everything as
+	 * non-prefetchable memory
+	 */
+	priv->info.mem_space.name = "GRPCI1 PCI MEM Space";
+	priv->info.mem_space.start = priv->pci_area;
+	priv->info.mem_space.end = priv->pci_area_end - 1;
+	priv->info.mem_space.flags = IORESOURCE_MEM;
+
+	if (request_resource(&iomem_resource, &priv->info.mem_space) < 0) {
+		dev_err(&ofdev->dev, "unable to request PCI memory area\n");
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	if (request_resource(&ioport_resource, &priv->info.io_space) < 0) {
+		dev_err(&ofdev->dev, "unable to request PCI I/O area\n");
+		err = -ENOMEM;
+		goto err2;
+	}
+
+	/* setup maximum supported PCI buses */
+	priv->info.busn.name = "GRPCI1 busn";
+	priv->info.busn.start = 0;
+	priv->info.busn.end = 15;
+
+	grpci1priv = priv;
+
+	/* Initialize hardware */
+	grpci1_hw_init(priv);
+
+	/*
+	 * Get the PCI interrupt to system IRQ mapping and set up IRQ
+	 * handling, including the error IRQ. All PCI and PCI-error
+	 * interrupts are shared using the same system IRQ.
+	 */
+	leon_update_virq_handling(priv->irq, grpci1_pci_flow_irq, "pcilvl", 0);
+
+	priv->irq_map[0] = grpci1_build_device_irq(1);
+	priv->irq_map[1] = grpci1_build_device_irq(2);
+	priv->irq_map[2] = grpci1_build_device_irq(3);
+	priv->irq_map[3] = grpci1_build_device_irq(4);
+	priv->irq_err = grpci1_build_device_irq(5);
+
+	printk(KERN_INFO "        PCI INTA..D#: IRQ%d, IRQ%d, IRQ%d, IRQ%d\n",
+		priv->irq_map[0], priv->irq_map[1], priv->irq_map[2],
+		priv->irq_map[3]);
+
+	/* Enable IRQs on LEON IRQ controller */
+	err = devm_request_irq(&ofdev->dev, priv->irq, grpci1_jump_interrupt, 0,
+				"GRPCI1_JUMP", priv);
+	if (err) {
+		dev_err(&ofdev->dev, "ERR IRQ request failed: %d\n", err);
+		goto err3;
+	}
+
+	/* Setup IRQ handler for access errors */
+	err = devm_request_irq(&ofdev->dev, priv->irq_err,
+				grpci1_err_interrupt, IRQF_SHARED, "GRPCI1_ERR",
+				priv);
+	if (err) {
+		dev_err(&ofdev->dev, "ERR VIRQ request failed: %d\n", err);
+		goto err3;
+	}
+
+	tmp = of_get_property(ofdev->dev.of_node, "all_pci_errors", &len);
+	if (tmp && (len == 4)) {
+		priv->pci_err_mask = ALL_PCI_ERRORS;
+		err_mask = IRQ_ALL_ERRORS << IRQ_MASK_BIT;
+	} else {
+		priv->pci_err_mask = DEF_PCI_ERRORS;
+		err_mask = IRQ_DEF_ERRORS << IRQ_MASK_BIT;
+	}
+
+	/*
+	 * Enable error interrupts. PCI interrupts are unmasked once
+	 * request_irq() is called by the PCI device drivers.
+	 */
+	REGSTORE(regs->irq, err_mask);
+
+	/* Init common layer and scan buses */
+	priv->info.ops = &grpci1_ops;
+	priv->info.map_irq = grpci1_map_irq;
+	leon_pci_init(ofdev, &priv->info);
+
+	return 0;
+
+err3:
+	release_resource(&priv->info.io_space);
+err2:
+	release_resource(&priv->info.mem_space);
+err1:
+	iounmap((void __iomem *)priv->pci_io_va);
+	grpci1priv = NULL;
+	return err;
+}
+
+static const struct of_device_id grpci1_of_match[] __initconst = {
+	{
+	 .name = "GAISLER_PCIFBRG",
+	 },
+	{
+	 .name = "01_014",
+	 },
+	{},
+};
+
+static struct platform_driver grpci1_of_driver = {
+	.driver = {
+		.name = "grpci1",
+		.of_match_table = grpci1_of_match,
+	},
+	.probe = grpci1_of_probe,
+};
+
+static int __init grpci1_init(void)
+{
+	return platform_driver_register(&grpci1_of_driver);
+}
+
+subsys_initcall(grpci1_init);
diff --git a/arch/sparc/kernel/leon_pci_grpci2.c b/arch/sparc/kernel/leon_pci_grpci2.c
new file mode 100644
index 0000000..ca22f93
--- /dev/null
+++ b/arch/sparc/kernel/leon_pci_grpci2.c
@@ -0,0 +1,913 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * leon_pci_grpci2.c: GRPCI2 Host PCI driver
+ *
+ * Copyright (C) 2011 Aeroflex Gaisler AB, Daniel Hellstrom
+ *
+ */
+
+#include <linux/of_device.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <asm/io.h>
+#include <asm/leon.h>
+#include <asm/vaddrs.h>
+#include <asm/sections.h>
+#include <asm/leon_pci.h>
+
+#include "irq.h"
+
+struct grpci2_barcfg {
+	unsigned long pciadr;	/* PCI Space Address */
+	unsigned long ahbadr;	/* PCI Base address mapped to this AHB addr */
+};
+
+/* Device Node Configuration options:
+ *  - barcfgs    : Custom Configuration of Host's 6 target BARs
+ *  - irq_mask   : Limit which PCI interrupts are enabled
+ *  - do_reset   : Force PCI Reset on startup
+ *
+ * barcfgs
+ * =======
+ *
+ * Optional custom Target BAR configuration (see struct grpci2_barcfg). All
+ * addresses are physical. Array always contains 6 elements (len=2*4*6 bytes)
+ *
+ * -1 means not configured (let host driver do default setup).
+ *
+ * [i*2+0] = PCI Address of BAR[i] on target interface
+ * [i*2+1] = Accessing the PCI address of BAR[i] results in this AMBA address
+ *           (an illustrative example follows this comment block)
+ *
+ *
+ * irq_mask
+ * ========
+ *
+ * Limit which PCI interrupts are enabled. 0=Disable, 1=Enable. By default
+ * all are enabled. Use this when PCI interrupt pins are floating on the PCB.
+ * int, len=4.
+ *  bit0 = PCI INTA#
+ *  bit1 = PCI INTB#
+ *  bit2 = PCI INTC#
+ *  bit3 = PCI INTD#
+ *
+ *
+ * reset
+ * =====
+ *
+ * Force PCI reset on startup. int, len=4
+ */
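+
+/* Example (illustrative): a barcfgs array that fixes only BAR1, mapping
+ * PCI address 0x40000000 to AHB address 0x00000000, and leaves the
+ * remaining five BARs to the driver's default setup:
+ *
+ *	{ -1, -1 }, { 0x40000000, 0x00000000 }, { -1, -1 },
+ *	{ -1, -1 }, { -1, -1 }, { -1, -1 }
+ */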
+
+/* Enable Debugging Configuration Space Access */
+#undef GRPCI2_DEBUG_CFGACCESS
+
+/*
+ * GRPCI2 APB Register MAP
+ */
+struct grpci2_regs {
+	unsigned int ctrl;		/* 0x00 Control */
+	unsigned int sts_cap;		/* 0x04 Status / Capabilities */
+	int res1;			/* 0x08 */
+	unsigned int io_map;		/* 0x0C I/O Map address */
+	unsigned int dma_ctrl;		/* 0x10 DMA */
+	unsigned int dma_bdbase;	/* 0x14 DMA */
+	int res2[2];			/* 0x18 */
+	unsigned int bars[6];		/* 0x20 read-only PCI BARs */
+	int res3[2];			/* 0x38 */
+	unsigned int ahbmst_map[16];	/* 0x40 AHB->PCI Map per AHB Master */
+
+	/* PCI Trace Buffer Registers (OPTIONAL) */
+	unsigned int t_ctrl;		/* 0x80 */
+	unsigned int t_cnt;		/* 0x84 */
+	unsigned int t_adpat;		/* 0x88 */
+	unsigned int t_admask;		/* 0x8C */
+	unsigned int t_sigpat;		/* 0x90 */
+	unsigned int t_sigmask;		/* 0x94 */
+	unsigned int t_adstate;		/* 0x98 */
+	unsigned int t_sigstate;	/* 0x9C */
+};
+
+#define REGLOAD(a)	(be32_to_cpu(__raw_readl(&(a))))
+#define REGSTORE(a, v)	(__raw_writel(cpu_to_be32(v), &(a)))
+
+#define CTRL_BUS_BIT 16
+
+#define CTRL_RESET (1<<31)
+#define CTRL_SI (1<<27)
+#define CTRL_PE (1<<26)
+#define CTRL_EI (1<<25)
+#define CTRL_ER (1<<24)
+#define CTRL_BUS (0xff<<CTRL_BUS_BIT)
+#define CTRL_HOSTINT 0xf
+
+#define STS_HOST_BIT	31
+#define STS_MST_BIT	30
+#define STS_TAR_BIT	29
+#define STS_DMA_BIT	28
+#define STS_DI_BIT	27
+#define STS_HI_BIT	26
+#define STS_IRQMODE_BIT	24
+#define STS_TRACE_BIT	23
+#define STS_CFGERRVALID_BIT 20
+#define STS_CFGERR_BIT	19
+#define STS_INTTYPE_BIT	12
+#define STS_INTSTS_BIT	8
+#define STS_FDEPTH_BIT	2
+#define STS_FNUM_BIT	0
+
+#define STS_HOST	(1<<STS_HOST_BIT)
+#define STS_MST		(1<<STS_MST_BIT)
+#define STS_TAR		(1<<STS_TAR_BIT)
+#define STS_DMA		(1<<STS_DMA_BIT)
+#define STS_DI		(1<<STS_DI_BIT)
+#define STS_HI		(1<<STS_HI_BIT)
+#define STS_IRQMODE	(0x3<<STS_IRQMODE_BIT)
+#define STS_TRACE	(1<<STS_TRACE_BIT)
+#define STS_CFGERRVALID	(1<<STS_CFGERRVALID_BIT)
+#define STS_CFGERR	(1<<STS_CFGERR_BIT)
+#define STS_INTTYPE	(0x3f<<STS_INTTYPE_BIT)
+#define STS_INTSTS	(0xf<<STS_INTSTS_BIT)
+#define STS_FDEPTH	(0x7<<STS_FDEPTH_BIT)
+#define STS_FNUM	(0x3<<STS_FNUM_BIT)
+
+#define STS_ISYSERR	(1<<17)
+#define STS_IDMA	(1<<16)
+#define STS_IDMAERR	(1<<15)
+#define STS_IMSTABRT	(1<<14)
+#define STS_ITGTABRT	(1<<13)
+#define STS_IPARERR	(1<<12)
+
+#define STS_ERR_IRQ (STS_ISYSERR | STS_IMSTABRT | STS_ITGTABRT | STS_IPARERR)
+
+struct grpci2_bd_chan {
+	unsigned int ctrl;	/* 0x00 DMA Control */
+	unsigned int nchan;	/* 0x04 Next DMA Channel Address */
+	unsigned int nbd;	/* 0x08 Next Data Descriptor in chan */
+	unsigned int res;	/* 0x0C Reserved */
+};
+
+#define BD_CHAN_EN		0x80000000
+#define BD_CHAN_TYPE		0x00300000
+#define BD_CHAN_BDCNT		0x0000ffff
+#define BD_CHAN_EN_BIT		31
+#define BD_CHAN_TYPE_BIT	20
+#define BD_CHAN_BDCNT_BIT	0
+
+struct grpci2_bd_data {
+	unsigned int ctrl;	/* 0x00 DMA Data Control */
+	unsigned int pci_adr;	/* 0x04 PCI Start Address */
+	unsigned int ahb_adr;	/* 0x08 AHB Start address */
+	unsigned int next;	/* 0x0C Next Data Descriptor in chan */
+};
+
+#define BD_DATA_EN		0x80000000
+#define BD_DATA_IE		0x40000000
+#define BD_DATA_DR		0x20000000
+#define BD_DATA_TYPE		0x00300000
+#define BD_DATA_ER		0x00080000
+#define BD_DATA_LEN		0x0000ffff
+#define BD_DATA_EN_BIT		31
+#define BD_DATA_IE_BIT		30
+#define BD_DATA_DR_BIT		29
+#define BD_DATA_TYPE_BIT	20
+#define BD_DATA_ER_BIT		19
+#define BD_DATA_LEN_BIT		0
+
+/* GRPCI2 Capability */
+struct grpci2_cap_first {
+	unsigned int ctrl;
+	unsigned int pci2ahb_map[6];
+	unsigned int ext2ahb_map;
+	unsigned int io_map;
+	unsigned int pcibar_size[6];
+};
+#define CAP9_CTRL_OFS 0
+#define CAP9_BAR_OFS 0x4
+#define CAP9_IOMAP_OFS 0x20
+#define CAP9_BARSIZE_OFS 0x24
+
+#define TGT 256
+
+struct grpci2_priv {
+	struct leon_pci_info	info; /* must be on top of this structure */
+	struct grpci2_regs __iomem *regs;
+	char			irq;
+	char			irq_mode; /* IRQ Mode from CAPSTS REG */
+	char			bt_enabled;
+	char			do_reset;
+	char			irq_mask;
+	u32			pciid; /* PCI ID of Host */
+	unsigned char		irq_map[4];
+
+	/* Virtual IRQ numbers */
+	unsigned int		virq_err;
+	unsigned int		virq_dma;
+
+	/* AHB PCI Windows */
+	unsigned long		pci_area;	/* MEMORY */
+	unsigned long		pci_area_end;
+	unsigned long		pci_io;		/* I/O */
+	unsigned long		pci_conf;	/* CONFIGURATION */
+	unsigned long		pci_conf_end;
+	unsigned long		pci_io_va;
+
+	struct grpci2_barcfg	tgtbars[6];
+};
+
+static DEFINE_SPINLOCK(grpci2_dev_lock);
+static struct grpci2_priv *grpci2priv;
+
+static int grpci2_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct grpci2_priv *priv = dev->bus->sysdata;
+	int irq_group;
+
+	/* Use default IRQ decoding on PCI BUS0 according to slot numbering */
+	irq_group = slot & 0x3;
+	pin = ((pin - 1) + irq_group) & 0x3;
+
+	return priv->irq_map[pin];
+}
+
+static int grpci2_cfg_r32(struct grpci2_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 *val)
+{
+	unsigned int *pci_conf;
+	unsigned long flags;
+	u32 tmp;
+
+	if (where & 0x3)
+		return -EINVAL;
+
+	if (bus == 0) {
+		devfn += (0x8 * 6); /* start at AD16=Device0 */
+	} else if (bus == TGT) {
+		bus = 0;
+		devfn = 0; /* special case: bridge controller itself */
+	}
+
+	/* Select bus */
+	spin_lock_irqsave(&grpci2_dev_lock, flags);
+	REGSTORE(priv->regs->ctrl, (REGLOAD(priv->regs->ctrl) & ~(0xff << 16)) |
+				   (bus << 16));
+	spin_unlock_irqrestore(&grpci2_dev_lock, flags);
+
+	/* clear old status */
+	REGSTORE(priv->regs->sts_cap, (STS_CFGERR | STS_CFGERRVALID));
+
+	pci_conf = (unsigned int *) (priv->pci_conf |
+						(devfn << 8) | (where & 0xfc));
+	tmp = LEON3_BYPASS_LOAD_PA(pci_conf);
+
+	/* Wait until GRPCI2 signals that the CFG access is done; it should
+	 * complete instantaneously unless a DMA operation is ongoing...
+	 */
+	while ((REGLOAD(priv->regs->sts_cap) & STS_CFGERRVALID) == 0)
+		;
+
+	if (REGLOAD(priv->regs->sts_cap) & STS_CFGERR) {
+		*val = 0xffffffff;
+	} else {
+		/* Bus always little endian (unaffected by byte-swapping) */
+		*val = swab32(tmp);
+	}
+
+	return 0;
+}
+
+static int grpci2_cfg_r16(struct grpci2_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 *val)
+{
+	u32 v;
+	int ret;
+
+	if (where & 0x1)
+		return -EINVAL;
+	ret = grpci2_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	*val = 0xffff & (v >> (8 * (where & 0x3)));
+	return ret;
+}
+
+static int grpci2_cfg_r8(struct grpci2_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 *val)
+{
+	u32 v;
+	int ret;
+
+	ret = grpci2_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	*val = 0xff & (v >> (8 * (where & 3)));
+
+	return ret;
+}
+
+static int grpci2_cfg_w32(struct grpci2_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val)
+{
+	unsigned int *pci_conf;
+	unsigned long flags;
+
+	if (where & 0x3)
+		return -EINVAL;
+
+	if (bus == 0) {
+		devfn += (0x8 * 6); /* start at AD16=Device0 */
+	} else if (bus == TGT) {
+		bus = 0;
+		devfn = 0; /* special case: bridge controller itself */
+	}
+
+	/* Select bus */
+	spin_lock_irqsave(&grpci2_dev_lock, flags);
+	REGSTORE(priv->regs->ctrl, (REGLOAD(priv->regs->ctrl) & ~(0xff << 16)) |
+				   (bus << 16));
+	spin_unlock_irqrestore(&grpci2_dev_lock, flags);
+
+	/* clear old status */
+	REGSTORE(priv->regs->sts_cap, (STS_CFGERR | STS_CFGERRVALID));
+
+	pci_conf = (unsigned int *) (priv->pci_conf |
+						(devfn << 8) | (where & 0xfc));
+	LEON3_BYPASS_STORE_PA(pci_conf, swab32(val));
+
+	/* Wait until GRPCI2 signals that the CFG access is done; it should
+	 * complete instantaneously unless a DMA operation is ongoing...
+	 */
+	while ((REGLOAD(priv->regs->sts_cap) & STS_CFGERRVALID) == 0)
+		;
+
+	return 0;
+}
+
+static int grpci2_cfg_w16(struct grpci2_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val)
+{
+	int ret;
+	u32 v;
+
+	if (where & 0x1)
+		return -EINVAL;
+	ret = grpci2_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	if (ret)
+		return ret;
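+	/* Read-modify-write of the containing dword. Example (illustrative):
+	 * where = 0x06 selects the upper half of the dword at 0x04, so bits
+	 * 31:16 are replaced and bits 15:0 are kept.
+	 */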
+	v = (v & ~(0xffff << (8 * (where & 0x3)))) |
+	    ((0xffff & val) << (8 * (where & 0x3)));
+	return grpci2_cfg_w32(priv, bus, devfn, where & ~0x3, v);
+}
+
+static int grpci2_cfg_w8(struct grpci2_priv *priv, unsigned int bus,
+				unsigned int devfn, int where, u32 val)
+{
+	int ret;
+	u32 v;
+
+	ret = grpci2_cfg_r32(priv, bus, devfn, where & ~0x3, &v);
+	if (ret != 0)
+		return ret;
+	v = (v & ~(0xff << (8 * (where & 0x3)))) |
+	    ((0xff & val) << (8 * (where & 0x3)));
+	return grpci2_cfg_w32(priv, bus, devfn, where & ~0x3, v);
+}
+
+/* Read from Configuration Space. When entering here the PCI layer has taken
+ * the pci_lock spinlock and IRQs are off.
+ */
+static int grpci2_read_config(struct pci_bus *bus, unsigned int devfn,
+			      int where, int size, u32 *val)
+{
+	struct grpci2_priv *priv = grpci2priv;
+	unsigned int busno = bus->number;
+	int ret;
+
+	if (PCI_SLOT(devfn) > 15 || busno > 255) {
+		*val = ~0;
+		return 0;
+	}
+
+	switch (size) {
+	case 1:
+		ret = grpci2_cfg_r8(priv, busno, devfn, where, val);
+		break;
+	case 2:
+		ret = grpci2_cfg_r16(priv, busno, devfn, where, val);
+		break;
+	case 4:
+		ret = grpci2_cfg_r32(priv, busno, devfn, where, val);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+#ifdef GRPCI2_DEBUG_CFGACCESS
+	printk(KERN_INFO "grpci2_read_config: [%02x:%02x:%x] ofs=%d val=%x "
+		"size=%d\n", busno, PCI_SLOT(devfn), PCI_FUNC(devfn), where,
+		*val, size);
+#endif
+
+	return ret;
+}
+
+/* Write to Configuration Space. When entering here the PCI layer has taken
+ * the pci_lock spinlock and IRQs are off.
+ */
+static int grpci2_write_config(struct pci_bus *bus, unsigned int devfn,
+			       int where, int size, u32 val)
+{
+	struct grpci2_priv *priv = grpci2priv;
+	unsigned int busno = bus->number;
+
+	if (PCI_SLOT(devfn) > 15 || busno > 255)
+		return 0;
+
+#ifdef GRPCI2_DEBUG_CFGACCESS
+	printk(KERN_INFO "grpci2_write_config: [%02x:%02x:%x] ofs=%d size=%d "
+		"val=%x\n", busno, PCI_SLOT(devfn), PCI_FUNC(devfn),
+		where, size, val);
+#endif
+
+	switch (size) {
+	default:
+		return -EINVAL;
+	case 1:
+		return grpci2_cfg_w8(priv, busno, devfn, where, val);
+	case 2:
+		return grpci2_cfg_w16(priv, busno, devfn, where, val);
+	case 4:
+		return grpci2_cfg_w32(priv, busno, devfn, where, val);
+	}
+}
+
+static struct pci_ops grpci2_ops = {
+	.read =		grpci2_read_config,
+	.write =	grpci2_write_config,
+};
+
+/* GENIRQ IRQ chip implementation for GRPCI2 irqmode=0..2. In configuration
+ * 3, where all PCI interrupts have a separate IRQ on the system IRQ
+ * controller, this is not needed and the standard IRQ controller can be
+ * used.
+ */
+
+static void grpci2_mask_irq(struct irq_data *data)
+{
+	unsigned long flags;
+	unsigned int irqidx;
+	struct grpci2_priv *priv = grpci2priv;
+
+	irqidx = (unsigned int)data->chip_data - 1;
+	if (irqidx > 3) /* only mask PCI interrupts here */
+		return;
+
+	spin_lock_irqsave(&grpci2_dev_lock, flags);
+	REGSTORE(priv->regs->ctrl, REGLOAD(priv->regs->ctrl) & ~(1 << irqidx));
+	spin_unlock_irqrestore(&grpci2_dev_lock, flags);
+}
+
+static void grpci2_unmask_irq(struct irq_data *data)
+{
+	unsigned long flags;
+	unsigned int irqidx;
+	struct grpci2_priv *priv = grpci2priv;
+
+	irqidx = (unsigned int)data->chip_data - 1;
+	if (irqidx > 3) /* only unmask PCI interrupts here */
+		return;
+
+	spin_lock_irqsave(&grpci2_dev_lock, flags);
+	REGSTORE(priv->regs->ctrl, REGLOAD(priv->regs->ctrl) | (1 << irqidx));
+	spin_unlock_irqrestore(&grpci2_dev_lock, flags);
+}
+
+static unsigned int grpci2_startup_irq(struct irq_data *data)
+{
+	grpci2_unmask_irq(data);
+	return 0;
+}
+
+static void grpci2_shutdown_irq(struct irq_data *data)
+{
+	grpci2_mask_irq(data);
+}
+
+static struct irq_chip grpci2_irq = {
+	.name		= "grpci2",
+	.irq_startup	= grpci2_startup_irq,
+	.irq_shutdown	= grpci2_shutdown_irq,
+	.irq_mask	= grpci2_mask_irq,
+	.irq_unmask	= grpci2_unmask_irq,
+};
+
+/* Handle one or multiple IRQs from the PCI core */
+static void grpci2_pci_flow_irq(struct irq_desc *desc)
+{
+	struct grpci2_priv *priv = grpci2priv;
+	int i, ack = 0;
+	unsigned int ctrl, sts_cap, pci_ints;
+
+	ctrl = REGLOAD(priv->regs->ctrl);
+	sts_cap = REGLOAD(priv->regs->sts_cap);
+
+	/* Error Interrupt? */
+	if (sts_cap & STS_ERR_IRQ) {
+		generic_handle_irq(priv->virq_err);
+		ack = 1;
+	}
+
+	/* PCI Interrupt? */
+	pci_ints = ((~sts_cap) >> STS_INTSTS_BIT) & ctrl & CTRL_HOSTINT;
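+	/* The INTA#..INTD# status bits in sts_cap are active low, so the
+	 * inversion yields the pending lines, which are then qualified by
+	 * the per-interrupt enable bits (CTRL_HOSTINT) in ctrl.
+	 */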
+	if (pci_ints) {
+		/* Call respective PCI Interrupt handler */
+		for (i = 0; i < 4; i++) {
+			if (pci_ints & (1 << i))
+				generic_handle_irq(priv->irq_map[i]);
+		}
+		ack = 1;
+	}
+
+	/*
+	 * Decode the DMA interrupt only when it is shared with Err and PCI
+	 * INTX#. When the DMA has a unique IRQ, the DMA interrupts don't end
+	 * up here; they go directly to the DMA ISR.
+	 */
+	if ((priv->irq_mode == 0) && (sts_cap & (STS_IDMA | STS_IDMAERR))) {
+		generic_handle_irq(priv->virq_dma);
+		ack = 1;
+	}
+
+	/*
+	 * Call the "first level" IRQ chip's end-of-irq handler. It will ACK
+	 * the LEON IRQ controller; this must be done after the IRQ sources
+	 * have been handled to avoid double IRQ generation.
+	 */
+	if (ack)
+		desc->irq_data.chip->irq_eoi(&desc->irq_data);
+}
+
+/* Create a virtual IRQ */
+static unsigned int grpci2_build_device_irq(unsigned int irq)
+{
+	unsigned int virq = 0, pil;
+
+	pil = 1 << 8;
+	virq = irq_alloc(irq, pil);
+	if (virq == 0)
+		goto out;
+
+	irq_set_chip_and_handler_name(virq, &grpci2_irq, handle_simple_irq,
+				      "pcilvl");
+	irq_set_chip_data(virq, (void *)irq);
+
+out:
+	return virq;
+}
+
+static void grpci2_hw_init(struct grpci2_priv *priv)
+{
+	u32 ahbadr, pciadr, bar_sz, capptr, io_map, data;
+	struct grpci2_regs __iomem *regs = priv->regs;
+	int i;
+	struct grpci2_barcfg *barcfg = priv->tgtbars;
+
+	/* Reset any earlier setup */
+	if (priv->do_reset) {
+		printk(KERN_INFO "GRPCI2: Resetting PCI bus\n");
+		REGSTORE(regs->ctrl, CTRL_RESET);
+		ssleep(1); /* Wait for boards to settle */
+	}
+	REGSTORE(regs->ctrl, 0);
+	REGSTORE(regs->sts_cap, ~0); /* Clear Status */
+	REGSTORE(regs->dma_ctrl, 0);
+	REGSTORE(regs->dma_bdbase, 0);
+
+	/* Translate I/O accesses to 0; I/O space is always the low 64KB of PCI */
+	REGSTORE(regs->io_map, REGLOAD(regs->io_map) & 0x0000ffff);
+
+	/* Set up a 1:1 mapping between AHB and PCI memory space for all
+	 * masters. Each AHB master has its own mapping registers; max 16
+	 * AHB masters.
+	 */
+	for (i = 0; i < 16; i++)
+		REGSTORE(regs->ahbmst_map[i], priv->pci_area);
+
+	/* Get the GRPCI2 Host PCI ID */
+	grpci2_cfg_r32(priv, TGT, 0, PCI_VENDOR_ID, &priv->pciid);
+
+	/* Get the address of the first (always defined) capability structure */
+	grpci2_cfg_r8(priv, TGT, 0, PCI_CAPABILITY_LIST, &capptr);
+
+	/* Enable/Disable Byte twisting */
+	grpci2_cfg_r32(priv, TGT, 0, capptr+CAP9_IOMAP_OFS, &io_map);
+	io_map = (io_map & ~0x1) | (priv->bt_enabled ? 1 : 0);
+	grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_IOMAP_OFS, io_map);
+
+	/* Set up the host's PCI target BARs for other peripherals to access
+	 * and do DMA to the host's memory. The target BARs can be sized and
+	 * enabled individually.
+	 *
+	 * The user may set custom target BARs, but the default is: the
+	 * first BAR is used to map kernel low main memory (DMA is part of
+	 * the normal region on sparc, which is SRMMU_MAXMEM big) 1:1 to the
+	 * PCI bus, and the other BARs are disabled. We assume that the
+	 * first BAR is always available.
+	 */
+	for (i = 0; i < 6; i++) {
+		if (barcfg[i].pciadr != ~0 && barcfg[i].ahbadr != ~0) {
+			/* Target BARs must have the proper alignment */
+			ahbadr = barcfg[i].ahbadr;
+			pciadr = barcfg[i].pciadr;
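+			/* ((pciadr - 1) & ~pciadr) + 1 isolates the lowest
+			 * set bit of pciadr, i.e. the largest power-of-two
+			 * size the address is aligned to. Example
+			 * (illustrative): pciadr = 0xf0000000 gives a
+			 * bar_sz of 0x10000000 (256MB).
+			 */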
+			bar_sz = ((pciadr - 1) & ~pciadr) + 1;
+		} else {
+			if (i == 0) {
+				/* Map main memory */
+				bar_sz = 0xf0000008; /* 256MB prefetchable */
+				ahbadr = 0xf0000000 & (u32)__pa(PAGE_ALIGN(
+					(unsigned long) &_end));
+				pciadr = ahbadr;
+			} else {
+				bar_sz = 0;
+				ahbadr = 0;
+				pciadr = 0;
+			}
+		}
+		grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_BARSIZE_OFS+i*4,
+				bar_sz);
+		grpci2_cfg_w32(priv, TGT, 0, PCI_BASE_ADDRESS_0+i*4, pciadr);
+		grpci2_cfg_w32(priv, TGT, 0, capptr+CAP9_BAR_OFS+i*4, ahbadr);
+		printk(KERN_INFO "        TGT BAR[%d]: 0x%08x (PCI)-> 0x%08x\n",
+			i, pciadr, ahbadr);
+	}
+
+	/* Set as bus master and enable PCI memory responses */
+	grpci2_cfg_r32(priv, TGT, 0, PCI_COMMAND, &data);
+	data |= (PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+	grpci2_cfg_w32(priv, TGT, 0, PCI_COMMAND, data);
+
+	/* Enable Error response (CPU-TRAP) on illegal memory access. */
+	REGSTORE(regs->ctrl, CTRL_ER | CTRL_PE);
+}
+
+static irqreturn_t grpci2_jump_interrupt(int irq, void *arg)
+{
+	printk(KERN_ERR "GRPCI2: Jump IRQ happened\n");
+	return IRQ_NONE;
+}
+
+/* Handle GRPCI2 Error Interrupt */
+static irqreturn_t grpci2_err_interrupt(int irq, void *arg)
+{
+	struct grpci2_priv *priv = arg;
+	struct grpci2_regs __iomem *regs = priv->regs;
+	unsigned int status;
+
+	status = REGLOAD(regs->sts_cap);
+	if ((status & STS_ERR_IRQ) == 0)
+		return IRQ_NONE;
+
+	if (status & STS_IPARERR)
+		printk(KERN_ERR "GRPCI2: Parity Error\n");
+
+	if (status & STS_ITGTABRT)
+		printk(KERN_ERR "GRPCI2: Target Abort\n");
+
+	if (status & STS_IMSTABRT)
+		printk(KERN_ERR "GRPCI2: Master Abort\n");
+
+	if (status & STS_ISYSERR)
+		printk(KERN_ERR "GRPCI2: System Error\n");
+
+	/* Clear handled INT TYPE IRQs */
+	REGSTORE(regs->sts_cap, status & STS_ERR_IRQ);
+
+	return IRQ_HANDLED;
+}
+
+static int grpci2_of_probe(struct platform_device *ofdev)
+{
+	struct grpci2_regs __iomem *regs;
+	struct grpci2_priv *priv;
+	int err, i, len;
+	const int *tmp;
+	unsigned int capability;
+
+	if (grpci2priv) {
+		printk(KERN_ERR "GRPCI2: only one GRPCI2 core supported\n");
+		return -ENODEV;
+	}
+
+	if (ofdev->num_resources < 3) {
+		printk(KERN_ERR "GRPCI2: not enough APB/AHB resources\n");
+		return -EIO;
+	}
+
+	/* Find Device Address */
+	regs = of_ioremap(&ofdev->resource[0], 0,
+			  resource_size(&ofdev->resource[0]),
+			  "grlib-grpci2 regs");
+	if (regs == NULL) {
+		printk(KERN_ERR "GRPCI2: ioremap failed\n");
+		return -EIO;
+	}
+
+	/*
+	 * Check that we're in Host Slot and that we can act as a Host Bridge
+	 * and not only as a target.
+	 */
+	capability = REGLOAD(regs->sts_cap);
+	if ((capability & STS_HOST) || !(capability & STS_MST)) {
+		printk(KERN_INFO "GRPCI2: not in host system slot\n");
+		err = -EIO;
+		goto err1;
+	}
+
+	priv = grpci2priv = kzalloc(sizeof(struct grpci2_priv), GFP_KERNEL);
+	if (grpci2priv == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+	priv->regs = regs;
+	priv->irq = ofdev->archdata.irqs[0]; /* BASE IRQ */
+	priv->irq_mode = (capability & STS_IRQMODE) >> STS_IRQMODE_BIT;
+
+	printk(KERN_INFO "GRPCI2: host found at %p, irq%d\n", regs, priv->irq);
+
+	/* Byte twisting should be made configurable from the kernel command line */
+	priv->bt_enabled = 1;
+
+	/* Let user do custom Target BAR assignment */
+	tmp = of_get_property(ofdev->dev.of_node, "barcfg", &len);
+	if (tmp && (len == 2*4*6))
+		memcpy(priv->tgtbars, tmp, 2*4*6);
+	else
+		memset(priv->tgtbars, -1, 2*4*6);
+
+	/* Limit IRQ unmasking in irq_mode 2 and 3 */
+	tmp = of_get_property(ofdev->dev.of_node, "irq_mask", &len);
+	if (tmp && (len == 4))
+		priv->irq_mask = (*tmp) & 0xf;
+	else
+		priv->irq_mask = 0xf;
+
+	/* Optional PCI reset. Force PCI reset on startup */
+	tmp = of_get_property(ofdev->dev.of_node, "reset", &len);
+	if (tmp && (len == 4))
+		priv->do_reset = *tmp;
+	else
+		priv->do_reset = 0;
+
+	/* Find PCI Memory, I/O and Configuration Space Windows */
+	priv->pci_area = ofdev->resource[1].start;
+	priv->pci_area_end = ofdev->resource[1].end+1;
+	priv->pci_io = ofdev->resource[2].start;
+	priv->pci_conf = ofdev->resource[2].start + 0x10000;
+	priv->pci_conf_end = priv->pci_conf + 0x10000;
+	priv->pci_io_va = (unsigned long)ioremap(priv->pci_io, 0x10000);
+	if (!priv->pci_io_va) {
+		err = -EIO;
+		goto err2;
+	}
+
+	printk(KERN_INFO
+		"GRPCI2: MEMORY SPACE [0x%08lx - 0x%08lx]\n"
+		"        I/O    SPACE [0x%08lx - 0x%08lx]\n"
+		"        CONFIG SPACE [0x%08lx - 0x%08lx]\n",
+		priv->pci_area, priv->pci_area_end-1,
+		priv->pci_io, priv->pci_conf-1,
+		priv->pci_conf, priv->pci_conf_end-1);
+
+	/*
+	 * I/O space resources in the I/O window mapped into virtual address
+	 * space. We never use the low 4KB because some devices seem to have
+	 * problems using address 0.
+	 */
+	memset(&priv->info.io_space, 0, sizeof(struct resource));
+	priv->info.io_space.name = "GRPCI2 PCI I/O Space";
+	priv->info.io_space.start = priv->pci_io_va + 0x1000;
+	priv->info.io_space.end = priv->pci_io_va + 0x10000 - 1;
+	priv->info.io_space.flags = IORESOURCE_IO;
+
+	/*
+	 * GRPCI2 has no prefetchable memory, map everything as
+	 * non-prefetchable memory
+	 */
+	memset(&priv->info.mem_space, 0, sizeof(struct resource));
+	priv->info.mem_space.name = "GRPCI2 PCI MEM Space";
+	priv->info.mem_space.start = priv->pci_area;
+	priv->info.mem_space.end = priv->pci_area_end - 1;
+	priv->info.mem_space.flags = IORESOURCE_MEM;
+
+	if (request_resource(&iomem_resource, &priv->info.mem_space) < 0)
+		goto err3;
+	if (request_resource(&ioport_resource, &priv->info.io_space) < 0)
+		goto err4;
+
+	/* setup maximum supported PCI buses */
+	priv->info.busn.name = "GRPCI2 busn";
+	priv->info.busn.start = 0;
+	priv->info.busn.end = 255;
+
+	grpci2_hw_init(priv);
+
+	/*
+	 * Get the PCI interrupt to system IRQ mapping and set up IRQ
+	 * handling. The Error IRQ is always on PCI INTA.
+	 */
+	if (priv->irq_mode < 2) {
+		/* All PCI interrupts are shared using the same system IRQ */
+		leon_update_virq_handling(priv->irq, grpci2_pci_flow_irq,
+					 "pcilvl", 0);
+
+		priv->irq_map[0] = grpci2_build_device_irq(1);
+		priv->irq_map[1] = grpci2_build_device_irq(2);
+		priv->irq_map[2] = grpci2_build_device_irq(3);
+		priv->irq_map[3] = grpci2_build_device_irq(4);
+
+		priv->virq_err = grpci2_build_device_irq(5);
+		if (priv->irq_mode & 1)
+			priv->virq_dma = ofdev->archdata.irqs[1];
+		else
+			priv->virq_dma = grpci2_build_device_irq(6);
+
+		/* Enable IRQs on LEON IRQ controller */
+		err = request_irq(priv->irq, grpci2_jump_interrupt, 0,
+					"GRPCI2_JUMP", priv);
+		if (err)
+			printk(KERN_ERR "GRPCI2: ERR IRQ request failed\n");
+	} else {
+		/* All PCI interrupts have a unique system IRQ */
+		for (i = 0; i < 4; i++) {
+			/* Make LEON IRQ layer handle level IRQ by acking */
+			leon_update_virq_handling(ofdev->archdata.irqs[i],
+						 handle_fasteoi_irq, "pcilvl",
+						 1);
+			priv->irq_map[i] = ofdev->archdata.irqs[i];
+		}
+		priv->virq_err = priv->irq_map[0];
+		if (priv->irq_mode & 1)
+			priv->virq_dma = ofdev->archdata.irqs[4];
+		else
+			priv->virq_dma = priv->irq_map[0];
+
+		/* Unmask all PCI interrupts; request_irq() will not do that */
+		REGSTORE(regs->ctrl, REGLOAD(regs->ctrl)|(priv->irq_mask&0xf));
+	}
+
+	/* Setup IRQ handler for non-configuration space access errors */
+	err = request_irq(priv->virq_err, grpci2_err_interrupt, IRQF_SHARED,
+				"GRPCI2_ERR", priv);
+	if (err) {
+		printk(KERN_DEBUG "GRPCI2: ERR VIRQ request failed: %d\n", err);
+		goto err5;
+	}
+
+	/*
+	 * Enable Error Interrupts. PCI interrupts are unmasked once
+	 * request_irq() is called by the PCI device drivers.
+	 */
+	REGSTORE(regs->ctrl, REGLOAD(regs->ctrl) | CTRL_EI | CTRL_SI);
+
+	/* Init common layer and scan buses */
+	priv->info.ops = &grpci2_ops;
+	priv->info.map_irq = grpci2_map_irq;
+	leon_pci_init(ofdev, &priv->info);
+
+	return 0;
+
+err5:
+	release_resource(&priv->info.io_space);
+err4:
+	release_resource(&priv->info.mem_space);
+err3:
+	err = -ENOMEM;
+	iounmap((void __iomem *)priv->pci_io_va);
+err2:
+	kfree(priv);
+err1:
+	of_iounmap(&ofdev->resource[0], regs,
+		resource_size(&ofdev->resource[0]));
+	return err;
+}
+
+static const struct of_device_id grpci2_of_match[] __initconst = {
+	{
+	 .name = "GAISLER_GRPCI2",
+	 },
+	{
+	 .name = "01_07c",
+	 },
+	{},
+};
+
+static struct platform_driver grpci2_of_driver = {
+	.driver = {
+		.name = "grpci2",
+		.of_match_table = grpci2_of_match,
+	},
+	.probe = grpci2_of_probe,
+};
+
+static int __init grpci2_init(void)
+{
+	return platform_driver_register(&grpci2_of_driver);
+}
+
+subsys_initcall(grpci2_init);
diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c
new file mode 100644
index 0000000..065e2d4
--- /dev/null
+++ b/arch/sparc/kernel/leon_pmc.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* leon_pmc.c: LEON Power-down cpu_idle() handler
+ *
+ * Copyright (C) 2011 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB
+ */
+
+#include <linux/init.h>
+#include <linux/pm.h>
+
+#include <asm/leon_amba.h>
+#include <asm/cpu_type.h>
+#include <asm/leon.h>
+#include <asm/processor.h>
+
+/* List of systems that need fixup instructions around the power-down
+ * instruction.
+ */
+static unsigned int pmc_leon_fixup_ids[] = {
+	AEROFLEX_UT699,
+	GAISLER_GR712RC,
+	LEON4_NEXTREME1,
+	0
+};
+
+static int pmc_leon_need_fixup(void)
+{
+	unsigned int systemid = amba_system_id >> 16;
+	unsigned int *id;
+
+	id = &pmc_leon_fixup_ids[0];
+	while (*id != 0) {
+		if (*id == systemid)
+			return 1;
+		id++;
+	}
+
+	return 0;
+}
+
+/*
+ * CPU idle callback function for systems that need some extra handling.
+ * See .../arch/sparc/kernel/process.c
+ */
+static void pmc_leon_idle_fixup(void)
+{
+	/* Prepare an address in a non-cachable region. APB is always
+	 * non-cachable. One instruction is executed after the sleep
+	 * instruction; we make sure to read the bus and throw away the
+	 * value by accessing a non-cachable area, and we make sure the
+	 * MMU does not take a TLB miss here by using the MMU BYPASS ASI.
+	 */
+	register unsigned int address = (unsigned int)leon3_irqctrl_regs;
+
+	/* Interrupts need to be enabled to not hang the CPU */
+	local_irq_enable();
+
+	__asm__ __volatile__ (
+		"wr	%%g0, %%asr19\n"
+		"lda	[%0] %1, %%g0\n"
+		:
+		: "r"(address), "i"(ASI_LEON_BYPASS));
+}
+
+/*
+ * CPU idle callback function
+ * See .../arch/sparc/kernel/process.c
+ */
+static void pmc_leon_idle(void)
+{
+	/* Interrupts need to be enabled to not hang the CPU */
+	local_irq_enable();
+
+	/* For systems without power-down, this will be a no-op */
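+	/* Writing %asr19 triggers the LEON power-down; the CPU stalls here
+	 * until the next interrupt wakes it up.
+	 */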
+	__asm__ __volatile__ ("wr	%g0, %asr19\n\t");
+}
+
+/* Install LEON Power Down function */
+static int __init leon_pmc_install(void)
+{
+	if (sparc_cpu_model == sparc_leon) {
+		/* Assign power management IDLE handler */
+		if (pmc_leon_need_fixup())
+			sparc_idle = pmc_leon_idle_fixup;
+		else
+			sparc_idle = pmc_leon_idle;
+
+		printk(KERN_INFO "leon: power management initialized\n");
+	}
+
+	return 0;
+}
+
+/* This driver is not critical to the boot process, don't care
+ * if initialized late.
+ */
+late_initcall(leon_pmc_install);
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
new file mode 100644
index 0000000..da6f148
--- /dev/null
+++ b/arch/sparc/kernel/leon_smp.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0
+/* leon_smp.c: Sparc-Leon SMP support.
+ *
+ * based on sun4m_smp.c
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB
+ * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB
+ */
+
+#include <asm/head.h>
+
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/of.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/profile.h>
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/cpu.h>
+#include <linux/clockchips.h>
+
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/irq_regs.h>
+#include <asm/traps.h>
+
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/oplib.h>
+#include <asm/cpudata.h>
+#include <asm/asi.h>
+#include <asm/leon.h>
+#include <asm/leon_amba.h>
+#include <asm/timer.h>
+
+#include "kernel.h"
+
+#include "irq.h"
+
+extern ctxd_t *srmmu_ctx_table_phys;
+static int smp_processors_ready;
+extern volatile unsigned long cpu_callin_map[NR_CPUS];
+extern cpumask_t smp_commenced_mask;
+void leon_configure_cache_smp(void);
+static void leon_ipi_init(void);
+
+/* IRQ number of LEON IPIs */
+int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
+
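+/* Atomically exchange *ptr and val. The swapa access uses the LEON
+ * "D-cache miss" ASI so it bypasses the data cache and the exchange is
+ * observed consistently by the other CPUs.
+ */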
+static inline unsigned long do_swap(volatile unsigned long *ptr,
+				    unsigned long val)
+{
+	__asm__ __volatile__("swapa [%2] %3, %0\n\t" : "=&r"(val)
+			     : "0"(val), "r"(ptr), "i"(ASI_LEON_DCACHE_MISS)
+			     : "memory");
+	return val;
+}
+
+void leon_cpu_pre_starting(void *arg)
+{
+	leon_configure_cache_smp();
+}
+
+void leon_cpu_pre_online(void *arg)
+{
+	int cpuid = hard_smp_processor_id();
+
+	/* Allow master to continue. The master will then give us the
+	 * go-ahead by setting the smp_commenced_mask and will wait without
+	 * timeouts until our setup is completed fully (signified by
+	 * our bit being set in the cpu_online_mask).
+	 */
+	do_swap(&cpu_callin_map[cpuid], 1);
+
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	/* Fix idle thread fields. */
+	__asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid])
+			     : "memory" /* paranoid */);
+
+	/* Attach to the address space of init_task. */
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
+		mb();
+}
+
+/*
+ *	Cycle through the processors asking the PROM to start each one.
+ */
+
+extern struct linux_prom_registers smp_penguin_ctable;
+
+void leon_configure_cache_smp(void)
+{
+	unsigned long cfg = sparc_leon3_get_dcachecfg();
+	int me = smp_processor_id();
+
+	if (ASI_LEON3_SYSCTRL_CFG_SSIZE(cfg) > 4) {
+		printk(KERN_INFO "Note: SMP with snooping only works on 4k cache, found %dk(0x%x) on cpu %d, disabling caches\n",
+		     (unsigned int)ASI_LEON3_SYSCTRL_CFG_SSIZE(cfg),
+		     (unsigned int)cfg, (unsigned int)me);
+		sparc_leon3_disable_cache();
+	} else {
+		if (cfg & ASI_LEON3_SYSCTRL_CFG_SNOOPING) {
+			sparc_leon3_enable_snooping();
+		} else {
+			printk(KERN_INFO "Note: You have to enable snooping in the VHDL model; disabling caches on cpu %d\n",
+			     me);
+			sparc_leon3_disable_cache();
+		}
+	}
+
+	local_ops->cache_all();
+	local_ops->tlb_all();
+}
+
+static void leon_smp_setbroadcast(unsigned int mask)
+{
+	int broadcast =
+	    ((LEON3_BYPASS_LOAD_PA(&(leon3_irqctrl_regs->mpstatus)) >>
+	      LEON3_IRQMPSTATUS_BROADCAST) & 1);
+	if (!broadcast) {
+		prom_printf("######## !!!! The irqmp-ctrl must have broadcast enabled, SMP won't work !!!!! ####### nr cpus: %d\n",
+		     leon_smp_nrcpus());
+		if (leon_smp_nrcpus() > 1) {
+			BUG();
+		} else {
+			prom_printf("continue anyway\n");
+			return;
+		}
+	}
+	LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpbroadcast), mask);
+}
+
+int leon_smp_nrcpus(void)
+{
+	int nrcpu =
+	    ((LEON3_BYPASS_LOAD_PA(&(leon3_irqctrl_regs->mpstatus)) >>
+	      LEON3_IRQMPSTATUS_CPUNR) & 0xf) + 1;
+	return nrcpu;
+}
+
+void __init leon_boot_cpus(void)
+{
+	int nrcpu = leon_smp_nrcpus();
+	int me = smp_processor_id();
+
+	/* Setup IPI */
+	leon_ipi_init();
+
+	printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me,
+	       (unsigned int)nrcpu, (unsigned int)NR_CPUS,
+	       (unsigned int)&(leon3_irqctrl_regs->mpstatus));
+
+	leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me);
+	leon_enable_irq_cpu(LEON3_IRQ_TICKER, me);
+	leon_enable_irq_cpu(leon_ipi_irq, me);
+
+	leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER);
+
+	leon_configure_cache_smp();
+	local_ops->cache_all();
+}
+
+int leon_boot_one_cpu(int i, struct task_struct *idle)
+{
+	int timeout;
+
+	current_set[i] = task_thread_info(idle);
+
+	/* See trampoline.S:leon_smp_cpu_startup for details...
+	 * Initialize the contexts table. Since the call to prom_startcpu()
+	 * trashes the structure, we need to re-initialize it for each cpu.
+	 */
+	smp_penguin_ctable.which_io = 0;
+	smp_penguin_ctable.phys_addr = (unsigned int)srmmu_ctx_table_phys;
+	smp_penguin_ctable.reg_size = 0;
+
+	/* whirrr, whirrr, whirrrrrrrrr... */
+	printk(KERN_INFO "Starting CPU %d : (irqmp: 0x%x)\n", (unsigned int)i,
+	       (unsigned int)&leon3_irqctrl_regs->mpstatus);
+	local_ops->cache_all();
+
+	/* Make sure all IRQs are off from the start for this new CPU */
+	LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0);
+
+	/* Wake one CPU */
+	LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i);
+
+	/* wheee... it's going... */
+	for (timeout = 0; timeout < 10000; timeout++) {
+		if (cpu_callin_map[i])
+			break;
+		udelay(200);
+	}
+	printk(KERN_INFO "Started CPU %d\n", (unsigned int)i);
+
+	if (!(cpu_callin_map[i])) {
+		printk(KERN_ERR "Processor %d is stuck.\n", i);
+		return -ENODEV;
+	} else {
+		leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i);
+		leon_enable_irq_cpu(LEON3_IRQ_TICKER, i);
+		leon_enable_irq_cpu(leon_ipi_irq, i);
+	}
+
+	local_ops->cache_all();
+	return 0;
+}
+
+void __init leon_smp_done(void)
+{
+	int i, first;
+	int *prev;
+
+	/* setup cpu list for irq rotation */
+	first = 0;
+	prev = &first;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_online(i)) {
+			*prev = i;
+			prev = &cpu_data(i).next;
+		}
+	}
+	*prev = first;
+	local_ops->cache_all();
+
+	/* Free unneeded trap tables */
+	if (!cpu_present(1)) {
+		free_reserved_page(virt_to_page(&trapbase_cpu1));
+	}
+	if (!cpu_present(2)) {
+		free_reserved_page(virt_to_page(&trapbase_cpu2));
+	}
+	if (!cpu_present(3)) {
+		free_reserved_page(virt_to_page(&trapbase_cpu3));
+	}
+	/* Ok, they are spinning and ready to go. */
+	smp_processors_ready = 1;
+}
+
+struct leon_ipi_work {
+	int single;
+	int msk;
+	int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work);
+
+/* Initialize IPIs on the LEON. To save IRQ resources, only one IRQ is used
+ * for all three types of IPIs.
+ */
+static void __init leon_ipi_init(void)
+{
+	int cpu, len;
+	struct leon_ipi_work *work;
+	struct property *pp;
+	struct device_node *rootnp;
+	struct tt_entry *trap_table;
+	unsigned long flags;
+
+	/* Find IPI IRQ or stick with default value */
+	rootnp = of_find_node_by_path("/ambapp0");
+	if (rootnp) {
+		pp = of_find_property(rootnp, "ipi_num", &len);
+		if (pp && (*(int *)pp->value))
+			leon_ipi_irq = *(int *)pp->value;
+	}
+	printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq);
+
+	/* Adjust so that we jump directly to smpleon_ipi */
+	local_irq_save(flags);
+	trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)];
+	trap_table->inst_three += smpleon_ipi - real_irq_entry;
+	local_ops->cache_all();
+	local_irq_restore(flags);
+
+	for_each_possible_cpu(cpu) {
+		work = &per_cpu(leon_ipi_work, cpu);
+		work->single = work->msk = work->resched = 0;
+	}
+}
+
+static void leon_send_ipi(int cpu, int level)
+{
+	unsigned long mask;
+
+	mask = leon_get_irqmask(level);
+	LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->force[cpu], mask);
+}
+
+static void leon_ipi_single(int cpu)
+{
+	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+	/* Mark work */
+	work->single = 1;
+
+	/* Generate IRQ on the CPU */
+	leon_send_ipi(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_mask_one(int cpu)
+{
+	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+	/* Mark work */
+	work->msk = 1;
+
+	/* Generate IRQ on the CPU */
+	leon_send_ipi(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_resched(int cpu)
+{
+	struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+	/* Mark work */
+	work->resched = 1;
+
+	/* Generate IRQ on the CPU (any IRQ will cause resched) */
+	leon_send_ipi(cpu, leon_ipi_irq);
+}
+
+void leonsmp_ipi_interrupt(void)
+{
+	struct leon_ipi_work *work = this_cpu_ptr(&leon_ipi_work);
+
+	if (work->single) {
+		work->single = 0;
+		smp_call_function_single_interrupt();
+	}
+	if (work->msk) {
+		work->msk = 0;
+		smp_call_function_interrupt();
+	}
+	if (work->resched) {
+		work->resched = 0;
+		smp_resched_interrupt();
+	}
+}
+
+static struct smp_funcall {
+	smpfunc_t func;
+	unsigned long arg1;
+	unsigned long arg2;
+	unsigned long arg3;
+	unsigned long arg4;
+	unsigned long arg5;
+	unsigned long processors_in[NR_CPUS];	/* Set when ipi entered. */
+	unsigned long processors_out[NR_CPUS];	/* Set when ipi exited. */
+} ccall_info __attribute__((aligned(8)));
+
+static DEFINE_SPINLOCK(cross_call_lock);
+
+/* Cross calls must be serialized, at least currently. */
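+/* Protocol sketch: the caller stores func/args into ccall_info, clears
+ * processors_in/out for every target CPU and fires LEON3_IRQ_CROSS_CALL
+ * at each of them. leon_cross_call_irq() sets processors_in[cpu] on entry
+ * and processors_out[cpu] on exit, and the caller spins on both arrays so
+ * the call has completed everywhere before the lock is dropped.
+ */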
+static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
+			    unsigned long arg2, unsigned long arg3,
+			    unsigned long arg4)
+{
+	if (smp_processors_ready) {
+		register int high = NR_CPUS - 1;
+		unsigned long flags;
+
+		spin_lock_irqsave(&cross_call_lock, flags);
+
+		{
+			/* If you make changes here, make sure gcc generates proper code... */
+			register smpfunc_t f asm("i0") = func;
+			register unsigned long a1 asm("i1") = arg1;
+			register unsigned long a2 asm("i2") = arg2;
+			register unsigned long a3 asm("i3") = arg3;
+			register unsigned long a4 asm("i4") = arg4;
+			register unsigned long a5 asm("i5") = 0;
+
+			__asm__ __volatile__("std %0, [%6]\n\t"
+					     "std %2, [%6 + 8]\n\t"
+					     "std %4, [%6 + 16]\n\t" : :
+					     "r"(f), "r"(a1), "r"(a2), "r"(a3),
+					     "r"(a4), "r"(a5),
+					     "r"(&ccall_info.func));
+		}
+
+		/* Init receive/complete mapping, plus fire the IPIs off. */
+		{
+			register int i;
+
+			cpumask_clear_cpu(smp_processor_id(), &mask);
+			cpumask_and(&mask, cpu_online_mask, &mask);
+			for (i = 0; i <= high; i++) {
+				if (cpumask_test_cpu(i, &mask)) {
+					ccall_info.processors_in[i] = 0;
+					ccall_info.processors_out[i] = 0;
+					leon_send_ipi(i, LEON3_IRQ_CROSS_CALL);
+
+				}
+			}
+		}
+
+		{
+			register int i;
+
+			i = 0;
+			do {
+				if (!cpumask_test_cpu(i, &mask))
+					continue;
+
+				while (!ccall_info.processors_in[i])
+					barrier();
+			} while (++i <= high);
+
+			i = 0;
+			do {
+				if (!cpumask_test_cpu(i, &mask))
+					continue;
+
+				while (!ccall_info.processors_out[i])
+					barrier();
+			} while (++i <= high);
+		}
+
+		spin_unlock_irqrestore(&cross_call_lock, flags);
+	}
+}
+
+/* Running cross calls. */
+void leon_cross_call_irq(void)
+{
+	int i = smp_processor_id();
+
+	ccall_info.processors_in[i] = 1;
+	ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3,
+			ccall_info.arg4, ccall_info.arg5);
+	ccall_info.processors_out[i] = 1;
+}
+
+static const struct sparc32_ipi_ops leon_ipi_ops = {
+	.cross_call = leon_cross_call,
+	.resched    = leon_ipi_resched,
+	.single     = leon_ipi_single,
+	.mask_one   = leon_ipi_mask_one,
+};
+
+void __init leon_init_smp(void)
+{
+	/* Patch ipi15 trap table */
+	t_nmi[1] = t_nmi[1] + (linux_trap_ipi15_leon - linux_trap_ipi15_sun4m);
+
+	sparc32_ipi_ops = &leon_ipi_ops;
+}
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
new file mode 100644
index 0000000..39a2503
--- /dev/null
+++ b/arch/sparc/kernel/mdesc.c
@@ -0,0 +1,1351 @@
+// SPDX-License-Identifier: GPL-2.0
+/* mdesc.c: Sun4V machine description handling.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/memblock.h>
+#include <linux/log2.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/refcount.h>
+
+#include <asm/cpudata.h>
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+#include <asm/prom.h>
+#include <linux/uaccess.h>
+#include <asm/oplib.h>
+#include <asm/smp.h>
+#include <asm/adi.h>
+
+/* Unlike the OBP device tree, the machine description is a full-on
+ * DAG.  An arbitrary number of ARCs are possible from one
+ * node to other nodes and thus we can't use the OBP device_node
+ * data structure to represent these nodes inside of the kernel.
+ *
+ * Actually, it isn't even a DAG, because there are back pointers
+ * which create cycles in the graph.
+ *
+ * mdesc_hdr and mdesc_elem describe the layout of the data structure
+ * we get from the Hypervisor.
+ */
+struct mdesc_hdr {
+	u32	version; /* Transport version */
+	u32	node_sz; /* node block size */
+	u32	name_sz; /* name block size */
+	u32	data_sz; /* data block size */
+} __attribute__((aligned(16)));
+
+struct mdesc_elem {
+	u8	tag;
+#define MD_LIST_END	0x00
+#define MD_NODE		0x4e
+#define MD_NODE_END	0x45
+#define MD_NOOP		0x20
+#define MD_PROP_ARC	0x61
+#define MD_PROP_VAL	0x76
+#define MD_PROP_STR	0x73
+#define MD_PROP_DATA	0x64
+	u8	name_len;
+	u16	resv;
+	u32	name_offset;
+	union {
+		struct {
+			u32	data_len;
+			u32	data_offset;
+		} data;
+		u64	val;
+	} d;
+};
+
+struct mdesc_mem_ops {
+	struct mdesc_handle *(*alloc)(unsigned int mdesc_size);
+	void (*free)(struct mdesc_handle *handle);
+};
+
+struct mdesc_handle {
+	struct list_head	list;
+	struct mdesc_mem_ops	*mops;
+	void			*self_base;
+	refcount_t		refcnt;
+	unsigned int		handle_size;
+	struct mdesc_hdr	mdesc;
+};
+
+typedef int (*mdesc_node_info_get_f)(struct mdesc_handle *, u64,
+				     union md_node_info *);
+typedef void (*mdesc_node_info_rel_f)(union md_node_info *);
+typedef bool (*mdesc_node_match_f)(union md_node_info *, union md_node_info *);
+
+struct md_node_ops {
+	char			*name;
+	mdesc_node_info_get_f	get_info;
+	mdesc_node_info_rel_f	rel_info;
+	mdesc_node_match_f	node_match;
+};
+
+static int get_vdev_port_node_info(struct mdesc_handle *md, u64 node,
+				   union md_node_info *node_info);
+static void rel_vdev_port_node_info(union md_node_info *node_info);
+static bool vdev_port_node_match(union md_node_info *a_node_info,
+				 union md_node_info *b_node_info);
+
+static int get_ds_port_node_info(struct mdesc_handle *md, u64 node,
+				 union md_node_info *node_info);
+static void rel_ds_port_node_info(union md_node_info *node_info);
+static bool ds_port_node_match(union md_node_info *a_node_info,
+			       union md_node_info *b_node_info);
+
+/* supported node types which can be registered */
+static struct md_node_ops md_node_ops_table[] = {
+	{"virtual-device-port", get_vdev_port_node_info,
+	 rel_vdev_port_node_info, vdev_port_node_match},
+	{"domain-services-port", get_ds_port_node_info,
+	 rel_ds_port_node_info, ds_port_node_match},
+	{NULL, NULL, NULL, NULL}
+};
+
+static void mdesc_get_node_ops(const char *node_name,
+			       mdesc_node_info_get_f *get_info_f,
+			       mdesc_node_info_rel_f *rel_info_f,
+			       mdesc_node_match_f *match_f)
+{
+	int i;
+
+	if (get_info_f)
+		*get_info_f = NULL;
+
+	if (rel_info_f)
+		*rel_info_f = NULL;
+
+	if (match_f)
+		*match_f = NULL;
+
+	if (!node_name)
+		return;
+
+	for (i = 0; md_node_ops_table[i].name != NULL; i++) {
+		if (strcmp(md_node_ops_table[i].name, node_name) == 0) {
+			if (get_info_f)
+				*get_info_f = md_node_ops_table[i].get_info;
+
+			if (rel_info_f)
+				*rel_info_f = md_node_ops_table[i].rel_info;
+
+			if (match_f)
+				*match_f = md_node_ops_table[i].node_match;
+
+			break;
+		}
+	}
+}
+
+static void mdesc_handle_init(struct mdesc_handle *hp,
+			      unsigned int handle_size,
+			      void *base)
+{
+	BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1));
+
+	memset(hp, 0, handle_size);
+	INIT_LIST_HEAD(&hp->list);
+	hp->self_base = base;
+	refcount_set(&hp->refcnt, 1);
+	hp->handle_size = handle_size;
+}
+
+static struct mdesc_handle * __init mdesc_memblock_alloc(unsigned int mdesc_size)
+{
+	unsigned int handle_size, alloc_size;
+	struct mdesc_handle *hp;
+	unsigned long paddr;
+
+	handle_size = (sizeof(struct mdesc_handle) -
+		       sizeof(struct mdesc_hdr) +
+		       mdesc_size);
+	alloc_size = PAGE_ALIGN(handle_size);
+
+	paddr = memblock_alloc(alloc_size, PAGE_SIZE);
+
+	hp = NULL;
+	if (paddr) {
+		hp = __va(paddr);
+		mdesc_handle_init(hp, handle_size, hp);
+	}
+	return hp;
+}
+
+static void __init mdesc_memblock_free(struct mdesc_handle *hp)
+{
+	unsigned int alloc_size;
+	unsigned long start;
+
+	BUG_ON(refcount_read(&hp->refcnt) != 0);
+	BUG_ON(!list_empty(&hp->list));
+
+	alloc_size = PAGE_ALIGN(hp->handle_size);
+	start = __pa(hp);
+	free_bootmem_late(start, alloc_size);
+}
+
+static struct mdesc_mem_ops memblock_mdesc_ops = {
+	.alloc = mdesc_memblock_alloc,
+	.free  = mdesc_memblock_free,
+};
+
+static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
+{
+	unsigned int handle_size;
+	struct mdesc_handle *hp;
+	unsigned long addr;
+	void *base;
+
+	handle_size = (sizeof(struct mdesc_handle) -
+		       sizeof(struct mdesc_hdr) +
+		       mdesc_size);
+	base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+	if (!base)
+		return NULL;
+
+	addr = (unsigned long)base;
+	addr = (addr + 15UL) & ~15UL;
+	hp = (struct mdesc_handle *) addr;
+
+	mdesc_handle_init(hp, handle_size, base);
+
+	return hp;
+}
+
+static void mdesc_kfree(struct mdesc_handle *hp)
+{
+	BUG_ON(refcount_read(&hp->refcnt) != 0);
+	BUG_ON(!list_empty(&hp->list));
+
+	kfree(hp->self_base);
+}
+
+static struct mdesc_mem_ops kmalloc_mdesc_memops = {
+	.alloc = mdesc_kmalloc,
+	.free  = mdesc_kfree,
+};
+
+static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size,
+					struct mdesc_mem_ops *mops)
+{
+	struct mdesc_handle *hp = mops->alloc(mdesc_size);
+
+	if (hp)
+		hp->mops = mops;
+
+	return hp;
+}
+
+static void mdesc_free(struct mdesc_handle *hp)
+{
+	hp->mops->free(hp);
+}
+
+static struct mdesc_handle *cur_mdesc;
+static LIST_HEAD(mdesc_zombie_list);
+static DEFINE_SPINLOCK(mdesc_lock);
+
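+/* Typical consumer pattern (an illustrative sketch):
+ *
+ *	struct mdesc_handle *hp = mdesc_grab();
+ *	u64 node;
+ *
+ *	mdesc_for_each_node_by_name(hp, node, "cpu") {
+ *		const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+ *		...
+ *	}
+ *	mdesc_release(hp);
+ *
+ * mdesc_update() may swap cur_mdesc at any time; the refcount keeps the
+ * grabbed snapshot alive until the last user releases it.
+ */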
+struct mdesc_handle *mdesc_grab(void)
+{
+	struct mdesc_handle *hp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdesc_lock, flags);
+	hp = cur_mdesc;
+	if (hp)
+		refcount_inc(&hp->refcnt);
+	spin_unlock_irqrestore(&mdesc_lock, flags);
+
+	return hp;
+}
+EXPORT_SYMBOL(mdesc_grab);
+
+void mdesc_release(struct mdesc_handle *hp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdesc_lock, flags);
+	if (refcount_dec_and_test(&hp->refcnt)) {
+		list_del_init(&hp->list);
+		hp->mops->free(hp);
+	}
+	spin_unlock_irqrestore(&mdesc_lock, flags);
+}
+EXPORT_SYMBOL(mdesc_release);
+
+static DEFINE_MUTEX(mdesc_mutex);
+static struct mdesc_notifier_client *client_list;
+
+void mdesc_register_notifier(struct mdesc_notifier_client *client)
+{
+	bool supported = false;
+	u64 node;
+	int i;
+
+	mutex_lock(&mdesc_mutex);
+
+	/* check to see if the node is supported for registration */
+	for (i = 0; md_node_ops_table[i].name != NULL; i++) {
+		if (strcmp(md_node_ops_table[i].name, client->node_name) == 0) {
+			supported = true;
+			break;
+		}
+	}
+
+	if (!supported) {
+		pr_err("MD: %s node not supported\n", client->node_name);
+		mutex_unlock(&mdesc_mutex);
+		return;
+	}
+
+	client->next = client_list;
+	client_list = client;
+
+	mdesc_for_each_node_by_name(cur_mdesc, node, client->node_name)
+		client->add(cur_mdesc, node, client->node_name);
+
+	mutex_unlock(&mdesc_mutex);
+}
+
+static const u64 *parent_cfg_handle(struct mdesc_handle *hp, u64 node)
+{
+	const u64 *id;
+	u64 a;
+
+	id = NULL;
+	mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+		u64 target;
+
+		target = mdesc_arc_target(hp, a);
+		id = mdesc_get_property(hp, target,
+					"cfg-handle", NULL);
+		if (id)
+			break;
+	}
+
+	return id;
+}
+
+static int get_vdev_port_node_info(struct mdesc_handle *md, u64 node,
+				   union md_node_info *node_info)
+{
+	const u64 *parent_cfg_hdlp;
+	const char *name;
+	const u64 *idp;
+
+	/*
+	 * Virtual device nodes are distinguished by:
+	 * 1. "id" property
+	 * 2. "name" property
+	 * 3. parent node "cfg-handle" property
+	 */
+	idp = mdesc_get_property(md, node, "id", NULL);
+	name = mdesc_get_property(md, node, "name", NULL);
+	parent_cfg_hdlp = parent_cfg_handle(md, node);
+
+	if (!idp || !name || !parent_cfg_hdlp)
+		return -1;
+
+	node_info->vdev_port.id = *idp;
+	node_info->vdev_port.name = kstrdup_const(name, GFP_KERNEL);
+	node_info->vdev_port.parent_cfg_hdl = *parent_cfg_hdlp;
+
+	return 0;
+}
+
+static void rel_vdev_port_node_info(union md_node_info *node_info)
+{
+	if (node_info && node_info->vdev_port.name) {
+		kfree_const(node_info->vdev_port.name);
+		node_info->vdev_port.name = NULL;
+	}
+}
+
+static bool vdev_port_node_match(union md_node_info *a_node_info,
+				 union md_node_info *b_node_info)
+{
+	if (a_node_info->vdev_port.id != b_node_info->vdev_port.id)
+		return false;
+
+	if (a_node_info->vdev_port.parent_cfg_hdl !=
+	    b_node_info->vdev_port.parent_cfg_hdl)
+		return false;
+
+	if (strncmp(a_node_info->vdev_port.name,
+		    b_node_info->vdev_port.name, MDESC_MAX_STR_LEN) != 0)
+		return false;
+
+	return true;
+}
+
+static int get_ds_port_node_info(struct mdesc_handle *md, u64 node,
+				 union md_node_info *node_info)
+{
+	const u64 *idp;
+
+	/* DS port nodes use the "id" property to distinguish them */
+	idp = mdesc_get_property(md, node, "id", NULL);
+	if (!idp)
+		return -1;
+
+	node_info->ds_port.id = *idp;
+
+	return 0;
+}
+
+static void rel_ds_port_node_info(union md_node_info *node_info)
+{
+}
+
+static bool ds_port_node_match(union md_node_info *a_node_info,
+			       union md_node_info *b_node_info)
+{
+	if (a_node_info->ds_port.id != b_node_info->ds_port.id)
+		return false;
+
+	return true;
+}
+
+/* Run 'func' on nodes which are in A but not in B.  */
+static void invoke_on_missing(const char *name,
+			      struct mdesc_handle *a,
+			      struct mdesc_handle *b,
+			      void (*func)(struct mdesc_handle *, u64,
+					   const char *node_name))
+{
+	mdesc_node_info_get_f get_info_func;
+	mdesc_node_info_rel_f rel_info_func;
+	mdesc_node_match_f node_match_func;
+	union md_node_info a_node_info;
+	union md_node_info b_node_info;
+	bool found;
+	u64 a_node;
+	u64 b_node;
+	int rv;
+
+	/*
+	 * Find the get_info, rel_info and node_match ops for the given
+	 * node name
+	 */
+	mdesc_get_node_ops(name, &get_info_func, &rel_info_func,
+			   &node_match_func);
+
+	/* If we didn't find a match, the node type is not supported */
+	if (!get_info_func || !rel_info_func || !node_match_func) {
+		pr_err("MD: %s node type is not supported\n", name);
+		return;
+	}
+
+	mdesc_for_each_node_by_name(a, a_node, name) {
+		found = false;
+
+		rv = get_info_func(a, a_node, &a_node_info);
+		if (rv != 0) {
+			pr_err("MD: Cannot find 1 or more required match properties for %s node.\n",
+			       name);
+			continue;
+		}
+
+		/* Check each node in B for node matching a_node */
+		mdesc_for_each_node_by_name(b, b_node, name) {
+			rv = get_info_func(b, b_node, &b_node_info);
+			if (rv != 0)
+				continue;
+
+			if (node_match_func(&a_node_info, &b_node_info)) {
+				found = true;
+				rel_info_func(&b_node_info);
+				break;
+			}
+
+			rel_info_func(&b_node_info);
+		}
+
+		rel_info_func(&a_node_info);
+
+		if (!found)
+			func(a, a_node, name);
+	}
+}
+
+static void notify_one(struct mdesc_notifier_client *p,
+		       struct mdesc_handle *old_hp,
+		       struct mdesc_handle *new_hp)
+{
+	invoke_on_missing(p->node_name, old_hp, new_hp, p->remove);
+	invoke_on_missing(p->node_name, new_hp, old_hp, p->add);
+}
+
+static void mdesc_notify_clients(struct mdesc_handle *old_hp,
+				 struct mdesc_handle *new_hp)
+{
+	struct mdesc_notifier_client *p = client_list;
+
+	while (p) {
+		notify_one(p, old_hp, new_hp);
+		p = p->next;
+	}
+}
+
+void mdesc_update(void)
+{
+	unsigned long len, real_len, status;
+	struct mdesc_handle *hp, *orig_hp;
+	unsigned long flags;
+
+	mutex_lock(&mdesc_mutex);
+
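+	/* A zero-length request just asks the hypervisor for the size of
+	 * the current MD, returned in len.
+	 */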
+	(void) sun4v_mach_desc(0UL, 0UL, &len);
+
+	hp = mdesc_alloc(len, &kmalloc_mdesc_memops);
+	if (!hp) {
+		printk(KERN_ERR "MD: mdesc alloc fails\n");
+		goto out;
+	}
+
+	status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+	if (status != HV_EOK || real_len > len) {
+		printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
+		       status);
+		refcount_dec(&hp->refcnt);
+		mdesc_free(hp);
+		goto out;
+	}
+
+	spin_lock_irqsave(&mdesc_lock, flags);
+	orig_hp = cur_mdesc;
+	cur_mdesc = hp;
+	spin_unlock_irqrestore(&mdesc_lock, flags);
+
+	mdesc_notify_clients(orig_hp, hp);
+
+	spin_lock_irqsave(&mdesc_lock, flags);
+	if (refcount_dec_and_test(&orig_hp->refcnt))
+		mdesc_free(orig_hp);
+	else
+		list_add(&orig_hp->list, &mdesc_zombie_list);
+	spin_unlock_irqrestore(&mdesc_lock, flags);
+
+out:
+	mutex_unlock(&mdesc_mutex);
+}
+
+u64 mdesc_get_node(struct mdesc_handle *hp, const char *node_name,
+		   union md_node_info *node_info)
+{
+	mdesc_node_info_get_f get_info_func;
+	mdesc_node_info_rel_f rel_info_func;
+	mdesc_node_match_f node_match_func;
+	union md_node_info hp_node_info;
+	u64 hp_node;
+	int rv;
+
+	if (hp == NULL || node_name == NULL || node_info == NULL)
+		return MDESC_NODE_NULL;
+
+	/* Find the ops for the given node name */
+	mdesc_get_node_ops(node_name, &get_info_func, &rel_info_func,
+			   &node_match_func);
+
+	/* If we didn't find ops for the given node name, it is not supported */
+	if (!get_info_func || !rel_info_func || !node_match_func) {
+		pr_err("MD: %s node is not supported\n", node_name);
+		return -EINVAL;
+	}
+
+	mdesc_for_each_node_by_name(hp, hp_node, node_name) {
+		rv = get_info_func(hp, hp_node, &hp_node_info);
+		if (rv != 0)
+			continue;
+
+		if (node_match_func(node_info, &hp_node_info))
+			break;
+
+		rel_info_func(&hp_node_info);
+	}
+
+	rel_info_func(&hp_node_info);
+
+	return hp_node;
+}
+EXPORT_SYMBOL(mdesc_get_node);
+
+int mdesc_get_node_info(struct mdesc_handle *hp, u64 node,
+			const char *node_name, union md_node_info *node_info)
+{
+	mdesc_node_info_get_f get_info_func;
+	int rv;
+
+	if (hp == NULL || node == MDESC_NODE_NULL ||
+	    node_name == NULL || node_info == NULL)
+		return -EINVAL;
+
+	/* Find the get_info op for the given node name */
+	mdesc_get_node_ops(node_name, &get_info_func, NULL, NULL);
+
+	/* If we didn't find a get_info_func, the node name is not supported */
+	if (get_info_func == NULL) {
+		pr_err("MD: %s node is not supported\n", node_name);
+		return -EINVAL;
+	}
+
+	rv = get_info_func(hp, node, node_info);
+	if (rv != 0) {
+		pr_err("MD: Cannot find 1 or more required match properties for %s node.\n",
+		       node_name);
+		return -1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(mdesc_get_node_info);
+
+static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
+{
+	return (struct mdesc_elem *) (mdesc + 1);
+}
+
+static void *name_block(struct mdesc_hdr *mdesc)
+{
+	return ((void *) node_block(mdesc)) + mdesc->node_sz;
+}
+
+static void *data_block(struct mdesc_hdr *mdesc)
+{
+	return ((void *) name_block(mdesc)) + mdesc->name_sz;
+}
+
+u64 mdesc_node_by_name(struct mdesc_handle *hp,
+		       u64 from_node, const char *name)
+{
+	struct mdesc_elem *ep = node_block(&hp->mdesc);
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+	u64 ret;
+
+	if (from_node == MDESC_NODE_NULL) {
+		ret = from_node = 0;
+	} else if (from_node >= last_node) {
+		return MDESC_NODE_NULL;
+	} else {
+		ret = ep[from_node].d.val;
+	}
+
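+	/* For an MD_NODE element, d.val holds the index of the next node in
+	 * the node block, so this walk hops from node to node and skips the
+	 * property elements in between.
+	 */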
+	while (ret < last_node) {
+		if (ep[ret].tag != MD_NODE)
+			return MDESC_NODE_NULL;
+		if (!strcmp(names + ep[ret].name_offset, name))
+			break;
+		ret = ep[ret].d.val;
+	}
+	if (ret >= last_node)
+		ret = MDESC_NODE_NULL;
+	return ret;
+}
+EXPORT_SYMBOL(mdesc_node_by_name);
+
+const void *mdesc_get_property(struct mdesc_handle *hp, u64 node,
+			       const char *name, int *lenp)
+{
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+	void *data = data_block(&hp->mdesc);
+	struct mdesc_elem *ep;
+
+	if (node == MDESC_NODE_NULL || node >= last_node)
+		return NULL;
+
+	ep = node_block(&hp->mdesc) + node;
+	ep++;
+	for (; ep->tag != MD_NODE_END; ep++) {
+		void *val = NULL;
+		int len = 0;
+
+		switch (ep->tag) {
+		case MD_PROP_VAL:
+			val = &ep->d.val;
+			len = 8;
+			break;
+
+		case MD_PROP_STR:
+		case MD_PROP_DATA:
+			val = data + ep->d.data.data_offset;
+			len = ep->d.data.data_len;
+			break;
+
+		default:
+			break;
+		}
+		if (!val)
+			continue;
+
+		if (!strcmp(names + ep->name_offset, name)) {
+			if (lenp)
+				*lenp = len;
+			return val;
+		}
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(mdesc_get_property);
+
+u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type)
+{
+	struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+
+	if (from == MDESC_NODE_NULL || from >= last_node)
+		return MDESC_NODE_NULL;
+
+	ep = base + from;
+
+	ep++;
+	for (; ep->tag != MD_NODE_END; ep++) {
+		if (ep->tag != MD_PROP_ARC)
+			continue;
+
+		if (strcmp(names + ep->name_offset, arc_type))
+			continue;
+
+		return ep - base;
+	}
+
+	return MDESC_NODE_NULL;
+}
+EXPORT_SYMBOL(mdesc_next_arc);
+
+u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc)
+{
+	struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+
+	ep = base + arc;
+
+	return ep->d.val;
+}
+EXPORT_SYMBOL(mdesc_arc_target);
+
+const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
+{
+	struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+
+	if (node == MDESC_NODE_NULL || node >= last_node)
+		return NULL;
+
+	ep = base + node;
+	if (ep->tag != MD_NODE)
+		return NULL;
+
+	return names + ep->name_offset;
+}
+EXPORT_SYMBOL(mdesc_node_name);
+
+static u64 max_cpus = 64;
+
+static void __init report_platform_properties(void)
+{
+	struct mdesc_handle *hp = mdesc_grab();
+	u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
+	const char *s;
+	const u64 *v;
+
+	if (pn == MDESC_NODE_NULL) {
+		prom_printf("No platform node in machine-description.\n");
+		prom_halt();
+	}
+
+	s = mdesc_get_property(hp, pn, "banner-name", NULL);
+	printk("PLATFORM: banner-name [%s]\n", s);
+	s = mdesc_get_property(hp, pn, "name", NULL);
+	printk("PLATFORM: name [%s]\n", s);
+
+	v = mdesc_get_property(hp, pn, "hostid", NULL);
+	if (v)
+		printk("PLATFORM: hostid [%08llx]\n", *v);
+	v = mdesc_get_property(hp, pn, "serial#", NULL);
+	if (v)
+		printk("PLATFORM: serial# [%08llx]\n", *v);
+	v = mdesc_get_property(hp, pn, "stick-frequency", NULL);
+	printk("PLATFORM: stick-frequency [%08llx]\n", *v);
+	v = mdesc_get_property(hp, pn, "mac-address", NULL);
+	if (v)
+		printk("PLATFORM: mac-address [%llx]\n", *v);
+	v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL);
+	if (v)
+		printk("PLATFORM: watchdog-resolution [%llu ms]\n", *v);
+	v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL);
+	if (v)
+		printk("PLATFORM: watchdog-max-timeout [%llu ms]\n", *v);
+	v = mdesc_get_property(hp, pn, "max-cpus", NULL);
+	if (v) {
+		max_cpus = *v;
+		printk("PLATFORM: max-cpus [%llu]\n", max_cpus);
+	}
+
+#ifdef CONFIG_SMP
+	{
+		int max_cpu, i;
+
+		if (v) {
+			max_cpu = *v;
+			if (max_cpu > NR_CPUS)
+				max_cpu = NR_CPUS;
+		} else {
+			max_cpu = NR_CPUS;
+		}
+		for (i = 0; i < max_cpu; i++)
+			set_cpu_possible(i, true);
+	}
+#endif
+
+	mdesc_release(hp);
+}
+
+static void fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp)
+{
+	const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
+	const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
+	const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL);
+	const char *type;
+	int type_len;
+
+	type = mdesc_get_property(hp, mp, "type", &type_len);
+
+	switch (*level) {
+	case 1:
+		if (of_find_in_proplist(type, "instn", type_len)) {
+			c->icache_size = *size;
+			c->icache_line_size = *line_size;
+		} else if (of_find_in_proplist(type, "data", type_len)) {
+			c->dcache_size = *size;
+			c->dcache_line_size = *line_size;
+		}
+		break;
+
+	case 2:
+		c->ecache_size = *size;
+		c->ecache_line_size = *line_size;
+		break;
+
+	default:
+		break;
+	}
+
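+	/* L1 cache nodes arc forward to the next cache level; recurse so
+	 * the unified L2 sizes are filled in as well.
+	 */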
+	if (*level == 1) {
+		u64 a;
+
+		mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+			u64 target = mdesc_arc_target(hp, a);
+			const char *name = mdesc_node_name(hp, target);
+
+			if (!strcmp(name, "cache"))
+				fill_in_one_cache(c, hp, target);
+		}
+	}
+}
+
+static void find_back_node_value(struct mdesc_handle *hp, u64 node,
+				 char *srch_val,
+				 void (*func)(struct mdesc_handle *, u64, int),
+				 u64 val, int depth)
+{
+	u64 arc;
+
+	/* Since we have an estimate of recursion depth, do a sanity check. */
+	if (depth == 0)
+		return;
+
+	mdesc_for_each_arc(arc, hp, node, MDESC_ARC_TYPE_BACK) {
+		u64 n = mdesc_arc_target(hp, arc);
+		const char *name = mdesc_node_name(hp, n);
+
+		if (!strcmp(srch_val, name))
+			(*func)(hp, n, val);
+
+		find_back_node_value(hp, n, srch_val, func, val, depth-1);
+	}
+}
+
+static void __mark_core_id(struct mdesc_handle *hp, u64 node,
+			   int core_id)
+{
+	const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+	if (*id < num_possible_cpus())
+		cpu_data(*id).core_id = core_id;
+}
+
+static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
+				int max_cache_id)
+{
+	const u64 *id = mdesc_get_property(hp, node, "id", NULL);
+
+	if (*id < num_possible_cpus()) {
+		cpu_data(*id).max_cache_id = max_cache_id;
+
+		/*
+		 * On systems without explicit socket descriptions, the
+		 * socket is max_cache_id.
+		 */
+		cpu_data(*id).sock_id = max_cache_id;
+	}
+}
+
+static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
+			  int core_id)
+{
+	find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
+}
+
+static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
+			       int max_cache_id)
+{
+	find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
+			     max_cache_id, 10);
+}
+
+static void set_core_ids(struct mdesc_handle *hp)
+{
+	int idx;
+	u64 mp;
+
+	idx = 1;
+
+	/* Identify unique cores by looking for cpus backpointed to by
+	 * level 1 instruction caches.
+	 */
+	mdesc_for_each_node_by_name(hp, mp, "cache") {
+		const u64 *level;
+		const char *type;
+		int len;
+
+		level = mdesc_get_property(hp, mp, "level", NULL);
+		if (*level != 1)
+			continue;
+
+		type = mdesc_get_property(hp, mp, "type", &len);
+		if (!of_find_in_proplist(type, "instn", len))
+			continue;
+
+		mark_core_ids(hp, mp, idx);
+		idx++;
+	}
+}
+
+static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
+{
+	u64 mp;
+	int idx = 1;
+	int fnd = 0;
+
+	/*
+	 * Identify the unique highest level of shared cache by looking for
+	 * cpus backpointed to by shared level N caches.
+	 */
+	mdesc_for_each_node_by_name(hp, mp, "cache") {
+		const u64 *cur_lvl;
+
+		cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
+		if (*cur_lvl != level)
+			continue;
+		mark_max_cache_ids(hp, mp, idx);
+		idx++;
+		fnd = 1;
+	}
+	return fnd;
+}
+
+static void set_sock_ids_by_socket(struct mdesc_handle *hp, u64 mp)
+{
+	int idx = 1;
+
+	mdesc_for_each_node_by_name(hp, mp, "socket") {
+		u64 a;
+
+		mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+			u64 t = mdesc_arc_target(hp, a);
+			const char *name;
+			const u64 *id;
+
+			name = mdesc_node_name(hp, t);
+			if (strcmp(name, "cpu"))
+				continue;
+
+			id = mdesc_get_property(hp, t, "id", NULL);
+			if (*id < num_possible_cpus())
+				cpu_data(*id).sock_id = idx;
+		}
+		idx++;
+	}
+}
+
+static void set_sock_ids(struct mdesc_handle *hp)
+{
+	u64 mp;
+
+	/*
+	 * Find the highest level of shared cache, which pre-T7 is also
+	 * the socket.
+	 */
+	if (!set_max_cache_ids_by_cache(hp, 3))
+		set_max_cache_ids_by_cache(hp, 2);
+
+	/* If the machine description exposes socket data, use it. */
+	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
+	if (mp != MDESC_NODE_NULL)
+		set_sock_ids_by_socket(hp, mp);
+}
+
+static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
+{
+	u64 a;
+
+	mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
+		u64 t = mdesc_arc_target(hp, a);
+		const char *name;
+		const u64 *id;
+
+		name = mdesc_node_name(hp, t);
+		if (strcmp(name, "cpu"))
+			continue;
+
+		id = mdesc_get_property(hp, t, "id", NULL);
+		if (*id < NR_CPUS)
+			cpu_data(*id).proc_id = proc_id;
+	}
+}
+
+static void __set_proc_ids(struct mdesc_handle *hp, const char *exec_unit_name)
+{
+	int idx;
+	u64 mp;
+
+	idx = 0;
+	mdesc_for_each_node_by_name(hp, mp, exec_unit_name) {
+		const char *type;
+		int len;
+
+		type = mdesc_get_property(hp, mp, "type", &len);
+		if (!of_find_in_proplist(type, "int", len) &&
+		    !of_find_in_proplist(type, "integer", len))
+			continue;
+
+		mark_proc_ids(hp, mp, idx);
+		idx++;
+	}
+}
+
+static void set_proc_ids(struct mdesc_handle *hp)
+{
+	__set_proc_ids(hp, "exec_unit");
+	__set_proc_ids(hp, "exec-unit");
+}
+
+static void get_one_mondo_bits(const u64 *p, unsigned int *mask,
+			       unsigned long def, unsigned long max)
+{
+	u64 val;
+
+	if (!p)
+		goto use_default;
+	val = *p;
+
+	if (!val || val >= 64)
+		goto use_default;
+
+	if (val > max)
+		val = max;
+
+	*mask = ((1U << val) * 64U) - 1U;
+	return;
+
+use_default:
+	*mask = ((1U << def) * 64U) - 1U;
+}
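
The mask arithmetic above is easier to check with concrete numbers: the machine description property holds log2 of the number of 64-byte queue entries, so the queue occupies (1 << val) * 64 bytes and the mask is that size minus one. A minimal userspace sketch of the same computation (not part of the kernel source):

	#include <stdio.h>

	int main(void)
	{
		unsigned long val = 7;	/* log2(#entries), as read from the MD */
		unsigned int mask = ((1U << val) * 64U) - 1U;

		/* 128 entries * 64 bytes = 8192-byte queue, mask 0x1fff */
		printf("entries=%lu bytes=%lu mask=%#x\n",
		       1UL << val, (1UL << val) * 64UL, mask);
		return 0;
	}
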
+
+static void get_mondo_data(struct mdesc_handle *hp, u64 mp,
+			   struct trap_per_cpu *tb)
+{
+	static int printed;
+	const u64 *val;
+
+	val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
+	get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7, ilog2(max_cpus * 2));
+
+	val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
+	get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7, 8);
+
+	val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
+	get_one_mondo_bits(val, &tb->resum_qmask, 6, 7);
+
+	val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
+	get_one_mondo_bits(val, &tb->nonresum_qmask, 2, 2);
+	if (!printed++) {
+		pr_info("SUN4V: Mondo queue sizes "
+			"[cpu(%u) dev(%u) r(%u) nr(%u)]\n",
+			tb->cpu_mondo_qmask + 1,
+			tb->dev_mondo_qmask + 1,
+			tb->resum_qmask + 1,
+			tb->nonresum_qmask + 1);
+	}
+}
+
+static void *mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handle *,
+						    u64, int, void *),
+				     void *arg, cpumask_t *mask)
+{
+	struct mdesc_handle *hp = mdesc_grab();
+	void *ret = NULL;
+	u64 mp;
+
+	mdesc_for_each_node_by_name(hp, mp, "cpu") {
+		const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
+		int cpuid = *id;
+
+#ifdef CONFIG_SMP
+		if (cpuid >= NR_CPUS) {
+			printk(KERN_WARNING "Ignoring CPU %d which is "
+			       ">= NR_CPUS (%d)\n",
+			       cpuid, NR_CPUS);
+			continue;
+		}
+		if (!cpumask_test_cpu(cpuid, mask))
+			continue;
+#endif
+
+		ret = func(hp, mp, cpuid, arg);
+		if (ret)
+			goto out;
+	}
+out:
+	mdesc_release(hp);
+	return ret;
+}
+
+static void *record_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid,
+			    void *arg)
+{
+	ncpus_probed++;
+#ifdef CONFIG_SMP
+	set_cpu_present(cpuid, true);
+#endif
+	return NULL;
+}
+
+void mdesc_populate_present_mask(cpumask_t *mask)
+{
+	if (tlb_type != hypervisor)
+		return;
+
+	ncpus_probed = 0;
+	mdesc_iterate_over_cpus(record_one_cpu, NULL, mask);
+}
+
+static void * __init check_one_pgsz(struct mdesc_handle *hp, u64 mp,
+				    int cpuid, void *arg)
+{
+	const u64 *pgsz_prop = mdesc_get_property(hp, mp,
+						  "mmu-page-size-list", NULL);
+	unsigned long *pgsz_mask = arg;
+	u64 val;
+
+	val = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
+	       HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
+	if (pgsz_prop)
+		val = *pgsz_prop;
+
+	if (!*pgsz_mask)
+		*pgsz_mask = val;
+	else
+		*pgsz_mask &= val;
+	return NULL;
+}
+
+void __init mdesc_get_page_sizes(cpumask_t *mask, unsigned long *pgsz_mask)
+{
+	*pgsz_mask = 0;
+	mdesc_iterate_over_cpus(check_one_pgsz, pgsz_mask, mask);
+}
+
+static void *fill_in_one_cpu(struct mdesc_handle *hp, u64 mp, int cpuid,
+			     void *arg)
+{
+	const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
+	struct trap_per_cpu *tb;
+	cpuinfo_sparc *c;
+	u64 a;
+
+#ifndef CONFIG_SMP
+	/* On uniprocessor we only want the values for the
+	 * real physical cpu the kernel booted onto; however,
+	 * cpu_data() only has one entry, at index 0.
+	 */
+	if (cpuid != real_hard_smp_processor_id())
+		return NULL;
+	cpuid = 0;
+#endif
+
+	c = &cpu_data(cpuid);
+	c->clock_tick = *cfreq;
+
+	tb = &trap_block[cpuid];
+	get_mondo_data(hp, mp, tb);
+
+	mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+		u64 j, t = mdesc_arc_target(hp, a);
+		const char *t_name;
+
+		t_name = mdesc_node_name(hp, t);
+		if (!strcmp(t_name, "cache")) {
+			fill_in_one_cache(c, hp, t);
+			continue;
+		}
+
+		mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
+			u64 n = mdesc_arc_target(hp, j);
+			const char *n_name;
+
+			n_name = mdesc_node_name(hp, n);
+			if (!strcmp(n_name, "cache"))
+				fill_in_one_cache(c, hp, n);
+		}
+	}
+
+	c->core_id = 0;
+	c->proc_id = -1;
+
+	return NULL;
+}
+
+void mdesc_fill_in_cpu_data(cpumask_t *mask)
+{
+	struct mdesc_handle *hp;
+
+	mdesc_iterate_over_cpus(fill_in_one_cpu, NULL, mask);
+
+	hp = mdesc_grab();
+
+	set_core_ids(hp);
+	set_proc_ids(hp);
+	set_sock_ids(hp);
+
+	mdesc_release(hp);
+
+	smp_fill_in_sib_core_maps();
+}
+
+/* mdesc_open() - Grab a reference to mdesc_handle when /dev/mdesc is
+ * opened. Hold this reference until /dev/mdesc is closed to ensure the
+ * mdesc data structure is not released underneath us. Store the
+ * pointer to the mdesc structure in private_data for read and seek to use.
+ */
+static int mdesc_open(struct inode *inode, struct file *file)
+{
+	struct mdesc_handle *hp = mdesc_grab();
+
+	if (!hp)
+		return -ENODEV;
+
+	file->private_data = hp;
+
+	return 0;
+}
+
+static ssize_t mdesc_read(struct file *file, char __user *buf,
+			  size_t len, loff_t *offp)
+{
+	struct mdesc_handle *hp = file->private_data;
+	unsigned char *mdesc;
+	int bytes_left, count = len;
+
+	if (*offp >= hp->handle_size)
+		return 0;
+
+	bytes_left = hp->handle_size - *offp;
+	if (count > bytes_left)
+		count = bytes_left;
+
+	mdesc = (unsigned char *)&hp->mdesc;
+	mdesc += *offp;
+	if (!copy_to_user(buf, mdesc, count)) {
+		*offp += count;
+		return count;
+	} else {
+		return -EFAULT;
+	}
+}
+
+static loff_t mdesc_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct mdesc_handle *hp = file->private_data;
+
+	return no_seek_end_llseek_size(file, offset, whence, hp->handle_size);
+}
+
+/* mdesc_close() - /dev/mdesc is being closed, release the reference to
+ * mdesc structure.
+ */
+static int mdesc_close(struct inode *inode, struct file *file)
+{
+	mdesc_release(file->private_data);
+	return 0;
+}
+
+static const struct file_operations mdesc_fops = {
+	.open    = mdesc_open,
+	.read	 = mdesc_read,
+	.llseek  = mdesc_llseek,
+	.release = mdesc_close,
+	.owner	 = THIS_MODULE,
+};
+
+static struct miscdevice mdesc_misc = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "mdesc",
+	.fops	= &mdesc_fops,
+};
+
+static int __init mdesc_misc_init(void)
+{
+	return misc_register(&mdesc_misc);
+}
+
+__initcall(mdesc_misc_init);
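
Since mdesc_read() bounds every read against hp->handle_size, a userspace program can dump the raw machine description simply by reading /dev/mdesc to EOF. A minimal sketch, assuming the device node exists and is readable:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd = open("/dev/mdesc", O_RDONLY);

		if (fd < 0) {
			perror("open /dev/mdesc");
			return 1;
		}
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);	/* raw MD bytes */
		close(fd);
		return 0;
	}
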
+
+void __init sun4v_mdesc_init(void)
+{
+	struct mdesc_handle *hp;
+	unsigned long len, real_len, status;
+
+	(void) sun4v_mach_desc(0UL, 0UL, &len);
+
+	printk("MDESC: Size is %lu bytes.\n", len);
+
+	hp = mdesc_alloc(len, &memblock_mdesc_ops);
+	if (hp == NULL) {
+		prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
+		prom_halt();
+	}
+
+	status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+	if (status != HV_EOK || real_len > len) {
+		prom_printf("sun4v_mach_desc fails, err(%lu), "
+			    "len(%lu), real_len(%lu)\n",
+			    status, len, real_len);
+		mdesc_free(hp);
+		prom_halt();
+	}
+
+	cur_mdesc = hp;
+
+	mdesc_adi_init();
+	report_platform_properties();
+}
diff --git a/arch/sparc/kernel/misctrap.S b/arch/sparc/kernel/misctrap.S
new file mode 100644
index 0000000..b5c8417
--- /dev/null
+++ b/arch/sparc/kernel/misctrap.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef CONFIG_KGDB
+	.globl		arch_kgdb_breakpoint
+	.type		arch_kgdb_breakpoint,#function
+arch_kgdb_breakpoint:
+	ta		0x72
+	retl
+	 nop
+	.size		arch_kgdb_breakpoint,.-arch_kgdb_breakpoint
+#endif
+
+	.type		__do_privact,#function
+__do_privact:
+	mov		TLB_SFSR, %g3
+	stxa		%g0, [%g3] ASI_DMMU	! Clear FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	or		%g7, %lo(109b), %g7
+	call		do_privact
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		__do_privact,.-__do_privact
+
+	.type		do_mna,#function
+do_mna:
+	rdpr		%tl, %g3
+	cmp		%g3, 1
+
+	/* Set up %g4/%g5 now as they are used in the
+	 * winfixup code.
+	 */
+	mov		TLB_SFSR, %g3
+	mov		DMMU_SFAR, %g4
+	ldxa		[%g4] ASI_DMMU, %g4
+	ldxa		[%g3] ASI_DMMU, %g5
+	stxa		%g0, [%g3] ASI_DMMU	! Clear FaultValid bit
+	membar		#Sync
+	bgu,pn		%icc, winfix_mna
+	 rdpr		%tpc, %g3
+
+1:	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		mem_address_unaligned
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		do_mna,.-do_mna
+
+	.type		do_lddfmna,#function
+do_lddfmna:
+	sethi		%hi(109f), %g7
+	mov		TLB_SFSR, %g4
+	ldxa		[%g4] ASI_DMMU, %g5
+	stxa		%g0, [%g4] ASI_DMMU	! Clear FaultValid bit
+	membar		#Sync
+	mov		DMMU_SFAR, %g4
+	ldxa		[%g4] ASI_DMMU, %g4
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		handle_lddfmna
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		do_lddfmna,.-do_lddfmna
+
+	.type		do_stdfmna,#function
+do_stdfmna:
+	sethi		%hi(109f), %g7
+	mov		TLB_SFSR, %g4
+	ldxa		[%g4] ASI_DMMU, %g5
+	stxa		%g0, [%g4] ASI_DMMU	! Clear FaultValid bit
+	membar		#Sync
+	mov		DMMU_SFAR, %g4
+	ldxa		[%g4] ASI_DMMU, %g4
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		handle_stdfmna
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	 nop
+	.size		do_stdfmna,.-do_stdfmna
+
+	.type		breakpoint_trap,#function
+breakpoint_trap:
+	call		sparc_breakpoint
+	 add		%sp, PTREGS_OFF, %o0
+	ba,pt		%xcc, rtrap
+	 nop
+	.size		breakpoint_trap,.-breakpoint_trap
diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c
new file mode 100644
index 0000000..df39580
--- /dev/null
+++ b/arch/sparc/kernel/module.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Kernel module help for sparc64.
+ *
+ * Copyright (C) 2001 Rusty Russell.
+ * Copyright (C) 2002 David S. Miller.
+ */
+
+#include <linux/moduleloader.h>
+#include <linux/kernel.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/mm.h>
+
+#include <asm/processor.h>
+#include <asm/spitfire.h>
+#include <asm/cacheflush.h>
+
+#include "entry.h"
+
+#ifdef CONFIG_SPARC64
+
+#include <linux/jump_label.h>
+
+static void *module_map(unsigned long size)
+{
+	if (PAGE_ALIGN(size) > MODULES_LEN)
+		return NULL;
+	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+				GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
+				__builtin_return_address(0));
+}
+#else
+static void *module_map(unsigned long size)
+{
+	return vmalloc(size);
+}
+#endif /* CONFIG_SPARC64 */
+
+void *module_alloc(unsigned long size)
+{
+	void *ret;
+
+	ret = module_map(size);
+	if (ret)
+		memset(ret, 0, size);
+
+	return ret;
+}
+
+/* Make generic code ignore STT_REGISTER dummy undefined symbols.  */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+			      Elf_Shdr *sechdrs,
+			      char *secstrings,
+			      struct module *mod)
+{
+	unsigned int symidx;
+	Elf_Sym *sym;
+	char *strtab;
+	int i;
+
+	for (symidx = 0; sechdrs[symidx].sh_type != SHT_SYMTAB; symidx++) {
+		if (symidx == hdr->e_shnum-1) {
+			printk("%s: no symtab found.\n", mod->name);
+			return -ENOEXEC;
+		}
+	}
+	sym = (Elf_Sym *)sechdrs[symidx].sh_addr;
+	strtab = (char *)sechdrs[sechdrs[symidx].sh_link].sh_addr;
+
+	for (i = 1; i < sechdrs[symidx].sh_size / sizeof(Elf_Sym); i++) {
+		if (sym[i].st_shndx == SHN_UNDEF) {
+			if (ELF_ST_TYPE(sym[i].st_info) == STT_REGISTER)
+				sym[i].st_shndx = SHN_ABS;
+		}
+	}
+	return 0;
+}
+
+int apply_relocate_add(Elf_Shdr *sechdrs,
+		       const char *strtab,
+		       unsigned int symindex,
+		       unsigned int relsec,
+		       struct module *me)
+{
+	unsigned int i;
+	Elf_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+	Elf_Sym *sym;
+	u8 *location;
+	u32 *loc32;
+
+	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+		Elf_Addr v;
+
+		/* This is where to make the change */
+		location = (u8 *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+			+ rel[i].r_offset;
+		loc32 = (u32 *) location;
+
+#ifdef CONFIG_SPARC64
+		BUG_ON(((u64)location >> (u64)32) != (u64)0);
+#endif /* CONFIG_SPARC64 */
+
+		/* This is the symbol it is referring to.  Note that all
+		   undefined symbols have been resolved.  */
+		sym = (Elf_Sym *)sechdrs[symindex].sh_addr
+			+ ELF_R_SYM(rel[i].r_info);
+		v = sym->st_value + rel[i].r_addend;
+
+		switch (ELF_R_TYPE(rel[i].r_info) & 0xff) {
+		case R_SPARC_DISP32:
+			v -= (Elf_Addr) location;
+			*loc32 = v;
+			break;
+#ifdef CONFIG_SPARC64
+		case R_SPARC_64:
+			location[0] = v >> 56;
+			location[1] = v >> 48;
+			location[2] = v >> 40;
+			location[3] = v >> 32;
+			location[4] = v >> 24;
+			location[5] = v >> 16;
+			location[6] = v >>  8;
+			location[7] = v >>  0;
+			break;
+
+		case R_SPARC_WDISP19:
+			v -= (Elf_Addr) location;
+			*loc32 = (*loc32 & ~0x7ffff) |
+				((v >> 2) & 0x7ffff);
+			break;
+
+		case R_SPARC_OLO10:
+			*loc32 = (*loc32 & ~0x1fff) |
+				(((v & 0x3ff) +
+				  (ELF_R_TYPE(rel[i].r_info) >> 8))
+				 & 0x1fff);
+			break;
+#endif /* CONFIG_SPARC64 */
+
+		case R_SPARC_32:
+		case R_SPARC_UA32:
+			location[0] = v >> 24;
+			location[1] = v >> 16;
+			location[2] = v >>  8;
+			location[3] = v >>  0;
+			break;
+
+		case R_SPARC_WDISP30:
+			v -= (Elf_Addr) location;
+			*loc32 = (*loc32 & ~0x3fffffff) |
+				((v >> 2) & 0x3fffffff);
+			break;
+
+		case R_SPARC_WDISP22:
+			v -= (Elf_Addr) location;
+			*loc32 = (*loc32 & ~0x3fffff) |
+				((v >> 2) & 0x3fffff);
+			break;
+
+		case R_SPARC_LO10:
+			*loc32 = (*loc32 & ~0x3ff) | (v & 0x3ff);
+			break;
+
+		case R_SPARC_HI22:
+			*loc32 = (*loc32 & ~0x3fffff) |
+				((v >> 10) & 0x3fffff);
+			break;
+
+		default:
+			printk(KERN_ERR "module %s: Unknown relocation: %x\n",
+			       me->name,
+			       (int) (ELF_R_TYPE(rel[i].r_info) & 0xff));
+			return -ENOEXEC;
+		}
+	}
+	return 0;
+}
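
The R_SPARC_HI22/R_SPARC_LO10 pair above implements the classic SPARC sethi/or address split: HI22 patches the upper 22 bits into a sethi immediate and LO10 supplies the low 10 bits to the following or. A small sketch verifying that the two masks recombine losslessly (standalone C, value made up):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t v = 0x12345678;	/* sym->st_value + addend */
		uint32_t hi22 = (v >> 10) & 0x3fffff;	/* R_SPARC_HI22 */
		uint32_t lo10 = v & 0x3ff;		/* R_SPARC_LO10 */

		/* sethi fills bits 31..10, or provides bits 9..0 */
		assert(((hi22 << 10) | lo10) == v);
		return 0;
	}
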
+
+#ifdef CONFIG_SPARC64
+static void do_patch_sections(const Elf_Ehdr *hdr,
+			      const Elf_Shdr *sechdrs)
+{
+	const Elf_Shdr *s, *sun4v_1insn = NULL, *sun4v_2insn = NULL;
+	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		if (!strcmp(".sun4v_1insn_patch", secstrings + s->sh_name))
+			sun4v_1insn = s;
+		if (!strcmp(".sun4v_2insn_patch", secstrings + s->sh_name))
+			sun4v_2insn = s;
+	}
+
+	if (sun4v_1insn && tlb_type == hypervisor) {
+		void *p = (void *) sun4v_1insn->sh_addr;
+		sun4v_patch_1insn_range(p, p + sun4v_1insn->sh_size);
+	}
+	if (sun4v_2insn && tlb_type == hypervisor) {
+		void *p = (void *) sun4v_2insn->sh_addr;
+		sun4v_patch_2insn_range(p, p + sun4v_2insn->sh_size);
+	}
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs,
+		    struct module *me)
+{
+	/* make jump label nops */
+	jump_label_apply_nops(me);
+
+	do_patch_sections(hdr, sechdrs);
+
+	/* Cheetah's I-cache is fully coherent, so only spitfire
+	 * needs the manual flush below.
+	 */
+	if (tlb_type == spitfire) {
+		unsigned long va;
+
+		flushw_all();
+		for (va =  0; va < (PAGE_SIZE << 1); va += 32)
+			spitfire_put_icache_tag(va, 0x0);
+		__asm__ __volatile__("flush %g6");
+	}
+
+	return 0;
+}
+#endif /* CONFIG_SPARC64 */
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
new file mode 100644
index 0000000..8babbeb
--- /dev/null
+++ b/arch/sparc/kernel/nmi.c
@@ -0,0 +1,313 @@
+/* Pseudo NMI support on sparc64 systems.
+ *
+ * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
+ *
+ * The NMI watchdog support and infrastructure is based almost
+ * entirely upon the x86 NMI support code.
+ */
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/nmi.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/kernel_stat.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+
+#include <asm/perf_event.h>
+#include <asm/ptrace.h>
+#include <asm/pcr.h>
+
+#include "kstack.h"
+
+/* We don't have a real NMI on sparc64, but we can fake one
+ * up using profiling counter overflow interrupts and interrupt
+ * levels.
+ *
+ * The profile counter overflow interrupt arrives at level 15, so we
+ * use level 14 as our IRQ-off level.
+ */
+
+static int panic_on_timeout;
+
+/* nmi_active:
+ * >0: the NMI watchdog is active, but can be disabled
+ * <0: the NMI watchdog has not been set up, and cannot be enabled
+ *  0: the NMI watchdog is disabled, but can be enabled
+ */
+atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
+EXPORT_SYMBOL(nmi_active);
+static int nmi_init_done;
+static unsigned int nmi_hz = HZ;
+static DEFINE_PER_CPU(short, wd_enabled);
+static int endflag __initdata;
+
+static DEFINE_PER_CPU(unsigned int, last_irq_sum);
+static DEFINE_PER_CPU(long, alert_counter);
+static DEFINE_PER_CPU(int, nmi_touch);
+
+void arch_touch_nmi_watchdog(void)
+{
+	if (atomic_read(&nmi_active)) {
+		int cpu;
+
+		for_each_present_cpu(cpu) {
+			if (per_cpu(nmi_touch, cpu) != 1)
+				per_cpu(nmi_touch, cpu) = 1;
+		}
+	}
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
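
Driver and core code reach this through the generic touch_nmi_watchdog() wrapper; long-running loops that monopolize a cpu are expected to call it periodically so the alert counters reset before the lockup threshold is hit. A sketch of the usage pattern (do_one_unit_of_work() is a hypothetical stand-in):

	#include <linux/nmi.h>

	static void long_running_scan(unsigned long nr_items)
	{
		unsigned long i;

		for (i = 0; i < nr_items; i++) {
			do_one_unit_of_work(i);		/* hypothetical */
			if ((i % 1024) == 0)
				touch_nmi_watchdog();	/* sets per-cpu nmi_touch */
		}
	}
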
+
+static void die_nmi(const char *str, struct pt_regs *regs, int do_panic)
+{
+	int this_cpu = smp_processor_id();
+
+	if (notify_die(DIE_NMIWATCHDOG, str, regs, 0,
+		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
+		return;
+
+	if (do_panic || panic_on_oops)
+		panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+	else
+		WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+}
+
+notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs)
+{
+	unsigned int sum, touched = 0;
+	void *orig_sp;
+
+	clear_softint(1 << irq);
+
+	local_cpu_data().__nmi_count++;
+
+	nmi_enter();
+
+	orig_sp = set_hardirq_stack();
+
+	if (notify_die(DIE_NMI, "nmi", regs, 0,
+		       pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP)
+		touched = 1;
+	else
+		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
+
+	sum = local_cpu_data().irq0_irqs;
+	if (__this_cpu_read(nmi_touch)) {
+		__this_cpu_write(nmi_touch, 0);
+		touched = 1;
+	}
+	if (!touched && __this_cpu_read(last_irq_sum) == sum) {
+		__this_cpu_inc(alert_counter);
+		if (__this_cpu_read(alert_counter) == 30 * nmi_hz)
+			die_nmi("BUG: NMI Watchdog detected LOCKUP",
+				regs, panic_on_timeout);
+	} else {
+		__this_cpu_write(last_irq_sum, sum);
+		__this_cpu_write(alert_counter, 0);
+	}
+	if (__this_cpu_read(wd_enabled)) {
+		pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
+		pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
+	}
+
+	restore_hardirq_stack(orig_sp);
+
+	nmi_exit();
+}
+
+static inline unsigned int get_nmi_count(int cpu)
+{
+	return cpu_data(cpu).__nmi_count;
+}
+
+static __init void nmi_cpu_busy(void *data)
+{
+	while (endflag == 0)
+		mb();
+}
+
+static void report_broken_nmi(int cpu, int *prev_nmi_count)
+{
+	printk(KERN_CONT "\n");
+
+	printk(KERN_WARNING
+		"WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+			cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
+
+	printk(KERN_WARNING
+		"Please report this to bugzilla.kernel.org,\n");
+	printk(KERN_WARNING
+		"and attach the output of the 'dmesg' command.\n");
+
+	per_cpu(wd_enabled, cpu) = 0;
+	atomic_dec(&nmi_active);
+}
+
+void stop_nmi_watchdog(void *unused)
+{
+	if (!__this_cpu_read(wd_enabled))
+		return;
+	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
+	__this_cpu_write(wd_enabled, 0);
+	atomic_dec(&nmi_active);
+}
+
+static int __init check_nmi_watchdog(void)
+{
+	unsigned int *prev_nmi_count;
+	int cpu, err;
+
+	if (!atomic_read(&nmi_active))
+		return 0;
+
+	prev_nmi_count = kmalloc_array(nr_cpu_ids, sizeof(unsigned int),
+				       GFP_KERNEL);
+	if (!prev_nmi_count) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	printk(KERN_INFO "Testing NMI watchdog ... ");
+
+	smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
+
+	for_each_possible_cpu(cpu)
+		prev_nmi_count[cpu] = get_nmi_count(cpu);
+	local_irq_enable();
+	mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
+
+	for_each_online_cpu(cpu) {
+		if (!per_cpu(wd_enabled, cpu))
+			continue;
+		if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+			report_broken_nmi(cpu, prev_nmi_count);
+	}
+	endflag = 1;
+	if (!atomic_read(&nmi_active)) {
+		kfree(prev_nmi_count);
+		atomic_set(&nmi_active, -1);
+		err = -ENODEV;
+		goto error;
+	}
+	printk("OK.\n");
+
+	nmi_hz = 1;
+
+	kfree(prev_nmi_count);
+	return 0;
+error:
+	on_each_cpu(stop_nmi_watchdog, NULL, 1);
+	return err;
+}
+
+void start_nmi_watchdog(void *unused)
+{
+	if (__this_cpu_read(wd_enabled))
+		return;
+
+	__this_cpu_write(wd_enabled, 1);
+	atomic_inc(&nmi_active);
+
+	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
+	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
+
+	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
+}
+
+static void nmi_adjust_hz_one(void *unused)
+{
+	if (!__this_cpu_read(wd_enabled))
+		return;
+
+	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_disable);
+	pcr_ops->write_pic(0, pcr_ops->nmi_picl_value(nmi_hz));
+
+	pcr_ops->write_pcr(0, pcr_ops->pcr_nmi_enable);
+}
+
+void nmi_adjust_hz(unsigned int new_hz)
+{
+	nmi_hz = new_hz;
+	on_each_cpu(nmi_adjust_hz_one, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(nmi_adjust_hz);
+
+static int nmi_shutdown(struct notifier_block *nb, unsigned long cmd, void *p)
+{
+	on_each_cpu(stop_nmi_watchdog, NULL, 1);
+	return 0;
+}
+
+static struct notifier_block nmi_reboot_notifier = {
+	.notifier_call = nmi_shutdown,
+};
+
+int __init nmi_init(void)
+{
+	int err;
+
+	on_each_cpu(start_nmi_watchdog, NULL, 1);
+
+	err = check_nmi_watchdog();
+	if (!err) {
+		err = register_reboot_notifier(&nmi_reboot_notifier);
+		if (err) {
+			on_each_cpu(stop_nmi_watchdog, NULL, 1);
+			atomic_set(&nmi_active, -1);
+		}
+	}
+
+	nmi_init_done = 1;
+
+	return err;
+}
+
+static int __init setup_nmi_watchdog(char *str)
+{
+	if (!strncmp(str, "panic", 5))
+		panic_on_timeout = 1;
+
+	return 0;
+}
+__setup("nmi_watchdog=", setup_nmi_watchdog);
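
With this handler, booting with the option below makes a detected hard lockup panic the machine (via the do_panic argument to die_nmi()) instead of just warning; any other value, or omitting the option, keeps the default WARN behavior:

	nmi_watchdog=panic
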
+
+/*
+ * sparc specific NMI watchdog enable function.
+ * Enables watchdog if it is not enabled already.
+ */
+int watchdog_nmi_enable(unsigned int cpu)
+{
+	if (atomic_read(&nmi_active) == -1) {
+		pr_warn("NMI watchdog cannot be enabled or disabled\n");
+		return -1;
+	}
+
+	/*
+	 * The watchdog thread could start even before nmi_init is called.
+	 * Just return in that case and let nmi_init finish the init
+	 * process first.
+	 */
+	if (!nmi_init_done)
+		return 0;
+
+	smp_call_function_single(cpu, start_nmi_watchdog, NULL, 1);
+
+	return 0;
+}
+/*
+ * sparc specific NMI watchdog disable function.
+ * Disables watchdog if it is not disabled already.
+ */
+void watchdog_nmi_disable(unsigned int cpu)
+{
+	if (atomic_read(&nmi_active) == -1)
+		pr_warn_once("NMI watchdog cannot be enabled or disabled\n");
+	else
+		smp_call_function_single(cpu, stop_nmi_watchdog, NULL, 1);
+}
diff --git a/arch/sparc/kernel/of_device_32.c b/arch/sparc/kernel/of_device_32.c
new file mode 100644
index 0000000..e4abe9b
--- /dev/null
+++ b/arch/sparc/kernel/of_device_32.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <asm/leon.h>
+#include <asm/leon_amba.h>
+
+#include "of_device_common.h"
+#include "irq.h"
+
+/*
+ * PCI bus specific translator
+ */
+
+static int of_bus_pci_match(struct device_node *np)
+{
+	if (!strcmp(np->type, "pci") || !strcmp(np->type, "pciex")) {
+		/* Do not do PCI specific frobbing if the
+		 * PCI bridge lacks a ranges property.  We
+		 * want to pass it through up to the next
+		 * parent as-is, not with the PCI translate
+		 * method which chops off the top address cell.
+		 */
+		if (!of_find_property(np, "ranges", NULL))
+			return 0;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+static void of_bus_pci_count_cells(struct device_node *np,
+				   int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 3;
+	if (sizec)
+		*sizec = 2;
+}
+
+static int of_bus_pci_map(u32 *addr, const u32 *range,
+			  int na, int ns, int pna)
+{
+	u32 result[OF_MAX_ADDR_CELLS];
+	int i;
+
+	/* Check address type match */
+	if ((addr[0] ^ range[0]) & 0x03000000)
+		return -EINVAL;
+
+	if (of_out_of_range(addr + 1, range + 1, range + na + pna,
+			    na - 1, ns))
+		return -EINVAL;
+
+	/* Start with the parent range base.  */
+	memcpy(result, range + na, pna * 4);
+
+	/* Add in the child address offset, skipping high cell.  */
+	for (i = 0; i < na - 1; i++)
+		result[pna - 1 - i] +=
+			(addr[na - 1 - i] -
+			 range[na - 1 - i]);
+
+	memcpy(addr, result, pna * 4);
+
+	return 0;
+}
+
+static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
+{
+	u32 w = addr[0];
+
+	/* For PCI, we override whatever child busses may have used.  */
+	flags = 0;
+	switch((w >> 24) & 0x03) {
+	case 0x01:
+		flags |= IORESOURCE_IO;
+		break;
+
+	case 0x02: /* 32 bits */
+	case 0x03: /* 64 bits */
+		flags |= IORESOURCE_MEM;
+		break;
+	}
+	if (w & 0x40000000)
+		flags |= IORESOURCE_PREFETCH;
+	return flags;
+}
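
The flag decode keys off the standard PCI address-cell encoding: bits 25:24 of phys.hi select the space (1 = I/O, 2 = 32-bit memory, 3 = 64-bit memory) and bit 30 marks a prefetchable region. A worked example with a made-up cell value:

	#include <stdio.h>

	int main(void)
	{
		unsigned int w = 0x82000010;	/* example phys.hi cell */
		unsigned int space = (w >> 24) & 0x03;
		unsigned int pref = w & 0x40000000;

		/* prints "space=2 prefetchable=no": 32-bit memory space */
		printf("space=%u prefetchable=%s\n",
		       space, pref ? "yes" : "no");
		return 0;
	}
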
+
+static unsigned long of_bus_sbus_get_flags(const u32 *addr, unsigned long flags)
+{
+	return IORESOURCE_MEM;
+}
+
+/*
+ * AMBAPP bus specific translator
+ */
+
+static int of_bus_ambapp_match(struct device_node *np)
+{
+	return !strcmp(np->type, "ambapp");
+}
+
+static void of_bus_ambapp_count_cells(struct device_node *child,
+				      int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 1;
+	if (sizec)
+		*sizec = 1;
+}
+
+static int of_bus_ambapp_map(u32 *addr, const u32 *range,
+			     int na, int ns, int pna)
+{
+	return of_bus_default_map(addr, range, na, ns, pna);
+}
+
+static unsigned long of_bus_ambapp_get_flags(const u32 *addr,
+					     unsigned long flags)
+{
+	return IORESOURCE_MEM;
+}
+
+/*
+ * Array of bus specific translators
+ */
+
+static struct of_bus of_busses[] = {
+	/* PCI */
+	{
+		.name = "pci",
+		.addr_prop_name = "assigned-addresses",
+		.match = of_bus_pci_match,
+		.count_cells = of_bus_pci_count_cells,
+		.map = of_bus_pci_map,
+		.get_flags = of_bus_pci_get_flags,
+	},
+	/* SBUS */
+	{
+		.name = "sbus",
+		.addr_prop_name = "reg",
+		.match = of_bus_sbus_match,
+		.count_cells = of_bus_sbus_count_cells,
+		.map = of_bus_default_map,
+		.get_flags = of_bus_sbus_get_flags,
+	},
+	/* AMBA */
+	{
+		.name = "ambapp",
+		.addr_prop_name = "reg",
+		.match = of_bus_ambapp_match,
+		.count_cells = of_bus_ambapp_count_cells,
+		.map = of_bus_ambapp_map,
+		.get_flags = of_bus_ambapp_get_flags,
+	},
+	/* Default */
+	{
+		.name = "default",
+		.addr_prop_name = "reg",
+		.match = NULL,
+		.count_cells = of_bus_default_count_cells,
+		.map = of_bus_default_map,
+		.get_flags = of_bus_default_get_flags,
+	},
+};
+
+static struct of_bus *of_match_bus(struct device_node *np)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
+		if (!of_busses[i].match || of_busses[i].match(np))
+			return &of_busses[i];
+	BUG();
+	return NULL;
+}
+
+static int __init build_one_resource(struct device_node *parent,
+				     struct of_bus *bus,
+				     struct of_bus *pbus,
+				     u32 *addr,
+				     int na, int ns, int pna)
+{
+	const u32 *ranges;
+	unsigned int rlen;
+	int rone;
+
+	ranges = of_get_property(parent, "ranges", &rlen);
+	if (ranges == NULL || rlen == 0) {
+		u32 result[OF_MAX_ADDR_CELLS];
+		int i;
+
+		memset(result, 0, pna * 4);
+		for (i = 0; i < na; i++)
+			result[pna - 1 - i] =
+				addr[na - 1 - i];
+
+		memcpy(addr, result, pna * 4);
+		return 0;
+	}
+
+	/* Now walk through the ranges */
+	rlen /= 4;
+	rone = na + pna + ns;
+	for (; rlen >= rone; rlen -= rone, ranges += rone) {
+		if (!bus->map(addr, ranges, na, ns, pna))
+			return 0;
+	}
+
+	return 1;
+}
+
+static int __init use_1to1_mapping(struct device_node *pp)
+{
+	/* If we have a ranges property in the parent, use it.  */
+	if (of_find_property(pp, "ranges", NULL) != NULL)
+		return 0;
+
+	/* Some SBUS devices use intermediate nodes to express
+	 * hierarchy within the device itself.  These aren't
+	 * real bus nodes, and don't have a 'ranges' property.
+	 * But, we should still pass the translation work up
+	 * to the SBUS itself.
+	 */
+	if (!strcmp(pp->name, "dma") ||
+	    !strcmp(pp->name, "espdma") ||
+	    !strcmp(pp->name, "ledma") ||
+	    !strcmp(pp->name, "lebuffer"))
+		return 0;
+
+	return 1;
+}
+
+static int of_resource_verbose;
+
+static void __init build_device_resources(struct platform_device *op,
+					  struct device *parent)
+{
+	struct platform_device *p_op;
+	struct of_bus *bus;
+	int na, ns;
+	int index, num_reg;
+	const void *preg;
+
+	if (!parent)
+		return;
+
+	p_op = to_platform_device(parent);
+	bus = of_match_bus(p_op->dev.of_node);
+	bus->count_cells(op->dev.of_node, &na, &ns);
+
+	preg = of_get_property(op->dev.of_node, bus->addr_prop_name, &num_reg);
+	if (!preg || num_reg == 0)
+		return;
+
+	/* Convert to num-cells.  */
+	num_reg /= 4;
+
+	/* Convert to num-entries.  */
+	num_reg /= na + ns;
+
+	op->resource = op->archdata.resource;
+	op->num_resources = num_reg;
+	for (index = 0; index < num_reg; index++) {
+		struct resource *r = &op->resource[index];
+		u32 addr[OF_MAX_ADDR_CELLS];
+		const u32 *reg = (preg + (index * ((na + ns) * 4)));
+		struct device_node *dp = op->dev.of_node;
+		struct device_node *pp = p_op->dev.of_node;
+		struct of_bus *pbus, *dbus;
+		u64 size, result = OF_BAD_ADDR;
+		unsigned long flags;
+		int dna, dns;
+		int pna, pns;
+
+		size = of_read_addr(reg + na, ns);
+
+		memcpy(addr, reg, na * 4);
+
+		flags = bus->get_flags(reg, 0);
+
+		if (use_1to1_mapping(pp)) {
+			result = of_read_addr(addr, na);
+			goto build_res;
+		}
+
+		dna = na;
+		dns = ns;
+		dbus = bus;
+
+		while (1) {
+			dp = pp;
+			pp = dp->parent;
+			if (!pp) {
+				result = of_read_addr(addr, dna);
+				break;
+			}
+
+			pbus = of_match_bus(pp);
+			pbus->count_cells(dp, &pna, &pns);
+
+			if (build_one_resource(dp, dbus, pbus, addr,
+					       dna, dns, pna))
+				break;
+
+			flags = pbus->get_flags(addr, flags);
+
+			dna = pna;
+			dns = pns;
+			dbus = pbus;
+		}
+
+	build_res:
+		memset(r, 0, sizeof(*r));
+
+		if (of_resource_verbose)
+			printk("%s reg[%d] -> %llx\n",
+			       op->dev.of_node->full_name, index,
+			       result);
+
+		if (result != OF_BAD_ADDR) {
+			r->start = result & 0xffffffff;
+			r->end = result + size - 1;
+			r->flags = flags | ((result >> 32ULL) & 0xffUL);
+		}
+		r->name = op->dev.of_node->name;
+	}
+}
+
+static struct platform_device * __init scan_one_device(struct device_node *dp,
+						 struct device *parent)
+{
+	struct platform_device *op = kzalloc(sizeof(*op), GFP_KERNEL);
+	const struct linux_prom_irqs *intr;
+	struct dev_archdata *sd;
+	int len, i;
+
+	if (!op)
+		return NULL;
+
+	sd = &op->dev.archdata;
+	sd->op = op;
+
+	op->dev.of_node = dp;
+
+	intr = of_get_property(dp, "intr", &len);
+	if (intr) {
+		op->archdata.num_irqs = len / sizeof(struct linux_prom_irqs);
+		for (i = 0; i < op->archdata.num_irqs; i++)
+			op->archdata.irqs[i] =
+			    sparc_config.build_device_irq(op, intr[i].pri);
+	} else {
+		const unsigned int *irq =
+			of_get_property(dp, "interrupts", &len);
+
+		if (irq) {
+			op->archdata.num_irqs = len / sizeof(unsigned int);
+			for (i = 0; i < op->archdata.num_irqs; i++)
+				op->archdata.irqs[i] =
+				    sparc_config.build_device_irq(op, irq[i]);
+		} else {
+			op->archdata.num_irqs = 0;
+		}
+	}
+
+	build_device_resources(op, parent);
+
+	op->dev.parent = parent;
+	op->dev.bus = &platform_bus_type;
+	if (!parent)
+		dev_set_name(&op->dev, "root");
+	else
+		dev_set_name(&op->dev, "%08x", dp->phandle);
+
+	op->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	op->dev.dma_mask = &op->dev.coherent_dma_mask;
+
+	if (of_device_register(op)) {
+		printk("%s: Could not register of device.\n",
+		       dp->full_name);
+		kfree(op);
+		op = NULL;
+	}
+
+	return op;
+}
+
+static void __init scan_tree(struct device_node *dp, struct device *parent)
+{
+	while (dp) {
+		struct platform_device *op = scan_one_device(dp, parent);
+
+		if (op)
+			scan_tree(dp->child, &op->dev);
+
+		dp = dp->sibling;
+	}
+}
+
+static int __init scan_of_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct platform_device *parent;
+
+	parent = scan_one_device(root, NULL);
+	if (!parent)
+		return 0;
+
+	scan_tree(root->child, &parent->dev);
+	return 0;
+}
+postcore_initcall(scan_of_devices);
+
+static int __init of_debug(char *str)
+{
+	int val = 0;
+
+	get_option(&str, &val);
+	if (val & 1)
+		of_resource_verbose = 1;
+	return 1;
+}
+
+__setup("of_debug=", of_debug);
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
new file mode 100644
index 0000000..6df6086
--- /dev/null
+++ b/arch/sparc/kernel/of_device_64.c
@@ -0,0 +1,730 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <asm/spitfire.h>
+
+#include "of_device_common.h"
+
+void __iomem *of_ioremap(struct resource *res, unsigned long offset,
+			 unsigned long size, char *name)
+{
+	unsigned long ret = res->start + offset;
+	struct resource *r;
+
+	if (res->flags & IORESOURCE_MEM)
+		r = request_mem_region(ret, size, name);
+	else
+		r = request_region(ret, size, name);
+	if (!r)
+		ret = 0;
+
+	return (void __iomem *) ret;
+}
+EXPORT_SYMBOL(of_ioremap);
+
+void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
+{
+	if (res->flags & IORESOURCE_MEM)
+		release_mem_region((unsigned long) base, size);
+	else
+		release_region((unsigned long) base, size);
+}
+EXPORT_SYMBOL(of_iounmap);
+
+/*
+ * PCI bus specific translator
+ */
+
+static int of_bus_pci_match(struct device_node *np)
+{
+	if (!strcmp(np->name, "pci")) {
+		const char *model = of_get_property(np, "model", NULL);
+
+		if (model && !strcmp(model, "SUNW,simba"))
+			return 0;
+
+		/* Do not do PCI specific frobbing if the
+		 * PCI bridge lacks a ranges property.  We
+		 * want to pass it through up to the next
+		 * parent as-is, not with the PCI translate
+		 * method which chops off the top address cell.
+		 */
+		if (!of_find_property(np, "ranges", NULL))
+			return 0;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+static int of_bus_simba_match(struct device_node *np)
+{
+	const char *model = of_get_property(np, "model", NULL);
+
+	if (model && !strcmp(model, "SUNW,simba"))
+		return 1;
+
+	/* Treat PCI busses lacking a ranges property just like
+	 * simba.
+	 */
+	if (!strcmp(np->name, "pci")) {
+		if (!of_find_property(np, "ranges", NULL))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int of_bus_simba_map(u32 *addr, const u32 *range,
+			    int na, int ns, int pna)
+{
+	return 0;
+}
+
+static void of_bus_pci_count_cells(struct device_node *np,
+				   int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 3;
+	if (sizec)
+		*sizec = 2;
+}
+
+static int of_bus_pci_map(u32 *addr, const u32 *range,
+			  int na, int ns, int pna)
+{
+	u32 result[OF_MAX_ADDR_CELLS];
+	int i;
+
+	/* Check address type match */
+	if (!((addr[0] ^ range[0]) & 0x03000000))
+		goto type_match;
+
+	/* Special exception, we can map a 64-bit address into
+	 * a 32-bit range.
+	 */
+	if ((addr[0] & 0x03000000) == 0x03000000 &&
+	    (range[0] & 0x03000000) == 0x02000000)
+		goto type_match;
+
+	return -EINVAL;
+
+type_match:
+	if (of_out_of_range(addr + 1, range + 1, range + na + pna,
+			    na - 1, ns))
+		return -EINVAL;
+
+	/* Start with the parent range base.  */
+	memcpy(result, range + na, pna * 4);
+
+	/* Add in the child address offset, skipping high cell.  */
+	for (i = 0; i < na - 1; i++)
+		result[pna - 1 - i] +=
+			(addr[na - 1 - i] -
+			 range[na - 1 - i]);
+
+	memcpy(addr, result, pna * 4);
+
+	return 0;
+}
+
+static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
+{
+	u32 w = addr[0];
+
+	/* For PCI, we override whatever child busses may have used.  */
+	flags = 0;
+	switch((w >> 24) & 0x03) {
+	case 0x01:
+		flags |= IORESOURCE_IO;
+		break;
+
+	case 0x02: /* 32 bits */
+	case 0x03: /* 64 bits */
+		flags |= IORESOURCE_MEM;
+		break;
+	}
+	if (w & 0x40000000)
+		flags |= IORESOURCE_PREFETCH;
+	return flags;
+}
+
+/*
+ * FHC/Central bus specific translator.
+ *
+ * This is just needed to hard-code the address and size cell
+ * counts.  'fhc' and 'central' nodes lack the #address-cells and
+ * #size-cells properties, and if you walk to the root on such
+ * Enterprise boxes all you'll get is a #size-cells of 2 which is
+ * not what we want to use.
+ */
+static int of_bus_fhc_match(struct device_node *np)
+{
+	return !strcmp(np->name, "fhc") ||
+		!strcmp(np->name, "central");
+}
+
+#define of_bus_fhc_count_cells of_bus_sbus_count_cells
+
+/*
+ * Array of bus specific translators
+ */
+
+static struct of_bus of_busses[] = {
+	/* PCI */
+	{
+		.name = "pci",
+		.addr_prop_name = "assigned-addresses",
+		.match = of_bus_pci_match,
+		.count_cells = of_bus_pci_count_cells,
+		.map = of_bus_pci_map,
+		.get_flags = of_bus_pci_get_flags,
+	},
+	/* SIMBA */
+	{
+		.name = "simba",
+		.addr_prop_name = "assigned-addresses",
+		.match = of_bus_simba_match,
+		.count_cells = of_bus_pci_count_cells,
+		.map = of_bus_simba_map,
+		.get_flags = of_bus_pci_get_flags,
+	},
+	/* SBUS */
+	{
+		.name = "sbus",
+		.addr_prop_name = "reg",
+		.match = of_bus_sbus_match,
+		.count_cells = of_bus_sbus_count_cells,
+		.map = of_bus_default_map,
+		.get_flags = of_bus_default_get_flags,
+	},
+	/* FHC */
+	{
+		.name = "fhc",
+		.addr_prop_name = "reg",
+		.match = of_bus_fhc_match,
+		.count_cells = of_bus_fhc_count_cells,
+		.map = of_bus_default_map,
+		.get_flags = of_bus_default_get_flags,
+	},
+	/* Default */
+	{
+		.name = "default",
+		.addr_prop_name = "reg",
+		.match = NULL,
+		.count_cells = of_bus_default_count_cells,
+		.map = of_bus_default_map,
+		.get_flags = of_bus_default_get_flags,
+	},
+};
+
+static struct of_bus *of_match_bus(struct device_node *np)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
+		if (!of_busses[i].match || of_busses[i].match(np))
+			return &of_busses[i];
+	BUG();
+	return NULL;
+}
+
+static int __init build_one_resource(struct device_node *parent,
+				     struct of_bus *bus,
+				     struct of_bus *pbus,
+				     u32 *addr,
+				     int na, int ns, int pna)
+{
+	const u32 *ranges;
+	int rone, rlen;
+
+	ranges = of_get_property(parent, "ranges", &rlen);
+	if (ranges == NULL || rlen == 0) {
+		u32 result[OF_MAX_ADDR_CELLS];
+		int i;
+
+		memset(result, 0, pna * 4);
+		for (i = 0; i < na; i++)
+			result[pna - 1 - i] =
+				addr[na - 1 - i];
+
+		memcpy(addr, result, pna * 4);
+		return 0;
+	}
+
+	/* Now walk through the ranges */
+	rlen /= 4;
+	rone = na + pna + ns;
+	for (; rlen >= rone; rlen -= rone, ranges += rone) {
+		if (!bus->map(addr, ranges, na, ns, pna))
+			return 0;
+	}
+
+	/* When we miss an I/O space match on PCI, just pass it up
+	 * to the next PCI bridge and/or controller.
+	 */
+	if (!strcmp(bus->name, "pci") &&
+	    (addr[0] & 0x03000000) == 0x01000000)
+		return 0;
+
+	return 1;
+}
+
+static int __init use_1to1_mapping(struct device_node *pp)
+{
+	/* If we have a ranges property in the parent, use it.  */
+	if (of_find_property(pp, "ranges", NULL) != NULL)
+		return 0;
+
+	/* If the parent is the dma node of an ISA bus, pass
+	 * the translation up to the root.
+	 *
+	 * Some SBUS devices use intermediate nodes to express
+	 * hierarchy within the device itself.  These aren't
+	 * real bus nodes, and don't have a 'ranges' property.
+	 * But, we should still pass the translation work up
+	 * to the SBUS itself.
+	 */
+	if (!strcmp(pp->name, "dma") ||
+	    !strcmp(pp->name, "espdma") ||
+	    !strcmp(pp->name, "ledma") ||
+	    !strcmp(pp->name, "lebuffer"))
+		return 0;
+
+	/* Similarly for all PCI bridges, if we get this far
+	 * it lacks a ranges property, and this will include
+	 * cases like Simba.
+	 */
+	if (!strcmp(pp->name, "pci"))
+		return 0;
+
+	return 1;
+}
+
+static int of_resource_verbose;
+
+static void __init build_device_resources(struct platform_device *op,
+					  struct device *parent)
+{
+	struct platform_device *p_op;
+	struct of_bus *bus;
+	int na, ns;
+	int index, num_reg;
+	const void *preg;
+
+	if (!parent)
+		return;
+
+	p_op = to_platform_device(parent);
+	bus = of_match_bus(p_op->dev.of_node);
+	bus->count_cells(op->dev.of_node, &na, &ns);
+
+	preg = of_get_property(op->dev.of_node, bus->addr_prop_name, &num_reg);
+	if (!preg || num_reg == 0)
+		return;
+
+	/* Convert to num-cells.  */
+	num_reg /= 4;
+
+	/* Convert to num-entries.  */
+	num_reg /= na + ns;
+
+	/* Prevent overrunning the op->resources[] array.  */
+	if (num_reg > PROMREG_MAX) {
+		printk(KERN_WARNING "%s: Too many regs (%d), "
+		       "limiting to %d.\n",
+		       op->dev.of_node->full_name, num_reg, PROMREG_MAX);
+		num_reg = PROMREG_MAX;
+	}
+
+	op->resource = op->archdata.resource;
+	op->num_resources = num_reg;
+	for (index = 0; index < num_reg; index++) {
+		struct resource *r = &op->resource[index];
+		u32 addr[OF_MAX_ADDR_CELLS];
+		const u32 *reg = (preg + (index * ((na + ns) * 4)));
+		struct device_node *dp = op->dev.of_node;
+		struct device_node *pp = p_op->dev.of_node;
+		struct of_bus *pbus, *dbus;
+		u64 size, result = OF_BAD_ADDR;
+		unsigned long flags;
+		int dna, dns;
+		int pna, pns;
+
+		size = of_read_addr(reg + na, ns);
+		memcpy(addr, reg, na * 4);
+
+		flags = bus->get_flags(addr, 0);
+
+		if (use_1to1_mapping(pp)) {
+			result = of_read_addr(addr, na);
+			goto build_res;
+		}
+
+		dna = na;
+		dns = ns;
+		dbus = bus;
+
+		while (1) {
+			dp = pp;
+			pp = dp->parent;
+			if (!pp) {
+				result = of_read_addr(addr, dna);
+				break;
+			}
+
+			pbus = of_match_bus(pp);
+			pbus->count_cells(dp, &pna, &pns);
+
+			if (build_one_resource(dp, dbus, pbus, addr,
+					       dna, dns, pna))
+				break;
+
+			flags = pbus->get_flags(addr, flags);
+
+			dna = pna;
+			dns = pns;
+			dbus = pbus;
+		}
+
+	build_res:
+		memset(r, 0, sizeof(*r));
+
+		if (of_resource_verbose)
+			printk("%s reg[%d] -> %llx\n",
+			       op->dev.of_node->full_name, index,
+			       result);
+
+		if (result != OF_BAD_ADDR) {
+			if (tlb_type == hypervisor)
+				result &= 0x0fffffffffffffffUL;
+
+			r->start = result;
+			r->end = result + size - 1;
+			r->flags = flags;
+		}
+		r->name = op->dev.of_node->name;
+	}
+}
+
+static struct device_node * __init
+apply_interrupt_map(struct device_node *dp, struct device_node *pp,
+		    const u32 *imap, int imlen, const u32 *imask,
+		    unsigned int *irq_p)
+{
+	struct device_node *cp;
+	unsigned int irq = *irq_p;
+	struct of_bus *bus;
+	phandle handle;
+	const u32 *reg;
+	int na, num_reg, i;
+
+	bus = of_match_bus(pp);
+	bus->count_cells(dp, &na, NULL);
+
+	reg = of_get_property(dp, "reg", &num_reg);
+	if (!reg || !num_reg)
+		return NULL;
+
+	imlen /= ((na + 3) * 4);
+	handle = 0;
+	for (i = 0; i < imlen; i++) {
+		int j;
+
+		for (j = 0; j < na; j++) {
+			if ((reg[j] & imask[j]) != imap[j])
+				goto next;
+		}
+		if (imap[na] == irq) {
+			handle = imap[na + 1];
+			irq = imap[na + 2];
+			break;
+		}
+
+	next:
+		imap += (na + 3);
+	}
+	if (i == imlen) {
+		/* Psycho and Sabre PCI controllers can have 'interrupt-map'
+		 * properties that do not include the on-board device
+		 * interrupts.  Instead, the device's 'interrupts' property
+		 * is already a fully specified INO value.
+		 *
+		 * Handle this by deciding that, if we didn't get a
+		 * match in the parent's 'interrupt-map', and the
+		 * parent is an IRQ translator, then use the parent as
+		 * our IRQ controller.
+		 */
+		if (pp->irq_trans)
+			return pp;
+
+		return NULL;
+	}
+
+	*irq_p = irq;
+	cp = of_find_node_by_phandle(handle);
+
+	return cp;
+}
+
+static unsigned int __init pci_irq_swizzle(struct device_node *dp,
+					   struct device_node *pp,
+					   unsigned int irq)
+{
+	const struct linux_prom_pci_registers *regs;
+	unsigned int bus, devfn, slot, ret;
+
+	if (irq < 1 || irq > 4)
+		return irq;
+
+	regs = of_get_property(dp, "reg", NULL);
+	if (!regs)
+		return irq;
+
+	bus = (regs->phys_hi >> 16) & 0xff;
+	devfn = (regs->phys_hi >> 8) & 0xff;
+	slot = (devfn >> 3) & 0x1f;
+
+	if (pp->irq_trans) {
+		/* Derived from Table 8-3, U2P User's Manual.  This branch
+		 * is handling a PCI controller that lacks a proper set of
+		 * interrupt-map and interrupt-map-mask properties.  The
+		 * Ultra-E450 is one example.
+		 *
+		 * The bit layout is BSSLL, where:
+		 * B: 0 on bus A, 1 on bus B
+		 * S: 2-bit slot number, derived from PCI device number as
+		 *    (dev - 1) for bus A, or (dev - 2) for bus B
+		 * L: 2-bit line number
+		 */
+		if (bus & 0x80) {
+			/* PBM-A */
+			bus  = 0x00;
+			slot = (slot - 1) << 2;
+		} else {
+			/* PBM-B */
+			bus  = 0x10;
+			slot = (slot - 2) << 2;
+		}
+		irq -= 1;
+
+		ret = (bus | slot | irq);
+	} else {
+		/* Going through a PCI-PCI bridge that lacks a set of
+		 * interrupt-map and interrupt-map-mask properties.
+		 */
+		ret = ((irq - 1 + (slot & 3)) & 3) + 1;
+	}
+
+	return ret;
+}
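
The bridge branch is the standard PCI interrupt swizzle, rotating INTA..INTD by the device's slot number. Tabulating the formula for a few slots shows the rotation (standalone sketch):

	#include <stdio.h>

	int main(void)
	{
		unsigned int slot, irq;

		for (slot = 0; slot < 4; slot++)
			for (irq = 1; irq <= 4; irq++)	/* 1..4 = INTA..INTD */
				printf("slot %u INT%c -> INT%c\n", slot,
				       'A' + irq - 1,
				       'A' + ((irq - 1 + (slot & 3)) & 3));
		return 0;
	}
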
+
+static int of_irq_verbose;
+
+static unsigned int __init build_one_device_irq(struct platform_device *op,
+						struct device *parent,
+						unsigned int irq)
+{
+	struct device_node *dp = op->dev.of_node;
+	struct device_node *pp, *ip;
+	unsigned int orig_irq = irq;
+	int nid;
+
+	if (irq == 0xffffffff)
+		return irq;
+
+	if (dp->irq_trans) {
+		irq = dp->irq_trans->irq_build(dp, irq,
+					       dp->irq_trans->data);
+
+		if (of_irq_verbose)
+			printk("%s: direct translate %x --> %x\n",
+			       dp->full_name, orig_irq, irq);
+
+		goto out;
+	}
+
+	/* Something more complicated.  Walk up to the root, applying
+	 * interrupt-map or bus specific translations, until we hit
+	 * an IRQ translator.
+	 *
+	 * If we hit a bus type or situation we cannot handle, we
+	 * stop and assume that the original IRQ number was in a
+	 * format which has special meaning to its immediate parent.
+	 */
+	pp = dp->parent;
+	ip = NULL;
+	while (pp) {
+		const void *imap, *imsk;
+		int imlen;
+
+		imap = of_get_property(pp, "interrupt-map", &imlen);
+		imsk = of_get_property(pp, "interrupt-map-mask", NULL);
+		if (imap && imsk) {
+			struct device_node *iret;
+			int this_orig_irq = irq;
+
+			iret = apply_interrupt_map(dp, pp,
+						   imap, imlen, imsk,
+						   &irq);
+
+			if (of_irq_verbose)
+				printk("%s: Apply [%s:%x] imap --> [%s:%x]\n",
+				       op->dev.of_node->full_name,
+				       pp->full_name, this_orig_irq,
+				       of_node_full_name(iret), irq);
+
+			if (!iret)
+				break;
+
+			if (iret->irq_trans) {
+				ip = iret;
+				break;
+			}
+		} else {
+			if (!strcmp(pp->name, "pci")) {
+				unsigned int this_orig_irq = irq;
+
+				irq = pci_irq_swizzle(dp, pp, irq);
+				if (of_irq_verbose)
+					printk("%s: PCI swizzle [%s] "
+					       "%x --> %x\n",
+					       op->dev.of_node->full_name,
+					       pp->full_name, this_orig_irq,
+					       irq);
+
+			}
+
+			if (pp->irq_trans) {
+				ip = pp;
+				break;
+			}
+		}
+		dp = pp;
+		pp = pp->parent;
+	}
+	if (!ip)
+		return orig_irq;
+
+	irq = ip->irq_trans->irq_build(op->dev.of_node, irq,
+				       ip->irq_trans->data);
+	if (of_irq_verbose)
+		printk("%s: Apply IRQ trans [%s] %x --> %x\n",
+		      op->dev.of_node->full_name, ip->full_name, orig_irq, irq);
+
+out:
+	nid = of_node_to_nid(dp);
+	if (nid != -1) {
+		cpumask_t numa_mask;
+
+		cpumask_copy(&numa_mask, cpumask_of_node(nid));
+		irq_set_affinity(irq, &numa_mask);
+	}
+
+	return irq;
+}
+
+static struct platform_device * __init scan_one_device(struct device_node *dp,
+						 struct device *parent)
+{
+	struct platform_device *op = kzalloc(sizeof(*op), GFP_KERNEL);
+	const unsigned int *irq;
+	struct dev_archdata *sd;
+	int len, i;
+
+	if (!op)
+		return NULL;
+
+	sd = &op->dev.archdata;
+	sd->op = op;
+
+	op->dev.of_node = dp;
+
+	irq = of_get_property(dp, "interrupts", &len);
+	if (irq) {
+		op->archdata.num_irqs = len / 4;
+
+		/* Prevent overrunning the op->irqs[] array.  */
+		if (op->archdata.num_irqs > PROMINTR_MAX) {
+			printk(KERN_WARNING "%s: Too many irqs (%d), "
+			       "limiting to %d.\n",
+			       dp->full_name, op->archdata.num_irqs, PROMINTR_MAX);
+			op->archdata.num_irqs = PROMINTR_MAX;
+		}
+		memcpy(op->archdata.irqs, irq, op->archdata.num_irqs * 4);
+	} else {
+		op->archdata.num_irqs = 0;
+	}
+
+	build_device_resources(op, parent);
+	for (i = 0; i < op->archdata.num_irqs; i++)
+		op->archdata.irqs[i] = build_one_device_irq(op, parent, op->archdata.irqs[i]);
+
+	op->dev.parent = parent;
+	op->dev.bus = &platform_bus_type;
+	if (!parent)
+		dev_set_name(&op->dev, "root");
+	else
+		dev_set_name(&op->dev, "%08x", dp->phandle);
+	op->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	op->dev.dma_mask = &op->dev.coherent_dma_mask;
+
+	if (of_device_register(op)) {
+		printk("%s: Could not register of device.\n",
+		       dp->full_name);
+		kfree(op);
+		op = NULL;
+	}
+
+	return op;
+}
+
+static void __init scan_tree(struct device_node *dp, struct device *parent)
+{
+	while (dp) {
+		struct platform_device *op = scan_one_device(dp, parent);
+
+		if (op)
+			scan_tree(dp->child, &op->dev);
+
+		dp = dp->sibling;
+	}
+}
+
+static int __init scan_of_devices(void)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	struct platform_device *parent;
+
+	parent = scan_one_device(root, NULL);
+	if (!parent)
+		return 0;
+
+	scan_tree(root->child, &parent->dev);
+	return 0;
+}
+postcore_initcall(scan_of_devices);
+
+static int __init of_debug(char *str)
+{
+	int val = 0;
+
+	get_option(&str, &val);
+	if (val & 1)
+		of_resource_verbose = 1;
+	if (val & 2)
+		of_irq_verbose = 1;
+	return 1;
+}
+
+__setup("of_debug=", of_debug);
diff --git a/arch/sparc/kernel/of_device_common.c b/arch/sparc/kernel/of_device_common.c
new file mode 100644
index 0000000..de0ee39
--- /dev/null
+++ b/arch/sparc/kernel/of_device_common.c
@@ -0,0 +1,178 @@
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/export.h>
+#include <linux/mod_devicetable.h>
+#include <linux/errno.h>
+#include <linux/irq.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+
+#include "of_device_common.h"
+
+unsigned int irq_of_parse_and_map(struct device_node *node, int index)
+{
+	struct platform_device *op = of_find_device_by_node(node);
+
+	if (!op || index >= op->archdata.num_irqs)
+		return 0;
+
+	return op->archdata.irqs[index];
+}
+EXPORT_SYMBOL(irq_of_parse_and_map);
+
+int of_address_to_resource(struct device_node *node, int index,
+			   struct resource *r)
+{
+	struct platform_device *op = of_find_device_by_node(node);
+
+	if (!op || index >= op->num_resources)
+		return -EINVAL;
+
+	memcpy(r, &op->archdata.resource[index], sizeof(*r));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_address_to_resource);
+
+void __iomem *of_iomap(struct device_node *node, int index)
+{
+	struct platform_device *op = of_find_device_by_node(node);
+	struct resource *r;
+
+	if (!op || index >= op->num_resources)
+		return NULL;
+
+	r = &op->archdata.resource[index];
+
+	return of_ioremap(r, 0, resource_size(r), (char *) r->name);
+}
+EXPORT_SYMBOL(of_iomap);
+
+/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
+ * BUS and propagate to all child platform_device objects.
+ */
+void of_propagate_archdata(struct platform_device *bus)
+{
+	struct dev_archdata *bus_sd = &bus->dev.archdata;
+	struct device_node *bus_dp = bus->dev.of_node;
+	struct device_node *dp;
+
+	for (dp = bus_dp->child; dp; dp = dp->sibling) {
+		struct platform_device *op = of_find_device_by_node(dp);
+
+		op->dev.archdata.iommu = bus_sd->iommu;
+		op->dev.archdata.stc = bus_sd->stc;
+		op->dev.archdata.host_controller = bus_sd->host_controller;
+		op->dev.archdata.numa_node = bus_sd->numa_node;
+
+		if (dp->child)
+			of_propagate_archdata(op);
+	}
+}
+
+static void get_cells(struct device_node *dp, int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = of_n_addr_cells(dp);
+	if (sizec)
+		*sizec = of_n_size_cells(dp);
+}
+
+/*
+ * Default translator (generic bus)
+ */
+
+void of_bus_default_count_cells(struct device_node *dev, int *addrc, int *sizec)
+{
+	get_cells(dev, addrc, sizec);
+}
+
+/* Make sure the least significant 64-bits are in-range.  Even
+ * for 3 or 4 cell values it is a good enough approximation.
+ */
+int of_out_of_range(const u32 *addr, const u32 *base,
+		    const u32 *size, int na, int ns)
+{
+	u64 a = of_read_addr(addr, na);
+	u64 b = of_read_addr(base, na);
+
+	if (a < b)
+		return 1;
+
+	b += of_read_addr(size, ns);
+	if (a >= b)
+		return 1;
+
+	return 0;
+}
+
+int of_bus_default_map(u32 *addr, const u32 *range, int na, int ns, int pna)
+{
+	u32 result[OF_MAX_ADDR_CELLS];
+	int i;
+
+	if (ns > 2) {
+		printk("of_device: Cannot handle size cells (%d) > 2.", ns);
+		return -EINVAL;
+	}
+
+	if (of_out_of_range(addr, range, range + na + pna, na, ns))
+		return -EINVAL;
+
+	/* Start with the parent range base.  */
+	memcpy(result, range + na, pna * 4);
+
+	/* Add in the child address offset.  */
+	for (i = 0; i < na; i++)
+		result[pna - 1 - i] +=
+			(addr[na - 1 - i] -
+			 range[na - 1 - i]);
+
+	memcpy(addr, result, pna * 4);
+
+	return 0;
+}
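
A one-cell example makes the arithmetic concrete: with na = ns = pna = 1, a child address of 0x1000 against a range of <child-base 0x0, parent-base 0x80000000, size 0x10000> rewrites addr[0] to 0x80001000. A sketch of such a call (values made up):

	static void __init of_map_example(void)
	{
		u32 addr[OF_MAX_ADDR_CELLS] = { 0x1000 };	/* child address */
		const u32 range[3] = { 0x0, 0x80000000, 0x10000 };

		/* addr[0] becomes parent-base + (child - child-base),
		 * i.e. 0x80001000, and the call returns 0 */
		of_bus_default_map(addr, range, 1, 1, 1);
	}
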
+
+unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
+{
+	if (flags)
+		return flags;
+	return IORESOURCE_MEM;
+}
+
+/*
+ * SBUS bus specific translator
+ */
+
+int of_bus_sbus_match(struct device_node *np)
+{
+	struct device_node *dp = np;
+
+	while (dp) {
+		if (!strcmp(dp->name, "sbus") ||
+		    !strcmp(dp->name, "sbi"))
+			return 1;
+
+		/* Have a look at use_1to1_mapping().  We're trying
+		 * to match SBUS if that's the top-level bus and we
+		 * don't have some intervening real bus that provides
+		 * ranges based translations.
+		 */
+		if (of_find_property(dp, "ranges", NULL) != NULL)
+			break;
+
+		dp = dp->parent;
+	}
+
+	return 0;
+}
+
+void of_bus_sbus_count_cells(struct device_node *child, int *addrc, int *sizec)
+{
+	if (addrc)
+		*addrc = 2;
+	if (sizec)
+		*sizec = 1;
+}
diff --git a/arch/sparc/kernel/of_device_common.h b/arch/sparc/kernel/of_device_common.h
new file mode 100644
index 0000000..3d66230
--- /dev/null
+++ b/arch/sparc/kernel/of_device_common.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _OF_DEVICE_COMMON_H
+#define _OF_DEVICE_COMMON_H
+
+static inline u64 of_read_addr(const u32 *cell, int size)
+{
+	u64 r = 0;
+	while (size--)
+		r = (r << 32) | *(cell++);
+	return r;
+}
+
+void of_bus_default_count_cells(struct device_node *dev, int *addrc,
+				int *sizec);
+int of_out_of_range(const u32 *addr, const u32 *base,
+		    const u32 *size, int na, int ns);
+int of_bus_default_map(u32 *addr, const u32 *range, int na, int ns, int pna);
+unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags);
+
+int of_bus_sbus_match(struct device_node *np);
+void of_bus_sbus_count_cells(struct device_node *child, int *addrc, int *sizec);
+
+/* Max address size we deal with */
+#define OF_MAX_ADDR_CELLS	4
+
+struct of_bus {
+	const char	*name;
+	const char	*addr_prop_name;
+	int		(*match)(struct device_node *parent);
+	void		(*count_cells)(struct device_node *child,
+				       int *addrc, int *sizec);
+	int		(*map)(u32 *addr, const u32 *range,
+			       int na, int ns, int pna);
+	unsigned long	(*get_flags)(const u32 *addr, unsigned long);
+};
+
+#endif /* _OF_DEVICE_COMMON_H */
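
of_read_addr() concatenates 32-bit cells big-endian style, most significant cell first. A standalone replica showing a two-cell read (userspace C, typedefs added for self-containment):

	#include <assert.h>
	#include <stdint.h>

	typedef uint32_t u32;
	typedef uint64_t u64;

	static u64 of_read_addr(const u32 *cell, int size)
	{
		u64 r = 0;
		while (size--)
			r = (r << 32) | *(cell++);
		return r;
	}

	int main(void)
	{
		const u32 cells[2] = { 0x1, 0x80000000 };

		/* {0x1, 0x80000000} reads back as 0x180000000 */
		assert(of_read_addr(cells, 2) == 0x180000000ULL);
		return 0;
	}
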
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
new file mode 100644
index 0000000..17ea16a
--- /dev/null
+++ b/arch/sparc/kernel/pci.c
@@ -0,0 +1,1175 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci.c: UltraSparc PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1998, 1999 Eddie C. Dost   (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek   (jj@ultra.linux.cz)
+ *
+ * OF tree based PCI bus probing taken from the PowerPC port
+ * with minor modifications, see there for credits.
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/apb.h>
+
+#include "pci_impl.h"
+#include "kernel.h"
+
+/* List of all PCI controllers found in the system. */
+struct pci_pbm_info *pci_pbm_root = NULL;
+
+/* Each PBM found gets a unique index. */
+int pci_num_pbms = 0;
+
+volatile int pci_poke_in_progress;
+volatile int pci_poke_cpu = -1;
+volatile int pci_poke_faulted;
+
+static DEFINE_SPINLOCK(pci_poke_lock);
+
+void pci_config_read8(u8 *addr, u8 *ret)
+{
+	unsigned long flags;
+	u8 byte;
+
+	spin_lock_irqsave(&pci_poke_lock, flags);
+	pci_poke_cpu = smp_processor_id();
+	pci_poke_in_progress = 1;
+	pci_poke_faulted = 0;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "lduba [%1] %2, %0\n\t"
+			     "membar #Sync"
+			     : "=r" (byte)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	pci_poke_in_progress = 0;
+	pci_poke_cpu = -1;
+	if (!pci_poke_faulted)
+		*ret = byte;
+	spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_read16(u16 *addr, u16 *ret)
+{
+	unsigned long flags;
+	u16 word;
+
+	spin_lock_irqsave(&pci_poke_lock, flags);
+	pci_poke_cpu = smp_processor_id();
+	pci_poke_in_progress = 1;
+	pci_poke_faulted = 0;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "lduha [%1] %2, %0\n\t"
+			     "membar #Sync"
+			     : "=r" (word)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	pci_poke_in_progress = 0;
+	pci_poke_cpu = -1;
+	if (!pci_poke_faulted)
+		*ret = word;
+	spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_read32(u32 *addr, u32 *ret)
+{
+	unsigned long flags;
+	u32 dword;
+
+	spin_lock_irqsave(&pci_poke_lock, flags);
+	pci_poke_cpu = smp_processor_id();
+	pci_poke_in_progress = 1;
+	pci_poke_faulted = 0;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "lduwa [%1] %2, %0\n\t"
+			     "membar #Sync"
+			     : "=r" (dword)
+			     : "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	pci_poke_in_progress = 0;
+	pci_poke_cpu = -1;
+	if (!pci_poke_faulted)
+		*ret = dword;
+	spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_write8(u8 *addr, u8 val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_poke_lock, flags);
+	pci_poke_cpu = smp_processor_id();
+	pci_poke_in_progress = 1;
+	pci_poke_faulted = 0;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "stba %0, [%1] %2\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	pci_poke_in_progress = 0;
+	pci_poke_cpu = -1;
+	spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_write16(u16 *addr, u16 val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_poke_lock, flags);
+	pci_poke_cpu = smp_processor_id();
+	pci_poke_in_progress = 1;
+	pci_poke_faulted = 0;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "stha %0, [%1] %2\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	pci_poke_in_progress = 0;
+	pci_poke_cpu = -1;
+	spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+void pci_config_write32(u32 *addr, u32 val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pci_poke_lock, flags);
+	pci_poke_cpu = smp_processor_id();
+	pci_poke_in_progress = 1;
+	pci_poke_faulted = 0;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "stwa %0, [%1] %2\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (val), "r" (addr), "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+	pci_poke_in_progress = 0;
+	pci_poke_cpu = -1;
+	spin_unlock_irqrestore(&pci_poke_lock, flags);
+}
+
+static int ofpci_verbose;
+
+static int __init ofpci_debug(char *str)
+{
+	int val = 0;
+
+	get_option(&str, &val);
+	if (val)
+		ofpci_verbose = 1;
+	return 1;
+}
+
+__setup("ofpci_debug=", ofpci_debug);
+
+static unsigned long pci_parse_of_flags(u32 addr0)
+{
+	unsigned long flags = 0;
+
+	if (addr0 & 0x02000000) {
+		flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
+		flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
+		if (addr0 & 0x01000000)
+			flags |= IORESOURCE_MEM_64
+				 | PCI_BASE_ADDRESS_MEM_TYPE_64;
+		if (addr0 & 0x40000000)
+			flags |= IORESOURCE_PREFETCH
+				 | PCI_BASE_ADDRESS_MEM_PREFETCH;
+	} else if (addr0 & 0x01000000)
+		flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+	return flags;
+}
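+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * The bit tests above decode the OF PCI binding's phys.hi cell, laid out
+ * as npt000ss bbbbbbbb dddddfff rrrrrrrr (ss = space code, p = prefetch,
+ * rrrrrrrr = config register).  For example:
+ *
+ *	pci_parse_of_flags(0x82000010);	// ss=10: 32-bit MEM BAR at reg 0x10
+ *	pci_parse_of_flags(0x81000014);	// ss=01: I/O BAR at reg 0x14
+ *	pci_parse_of_flags(0x43000018);	// ss=11 + p: 64-bit prefetchable MEM
+ */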
+
+/* The of_device layer has translated all of the assigned-address properties
+ * into physical address resources, we only have to figure out the register
+ * mapping.
+ */
+static void pci_parse_of_addrs(struct platform_device *op,
+			       struct device_node *node,
+			       struct pci_dev *dev)
+{
+	struct resource *op_res;
+	const u32 *addrs;
+	int proplen;
+
+	addrs = of_get_property(node, "assigned-addresses", &proplen);
+	if (!addrs)
+		return;
+	if (ofpci_verbose)
+		pci_info(dev, "    parse addresses (%d bytes) @ %p\n",
+			 proplen, addrs);
+	op_res = &op->resource[0];
+	for (; proplen >= 20; proplen -= 20, addrs += 5, op_res++) {
+		struct resource *res;
+		unsigned long flags;
+		int i;
+
+		flags = pci_parse_of_flags(addrs[0]);
+		if (!flags)
+			continue;
+		i = addrs[0] & 0xff;
+		if (ofpci_verbose)
+			pci_info(dev, "  start: %llx, end: %llx, i: %x\n",
+				 op_res->start, op_res->end, i);
+
+		if (PCI_BASE_ADDRESS_0 <= i && i <= PCI_BASE_ADDRESS_5) {
+			res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
+		} else if (i == dev->rom_base_reg) {
+			res = &dev->resource[PCI_ROM_RESOURCE];
+			flags |= IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
+		} else {
+			pci_err(dev, "bad cfg reg num 0x%x\n", i);
+			continue;
+		}
+		res->start = op_res->start;
+		res->end = op_res->end;
+		res->flags = flags;
+		res->name = pci_name(dev);
+
+		pci_info(dev, "reg 0x%x: %pR\n", i, res);
+	}
+}
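+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * Each "assigned-addresses" entry is five 32-bit cells (20 bytes), which
+ * is why the loop above steps by 5 cells / 20 bytes per resource:
+ *
+ *	struct of_pci_assigned_addr {	// hypothetical view of one entry
+ *		u32 phys_hi;		// flags + bus/dev/fn + register
+ *		u32 phys_mid, phys_lo;	// 64-bit PCI address
+ *		u32 size_hi, size_lo;	// 64-bit size
+ *	};
+ */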
+
+static void pci_init_dev_archdata(struct dev_archdata *sd, void *iommu,
+				  void *stc, void *host_controller,
+				  struct platform_device  *op,
+				  int numa_node)
+{
+	sd->iommu = iommu;
+	sd->stc = stc;
+	sd->host_controller = host_controller;
+	sd->op = op;
+	sd->numa_node = numa_node;
+}
+
+static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm,
+					 struct device_node *node,
+					 struct pci_bus *bus, int devfn)
+{
+	struct dev_archdata *sd;
+	struct platform_device *op;
+	struct pci_dev *dev;
+	const char *type;
+	u32 class;
+
+	dev = pci_alloc_dev(bus);
+	if (!dev)
+		return NULL;
+
+	op = of_find_device_by_node(node);
+	sd = &dev->dev.archdata;
+	pci_init_dev_archdata(sd, pbm->iommu, &pbm->stc, pbm, op,
+			      pbm->numa_node);
+	sd = &op->dev.archdata;
+	sd->iommu = pbm->iommu;
+	sd->stc = &pbm->stc;
+	sd->numa_node = pbm->numa_node;
+
+	if (!strcmp(node->name, "ebus"))
+		of_propagate_archdata(op);
+
+	type = of_get_property(node, "device_type", NULL);
+	if (type == NULL)
+		type = "";
+
+	if (ofpci_verbose)
+		pci_info(bus,"    create device, devfn: %x, type: %s\n",
+			 devfn, type);
+
+	dev->sysdata = node;
+	dev->dev.parent = bus->bridge;
+	dev->dev.bus = &pci_bus_type;
+	dev->dev.of_node = of_node_get(node);
+	dev->devfn = devfn;
+	dev->multifunction = 0;		/* maybe a lie? */
+	set_pcie_port_type(dev);
+
+	pci_dev_assign_slot(dev);
+	dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff);
+	dev->device = of_getintprop_default(node, "device-id", 0xffff);
+	dev->subsystem_vendor =
+		of_getintprop_default(node, "subsystem-vendor-id", 0);
+	dev->subsystem_device =
+		of_getintprop_default(node, "subsystem-id", 0);
+
+	dev->cfg_size = pci_cfg_space_size(dev);
+
+	/* We can't actually use the firmware value, we have
+	 * to read what is in the register right now.  One
+	 * reason is that in the case of IDE interfaces the
+	 * firmware can sample the value before the IDE
+	 * interface is programmed into native mode.
+	 */
+	pci_read_config_dword(dev, PCI_CLASS_REVISION, &class);
+	dev->class = class >> 8;
+	dev->revision = class & 0xff;
+
+	dev_set_name(&dev->dev, "%04x:%02x:%02x.%d", pci_domain_nr(bus),
+		dev->bus->number, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+	/* I have seen IDE devices which will not respond to
+	 * the bmdma simplex check reads if bus mastering is
+	 * disabled.
+	 */
+	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)
+		pci_set_master(dev);
+
+	dev->current_state = PCI_UNKNOWN;	/* unknown power state */
+	dev->error_state = pci_channel_io_normal;
+	dev->dma_mask = 0xffffffff;
+
+	if (!strcmp(node->name, "pci")) {
+		/* a PCI-PCI bridge */
+		dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
+		dev->rom_base_reg = PCI_ROM_ADDRESS1;
+	} else if (!strcmp(type, "cardbus")) {
+		dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
+	} else {
+		dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
+		dev->rom_base_reg = PCI_ROM_ADDRESS;
+
+		dev->irq = sd->op->archdata.irqs[0];
+		if (dev->irq == 0xffffffff)
+			dev->irq = PCI_IRQ_NONE;
+	}
+
+	pci_info(dev, "[%04x:%04x] type %02x class %#08x\n",
+		 dev->vendor, dev->device, dev->hdr_type, dev->class);
+
+	pci_parse_of_addrs(sd->op, node, dev);
+
+	if (ofpci_verbose)
+		pci_info(dev, "    adding to system ...\n");
+
+	pci_device_add(dev, bus);
+
+	return dev;
+}
+
+static void apb_calc_first_last(u8 map, u32 *first_p, u32 *last_p)
+{
+	u32 idx, first, last;
+
+	first = 8;
+	last = 0;
+	for (idx = 0; idx < 8; idx++) {
+		if ((map & (1 << idx)) != 0) {
+			if (first > idx)
+				first = idx;
+			if (last < idx)
+				last = idx;
+		}
+	}
+
+	*first_p = first;
+	*last_p = last;
+}
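+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * A worked example of the bit scan above, as used by apb_fake_ranges()
+ * below:
+ *
+ *	u32 first, last;
+ *	apb_calc_first_last(0x1c, &first, &last);	// bits 2,3,4 set
+ *	// first == 2, last == 4; with the I/O map's 2MB granularity
+ *	// (<< 21) that becomes the range 0x00400000 - 0x009fffff.
+ */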
+
+/* Cook up fake bus resources for SUNW,simba PCI bridges which lack
+ * a proper 'ranges' property.
+ */
+static void apb_fake_ranges(struct pci_dev *dev,
+			    struct pci_bus *bus,
+			    struct pci_pbm_info *pbm)
+{
+	struct pci_bus_region region;
+	struct resource *res;
+	u32 first, last;
+	u8 map;
+
+	pci_read_config_byte(dev, APB_IO_ADDRESS_MAP, &map);
+	apb_calc_first_last(map, &first, &last);
+	res = bus->resource[0];
+	res->flags = IORESOURCE_IO;
+	region.start = (first << 21);
+	region.end = (last << 21) + ((1 << 21) - 1);
+	pcibios_bus_to_resource(dev->bus, res, &region);
+
+	pci_read_config_byte(dev, APB_MEM_ADDRESS_MAP, &map);
+	apb_calc_first_last(map, &first, &last);
+	res = bus->resource[1];
+	res->flags = IORESOURCE_MEM;
+	region.start = (first << 29);
+	region.end = (last << 29) + ((1 << 29) - 1);
+	pcibios_bus_to_resource(dev->bus, res, &region);
+}
+
+static void pci_of_scan_bus(struct pci_pbm_info *pbm,
+			    struct device_node *node,
+			    struct pci_bus *bus);
+
+#define GET_64BIT(prop, i)	((((u64) (prop)[(i)]) << 32) | (prop)[(i)+1])
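+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * A bridge "ranges" entry is eight cells (32 bytes), which the loop in
+ * of_scan_pci_bridge() below decodes as:
+ *
+ *	cell 0		child phys.hi	-> pci_parse_of_flags()
+ *	cells 1-2	child phys.mid/lo -> GET_64BIT(ranges, 1), the start
+ *	cells 3-5	parent address	(not used here)
+ *	cells 6-7	size		-> GET_64BIT(ranges, 6)
+ */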
+
+static void of_scan_pci_bridge(struct pci_pbm_info *pbm,
+			       struct device_node *node,
+			       struct pci_dev *dev)
+{
+	struct pci_bus *bus;
+	const u32 *busrange, *ranges;
+	int len, i, simba;
+	struct pci_bus_region region;
+	struct resource *res;
+	unsigned int flags;
+	u64 size;
+
+	if (ofpci_verbose)
+		pci_info(dev, "of_scan_pci_bridge(%s)\n", node->full_name);
+
+	/* parse bus-range property */
+	busrange = of_get_property(node, "bus-range", &len);
+	if (busrange == NULL || len != 8) {
+		pci_info(dev, "Can't get bus-range for PCI-PCI bridge %s\n",
+		       node->full_name);
+		return;
+	}
+
+	if (ofpci_verbose)
+		pci_info(dev, "    Bridge bus range [%u --> %u]\n",
+			 busrange[0], busrange[1]);
+
+	ranges = of_get_property(node, "ranges", &len);
+	simba = 0;
+	if (ranges == NULL) {
+		const char *model = of_get_property(node, "model", NULL);
+		if (model && !strcmp(model, "SUNW,simba"))
+			simba = 1;
+	}
+
+	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+	if (!bus) {
+		pci_err(dev, "Failed to create pci bus for %s\n",
+			node->full_name);
+		return;
+	}
+
+	bus->primary = dev->bus->number;
+	pci_bus_insert_busn_res(bus, busrange[0], busrange[1]);
+	bus->bridge_ctl = 0;
+
+	if (ofpci_verbose)
+		pci_info(dev, "    Bridge ranges[%p] simba[%d]\n",
+			 ranges, simba);
+
+	/* parse ranges property, or cook one up by hand for Simba */
+	/* PCI #address-cells == 3 and #size-cells == 2 always */
+	res = &dev->resource[PCI_BRIDGE_RESOURCES];
+	for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {
+		res->flags = 0;
+		bus->resource[i] = res;
+		++res;
+	}
+	if (simba) {
+		apb_fake_ranges(dev, bus, pbm);
+		goto after_ranges;
+	} else if (ranges == NULL) {
+		pci_read_bridge_bases(bus);
+		goto after_ranges;
+	}
+	i = 1;
+	for (; len >= 32; len -= 32, ranges += 8) {
+		u64 start;
+
+		if (ofpci_verbose)
+			pci_info(dev, "    RAW Range[%08x:%08x:%08x:%08x:%08x:%08x:"
+				 "%08x:%08x]\n",
+				 ranges[0], ranges[1], ranges[2], ranges[3],
+				 ranges[4], ranges[5], ranges[6], ranges[7]);
+
+		flags = pci_parse_of_flags(ranges[0]);
+		size = GET_64BIT(ranges, 6);
+		if (flags == 0 || size == 0)
+			continue;
+
+		/* On PCI-Express systems, PCI bridges that have no devices downstream
+		 * have a bogus size value where the first 32-bit cell is 0xffffffff.
+		 * This results in a bogus range where start + size overflows.
+		 *
+		 * Just skip these otherwise the kernel will complain when the resource
+		 * tries to be claimed.
+		 */
+		if (size >> 32 == 0xffffffff)
+			continue;
+
+		if (flags & IORESOURCE_IO) {
+			res = bus->resource[0];
+			if (res->flags) {
+				pci_err(dev, "ignoring extra I/O range"
+					" for bridge %s\n", node->full_name);
+				continue;
+			}
+		} else {
+			if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
+				pci_err(dev, "too many memory ranges"
+					" for bridge %s\n", node->full_name);
+				continue;
+			}
+			res = bus->resource[i];
+			++i;
+		}
+
+		res->flags = flags;
+		region.start = start = GET_64BIT(ranges, 1);
+		region.end = region.start + size - 1;
+
+		if (ofpci_verbose)
+			pci_info(dev, "      Using flags[%08x] start[%016llx] size[%016llx]\n",
+				 flags, start, size);
+
+		pcibios_bus_to_resource(dev->bus, res, &region);
+	}
+after_ranges:
+	sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
+		bus->number);
+	if (ofpci_verbose)
+		pci_info(dev, "    bus name: %s\n", bus->name);
+
+	pci_of_scan_bus(pbm, node, bus);
+}
+
+static void pci_of_scan_bus(struct pci_pbm_info *pbm,
+			    struct device_node *node,
+			    struct pci_bus *bus)
+{
+	struct device_node *child;
+	const u32 *reg;
+	int reglen, devfn, prev_devfn;
+	struct pci_dev *dev;
+
+	if (ofpci_verbose)
+		pci_info(bus, "scan_bus[%s] bus no %d\n",
+			 node->full_name, bus->number);
+
+	child = NULL;
+	prev_devfn = -1;
+	while ((child = of_get_next_child(node, child)) != NULL) {
+		if (ofpci_verbose)
+			pci_info(bus, "  * %s\n", child->full_name);
+		reg = of_get_property(child, "reg", &reglen);
+		if (reg == NULL || reglen < 20)
+			continue;
+
+		devfn = (reg[0] >> 8) & 0xff;
+
+		/* This is a workaround for some device trees
+		 * which list PCI devices twice.  On the V100
+		 * for example, device number 3 is listed twice:
+		 * once as "pm" and again as "lomp".
+		 */
+		if (devfn == prev_devfn)
+			continue;
+		prev_devfn = devfn;
+
+		/* create a new pci_dev for this device */
+		dev = of_create_pci_dev(pbm, child, bus, devfn);
+		if (!dev)
+			continue;
+		if (ofpci_verbose)
+			pci_info(dev, "dev header type: %x\n", dev->hdr_type);
+
+		if (pci_is_bridge(dev))
+			of_scan_pci_bridge(pbm, child, dev);
+	}
+}
+
+static ssize_t
+show_pciobppath_attr(struct device * dev, struct device_attribute * attr, char * buf)
+{
+	struct pci_dev *pdev;
+	struct device_node *dp;
+
+	pdev = to_pci_dev(dev);
+	dp = pdev->dev.of_node;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", dp->full_name);
+}
+
+static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH, show_pciobppath_attr, NULL);
+
+static void pci_bus_register_of_sysfs(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct pci_bus *child_bus;
+	int err;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* we don't really care if we can create this file or
+		 * not, but we need to assign the result of the call
+		 * or the world will fall under alien invasion and
+		 * everybody will be frozen on a spaceship ready to be
+		 * eaten on alpha centauri by some green and jelly
+		 * humanoid.
+		 */
+		err = sysfs_create_file(&dev->dev.kobj, &dev_attr_obppath.attr);
+		(void) err;
+	}
+	list_for_each_entry(child_bus, &bus->children, node)
+		pci_bus_register_of_sysfs(child_bus);
+}
+
+static void pci_claim_legacy_resources(struct pci_dev *dev)
+{
+	struct pci_bus_region region;
+	struct resource *p, *root, *conflict;
+
+	if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
+		return;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return;
+
+	p->name = "Video RAM area";
+	p->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+
+	region.start = 0xa0000UL;
+	region.end = region.start + 0x1ffffUL;
+	pcibios_bus_to_resource(dev->bus, p, &region);
+
+	root = pci_find_parent_resource(dev, p);
+	if (!root) {
+		pci_info(dev, "can't claim VGA legacy %pR: no compatible bridge window\n", p);
+		goto err;
+	}
+
+	conflict = request_resource_conflict(root, p);
+	if (conflict) {
+		pci_info(dev, "can't claim VGA legacy %pR: address conflict with %s %pR\n",
+			 p, conflict->name, conflict);
+		goto err;
+	}
+
+	pci_info(dev, "VGA legacy framebuffer %pR\n", p);
+	return;
+
+err:
+	kfree(p);
+}
+
+static void pci_claim_bus_resources(struct pci_bus *bus)
+{
+	struct pci_bus *child_bus;
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		int i;
+
+		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+			struct resource *r = &dev->resource[i];
+
+			if (r->parent || !r->start || !r->flags)
+				continue;
+
+			if (ofpci_verbose)
+				pci_info(dev, "Claiming Resource %d: %pR\n",
+					 i, r);
+
+			pci_claim_resource(dev, i);
+		}
+
+		pci_claim_legacy_resources(dev);
+	}
+
+	list_for_each_entry(child_bus, &bus->children, node)
+		pci_claim_bus_resources(child_bus);
+}
+
+struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
+				 struct device *parent)
+{
+	LIST_HEAD(resources);
+	struct device_node *node = pbm->op->dev.of_node;
+	struct pci_bus *bus;
+
+	printk("PCI: Scanning PBM %s\n", node->full_name);
+
+	pci_add_resource_offset(&resources, &pbm->io_space,
+				pbm->io_offset);
+	pci_add_resource_offset(&resources, &pbm->mem_space,
+				pbm->mem_offset);
+	if (pbm->mem64_space.flags)
+		pci_add_resource_offset(&resources, &pbm->mem64_space,
+					pbm->mem64_offset);
+	pbm->busn.start = pbm->pci_first_busno;
+	pbm->busn.end	= pbm->pci_last_busno;
+	pbm->busn.flags	= IORESOURCE_BUS;
+	pci_add_resource(&resources, &pbm->busn);
+	bus = pci_create_root_bus(parent, pbm->pci_first_busno, pbm->pci_ops,
+				  pbm, &resources);
+	if (!bus) {
+		printk(KERN_ERR "Failed to create bus for %s\n",
+		       node->full_name);
+		pci_free_resource_list(&resources);
+		return NULL;
+	}
+
+	pci_of_scan_bus(pbm, node, bus);
+	pci_bus_register_of_sysfs(bus);
+
+	pci_claim_bus_resources(bus);
+
+	pci_bus_add_devices(bus);
+	return bus;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, oldcmd;
+	int i;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	oldcmd = cmd;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<i)))
+			continue;
+
+		if (res->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (res->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (cmd != oldcmd) {
+		pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/* Platform support for /proc/bus/pci/X/Y mmap()s. */
+
+/* If the user uses a host-bridge as the PCI device, they may use
+ * this to perform a raw mmap() of the I/O or MEM space behind
+ * that controller.
+ *
+ * This can be useful for execution of x86 PCI bios initialization code
+ * on a PCI card, like the xfree86 int10 stuff does.
+ */
+static int __pci_mmap_make_offset_bus(struct pci_dev *pdev, struct vm_area_struct *vma,
+				      enum pci_mmap_state mmap_state)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	unsigned long space_size, user_offset, user_size;
+
+	if (mmap_state == pci_mmap_io) {
+		space_size = resource_size(&pbm->io_space);
+	} else {
+		space_size = resource_size(&pbm->mem_space);
+	}
+
+	/* Make sure the request is in range. */
+	user_offset = vma->vm_pgoff << PAGE_SHIFT;
+	user_size = vma->vm_end - vma->vm_start;
+
+	if (user_offset >= space_size ||
+	    (user_offset + user_size) > space_size)
+		return -EINVAL;
+
+	if (mmap_state == pci_mmap_io) {
+		vma->vm_pgoff = (pbm->io_space.start +
+				 user_offset) >> PAGE_SHIFT;
+	} else {
+		vma->vm_pgoff = (pbm->mem_space.start +
+				 user_offset) >> PAGE_SHIFT;
+	}
+
+	return 0;
+}
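+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * With sparc64's 8KB pages (PAGE_SHIFT == 13), a user mmap() at offset
+ * 0x4000 into a MEM space starting at 0x1ff00000000 is rebased above as
+ *
+ *	vma->vm_pgoff = (0x1ff00000000UL + 0x4000) >> 13;	// 0xff80002
+ *
+ * i.e. vm_pgoff now holds the physical page frame that
+ * io_remap_pfn_range() will map.
+ */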
+
+/* Adjust vm_pgoff of VMA such that it is the physical page offset
+ * corresponding to the 32-bit pci bus offset for DEV requested by the user.
+ *
+ * Basically, the user finds the base address for the device they wish
+ * to mmap.  They read the 32-bit value from the config space base register,
+ * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
+ * offset parameter of mmap on /proc/bus/pci/XXX for that device.
+ *
+ * Returns negative error code on failure, zero on success.
+ */
+static int __pci_mmap_make_offset(struct pci_dev *pdev,
+				  struct vm_area_struct *vma,
+				  enum pci_mmap_state mmap_state)
+{
+	unsigned long user_paddr, user_size;
+	int i, err;
+
+	/* First compute the physical address in vma->vm_pgoff,
+	 * making sure the user offset is within range in the
+	 * appropriate PCI space.
+	 */
+	err = __pci_mmap_make_offset_bus(pdev, vma, mmap_state);
+	if (err)
+		return err;
+
+	/* If this is a mapping on a host bridge, any address
+	 * is OK.
+	 */
+	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+		return err;
+
+	/* Otherwise make sure it's in the range for one of the
+	 * device's resources.
+	 */
+	user_paddr = vma->vm_pgoff << PAGE_SHIFT;
+	user_size = vma->vm_end - vma->vm_start;
+
+	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+		struct resource *rp = &pdev->resource[i];
+		resource_size_t aligned_end;
+
+		/* Active? */
+		if (!rp->flags)
+			continue;
+
+		/* Same type? */
+		if (i == PCI_ROM_RESOURCE) {
+			if (mmap_state != pci_mmap_mem)
+				continue;
+		} else {
+			if ((mmap_state == pci_mmap_io &&
+			     (rp->flags & IORESOURCE_IO) == 0) ||
+			    (mmap_state == pci_mmap_mem &&
+			     (rp->flags & IORESOURCE_MEM) == 0))
+				continue;
+		}
+
+		/* Align the resource end to the next page address.
+		 * PAGE_SIZE intentionally added instead of (PAGE_SIZE - 1),
+	 * because we actually need the address of the next byte
+		 * after rp->end.
+		 */
+		aligned_end = (rp->end + PAGE_SIZE) & PAGE_MASK;
+
+		if ((rp->start <= user_paddr) &&
+		    (user_paddr + user_size) <= aligned_end)
+			break;
+	}
+
+	if (i > PCI_ROM_RESOURCE)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
+ * device mapping.
+ */
+static void __pci_mmap_set_pgprot(struct pci_dev *dev, struct vm_area_struct *vma,
+					     enum pci_mmap_state mmap_state)
+{
+	/* Our io_remap_pfn_range takes care of this, do nothing.  */
+}
+
+/* Perform the actual remap of the pages for a PCI device mapping, as appropriate
+ * for this architecture.  The region in the process to map is described by vm_start
+ * and vm_end members of VMA, the base physical address is found in vm_pgoff.
+ * The pci device structure is provided so that architectures may make mapping
+ * decisions on a per-device or per-bus basis.
+ *
+ * Returns a negative error code on failure, zero on success.
+ */
+int pci_mmap_page_range(struct pci_dev *dev, int bar,
+			struct vm_area_struct *vma,
+			enum pci_mmap_state mmap_state, int write_combine)
+{
+	int ret;
+
+	ret = __pci_mmap_make_offset(dev, vma, mmap_state);
+	if (ret < 0)
+		return ret;
+
+	__pci_mmap_set_pgprot(dev, vma, mmap_state);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	ret = io_remap_pfn_range(vma, vma->vm_start,
+				 vma->vm_pgoff,
+				 vma->vm_end - vma->vm_start,
+				 vma->vm_page_prot);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+#ifdef CONFIG_NUMA
+int pcibus_to_node(struct pci_bus *pbus)
+{
+	struct pci_pbm_info *pbm = pbus->sysdata;
+
+	return pbm->numa_node;
+}
+EXPORT_SYMBOL(pcibus_to_node);
+#endif
+
+/* Return the domain number for this pci bus */
+
+int pci_domain_nr(struct pci_bus *pbus)
+{
+	struct pci_pbm_info *pbm = pbus->sysdata;
+	int ret;
+
+	if (!pbm) {
+		ret = -ENXIO;
+	} else {
+		ret = pbm->index;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(pci_domain_nr);
+
+#ifdef CONFIG_PCI_MSI
+int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	unsigned int irq;
+
+	if (!pbm->setup_msi_irq)
+		return -EINVAL;
+
+	return pbm->setup_msi_irq(&irq, pdev, desc);
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+	struct msi_desc *entry = irq_get_msi_desc(irq);
+	struct pci_dev *pdev = msi_desc_to_pci_dev(entry);
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+
+	if (pbm->teardown_msi_irq)
+		pbm->teardown_msi_irq(irq, pdev);
+}
+#endif /* CONFIG_PCI_MSI */
+
+static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit)
+{
+	struct pci_dev *ali_isa_bridge;
+	u8 val;
+
+	/* ALI sound chips generate 31-bits of DMA, a special register
+	 * determines what bit 31 is emitted as.
+	 */
+	ali_isa_bridge = pci_get_device(PCI_VENDOR_ID_AL,
+					 PCI_DEVICE_ID_AL_M1533,
+					 NULL);
+
+	pci_read_config_byte(ali_isa_bridge, 0x7e, &val);
+	if (set_bit)
+		val |= 0x01;
+	else
+		val &= ~0x01;
+	pci_write_config_byte(ali_isa_bridge, 0x7e, val);
+	pci_dev_put(ali_isa_bridge);
+}
+
+int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask)
+{
+	u64 dma_addr_mask;
+
+	if (pdev == NULL) {
+		dma_addr_mask = 0xffffffff;
+	} else {
+		struct iommu *iommu = pdev->dev.archdata.iommu;
+
+		dma_addr_mask = iommu->dma_addr_mask;
+
+		if (pdev->vendor == PCI_VENDOR_ID_AL &&
+		    pdev->device == PCI_DEVICE_ID_AL_M5451 &&
+		    device_mask == 0x7fffffff) {
+			ali_sound_dma_hack(pdev,
+					   (dma_addr_mask & 0x80000000) != 0);
+			return 1;
+		}
+	}
+
+	if (device_mask >= (1UL << 32UL))
+		return 0;
+
+	return (device_mask & dma_addr_mask) == dma_addr_mask;
+}
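+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * The check above succeeds only when the device mask covers the whole
+ * IOMMU DMA window, e.g. with dma_addr_mask == 0xffffffff:
+ *
+ *	device_mask 0xffffffff   -> 1 (covers the window)
+ *	device_mask 0x00ffffff   -> 0 (24-bit mask cannot reach all of it)
+ *	device_mask 0x7fffffffff -> 0 (>= 1UL << 32 is rejected outright)
+ */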
+
+void pci_resource_to_user(const struct pci_dev *pdev, int bar,
+			  const struct resource *rp, resource_size_t *start,
+			  resource_size_t *end)
+{
+	struct pci_bus_region region;
+
+	/*
+	 * "User" addresses are shown in /sys/devices/pci.../.../resource
+	 * and /proc/bus/pci/devices and used as mmap offsets for
+	 * /proc/bus/pci/BB/DD.F files (see proc_bus_pci_mmap()).
+	 *
+	 * On sparc, these are PCI bus addresses, i.e., raw BAR values.
+	 */
+	pcibios_resource_to_bus(pdev->bus, &region, (struct resource *) rp);
+	*start = region.start;
+	*end = region.end;
+}
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+	/* No special bus mastering setup handling */
+}
+
+#ifdef CONFIG_PCI_IOV
+int pcibios_add_device(struct pci_dev *dev)
+{
+	struct pci_dev *pdev;
+
+	/* Add sriov arch specific initialization here.
+	 * Copy dev_archdata from PF to VF
+	 */
+	if (dev->is_virtfn) {
+		struct dev_archdata *psd;
+
+		pdev = dev->physfn;
+		psd = &pdev->dev.archdata;
+		pci_init_dev_archdata(&dev->dev.archdata, psd->iommu,
+				      psd->stc, psd->host_controller, NULL,
+				      psd->numa_node);
+	}
+	return 0;
+}
+#endif /* CONFIG_PCI_IOV */
+
+static int __init pcibios_init(void)
+{
+	pci_dfl_cache_line_size = 64 >> 2;
+	return 0;
+}
+subsys_initcall(pcibios_init);
+
+#ifdef CONFIG_SYSFS
+
+#define SLOT_NAME_SIZE  11  /* Max decimal digits + null in u32 */
+
+static void pcie_bus_slot_names(struct pci_bus *pbus)
+{
+	struct pci_dev *pdev;
+	struct pci_bus *bus;
+
+	list_for_each_entry(pdev, &pbus->devices, bus_list) {
+		char name[SLOT_NAME_SIZE];
+		struct pci_slot *pci_slot;
+		const u32 *slot_num;
+		int len;
+
+		slot_num = of_get_property(pdev->dev.of_node,
+					   "physical-slot#", &len);
+
+		if (slot_num == NULL || len != 4)
+			continue;
+
+		snprintf(name, sizeof(name), "%u", slot_num[0]);
+		pci_slot = pci_create_slot(pbus, slot_num[0], name, NULL);
+
+		if (IS_ERR(pci_slot))
+			pr_err("PCI: pci_create_slot returned %ld.\n",
+			       PTR_ERR(pci_slot));
+	}
+
+	list_for_each_entry(bus, &pbus->children, node)
+		pcie_bus_slot_names(bus);
+}
+
+static void pci_bus_slot_names(struct device_node *node, struct pci_bus *bus)
+{
+	const struct pci_slot_names {
+		u32	slot_mask;
+		char	names[0];
+	} *prop;
+	const char *sp;
+	int len, i;
+	u32 mask;
+
+	prop = of_get_property(node, "slot-names", &len);
+	if (!prop)
+		return;
+
+	mask = prop->slot_mask;
+	sp = prop->names;
+
+	if (ofpci_verbose)
+		pci_info(bus, "Making slots for [%s] mask[0x%02x]\n",
+			 node->full_name, mask);
+
+	i = 0;
+	while (mask) {
+		struct pci_slot *pci_slot;
+		u32 this_bit = 1 << i;
+
+		if (!(mask & this_bit)) {
+			i++;
+			continue;
+		}
+
+		if (ofpci_verbose)
+			pci_info(bus, "Making slot [%s]\n", sp);
+
+		pci_slot = pci_create_slot(bus, i, sp, NULL);
+		if (IS_ERR(pci_slot))
+			pci_err(bus, "pci_create_slot returned %ld\n",
+				PTR_ERR(pci_slot));
+
+		sp += strlen(sp) + 1;
+		mask &= ~this_bit;
+		i++;
+	}
+}
+
+static int __init of_pci_slot_init(void)
+{
+	struct pci_bus *pbus = NULL;
+
+	while ((pbus = pci_find_next_bus(pbus)) != NULL) {
+		struct device_node *node;
+		struct pci_dev *pdev;
+
+		pdev = list_first_entry(&pbus->devices, struct pci_dev,
+					bus_list);
+
+		if (pdev && pci_is_pcie(pdev)) {
+			pcie_bus_slot_names(pbus);
+		} else {
+
+			if (pbus->self) {
+
+				/* PCI->PCI bridge */
+				node = pbus->self->dev.of_node;
+
+			} else {
+				struct pci_pbm_info *pbm = pbus->sysdata;
+
+				/* Host PCI controller */
+				node = pbm->op->dev.of_node;
+			}
+
+			pci_bus_slot_names(node, pbus);
+		}
+	}
+
+	return 0;
+}
+device_initcall(of_pci_slot_init);
+#endif
diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c
new file mode 100644
index 0000000..4759ccd
--- /dev/null
+++ b/arch/sparc/kernel/pci_common.c
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_common.c: PCI controller common support.
+ *
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/oplib.h>
+
+#include "pci_impl.h"
+#include "pci_sun4v.h"
+
+static int config_out_of_range(struct pci_pbm_info *pbm,
+			       unsigned long bus,
+			       unsigned long devfn,
+			       unsigned long reg)
+{
+	if (bus < pbm->pci_first_busno ||
+	    bus > pbm->pci_last_busno)
+		return 1;
+	return 0;
+}
+
+static void *sun4u_config_mkaddr(struct pci_pbm_info *pbm,
+				 unsigned long bus,
+				 unsigned long devfn,
+				 unsigned long reg)
+{
+	unsigned long rbits = pbm->config_space_reg_bits;
+
+	if (config_out_of_range(pbm, bus, devfn, reg))
+		return NULL;
+
+	reg = (reg & ((1 << rbits) - 1));
+	devfn <<= rbits;
+	bus <<= rbits + 8;
+
+	return (void *)	(pbm->config_space | bus | devfn | reg);
+}
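+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * With Fire's config_space_reg_bits == 12 the encoding above works out to
+ *
+ *	addr = config_space | (bus << 20) | (devfn << 12) | (reg & 0xfff)
+ *
+ * so bus 1, devfn 0x08, reg 0x10 maps to config_space + 0x108010.
+ */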
+
+/* At least on Sabre, it is necessary to access all PCI host controller
+ * registers at their natural size, otherwise zeros are returned.
+ * Strange but true, and I see no language in the UltraSPARC-IIi
+ * programmer's manual that mentions this even indirectly.
+ */
+static int sun4u_read_pci_cfg_host(struct pci_pbm_info *pbm,
+				   unsigned char bus, unsigned int devfn,
+				   int where, int size, u32 *value)
+{
+	u32 tmp32, *addr;
+	u16 tmp16;
+	u8 tmp8;
+
+	addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_SUCCESSFUL;
+
+	switch (size) {
+	case 1:
+		if (where < 8) {
+			unsigned long align = (unsigned long) addr;
+
+			align &= ~1;
+			pci_config_read16((u16 *)align, &tmp16);
+			if (where & 1)
+				*value = tmp16 >> 8;
+			else
+				*value = tmp16 & 0xff;
+		} else {
+			pci_config_read8((u8 *)addr, &tmp8);
+			*value = (u32) tmp8;
+		}
+		break;
+
+	case 2:
+		if (where < 8) {
+			pci_config_read16((u16 *)addr, &tmp16);
+			*value = (u32) tmp16;
+		} else {
+			pci_config_read8((u8 *)addr, &tmp8);
+			*value = (u32) tmp8;
+			pci_config_read8(((u8 *)addr) + 1, &tmp8);
+			*value |= ((u32) tmp8) << 8;
+		}
+		break;
+
+	case 4:
+		tmp32 = 0xffffffff;
+		sun4u_read_pci_cfg_host(pbm, bus, devfn,
+					where, 2, &tmp32);
+		*value = tmp32;
+
+		tmp32 = 0xffffffff;
+		sun4u_read_pci_cfg_host(pbm, bus, devfn,
+					where + 2, 2, &tmp32);
+		*value |= tmp32 << 16;
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4u_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			      int where, int size, u32 *value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	unsigned char bus = bus_dev->number;
+	u32 *addr;
+	u16 tmp16;
+	u8 tmp8;
+
+	switch (size) {
+	case 1:
+		*value = 0xff;
+		break;
+	case 2:
+		*value = 0xffff;
+		break;
+	case 4:
+		*value = 0xffffffff;
+		break;
+	}
+
+	if (!bus_dev->number && !PCI_SLOT(devfn))
+		return sun4u_read_pci_cfg_host(pbm, bus, devfn, where,
+					       size, value);
+
+	addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_SUCCESSFUL;
+
+	switch (size) {
+	case 1:
+		pci_config_read8((u8 *)addr, &tmp8);
+		*value = (u32) tmp8;
+		break;
+
+	case 2:
+		if (where & 0x01) {
+			printk("pci_read_config_word: misaligned reg [%x]\n",
+			       where);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		pci_config_read16((u16 *)addr, &tmp16);
+		*value = (u32) tmp16;
+		break;
+
+	case 4:
+		if (where & 0x03) {
+			printk("pci_read_config_dword: misaligned reg [%x]\n",
+			       where);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		pci_config_read32(addr, value);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4u_write_pci_cfg_host(struct pci_pbm_info *pbm,
+				    unsigned char bus, unsigned int devfn,
+				    int where, int size, u32 value)
+{
+	u32 *addr;
+
+	addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_SUCCESSFUL;
+
+	switch (size) {
+	case 1:
+		if (where < 8) {
+			unsigned long align = (unsigned long) addr;
+			u16 tmp16;
+
+			align &= ~1;
+			pci_config_read16((u16 *)align, &tmp16);
+			if (where & 1) {
+				tmp16 &= 0x00ff;
+				tmp16 |= value << 8;
+			} else {
+				tmp16 &= 0xff00;
+				tmp16 |= value;
+			}
+			pci_config_write16((u16 *)align, tmp16);
+		} else
+			pci_config_write8((u8 *)addr, value);
+		break;
+	case 2:
+		if (where < 8) {
+			pci_config_write16((u16 *)addr, value);
+		} else {
+			pci_config_write8((u8 *)addr, value & 0xff);
+			pci_config_write8(((u8 *)addr) + 1, value >> 8);
+		}
+		break;
+	case 4:
+		sun4u_write_pci_cfg_host(pbm, bus, devfn,
+					 where, 2, value & 0xffff);
+		sun4u_write_pci_cfg_host(pbm, bus, devfn,
+					 where + 2, 2, value >> 16);
+		break;
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4u_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			       int where, int size, u32 value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	unsigned char bus = bus_dev->number;
+	u32 *addr;
+
+	if (!bus_dev->number && !PCI_SLOT(devfn))
+		return sun4u_write_pci_cfg_host(pbm, bus, devfn, where,
+						size, value);
+
+	addr = sun4u_config_mkaddr(pbm, bus, devfn, where);
+	if (!addr)
+		return PCIBIOS_SUCCESSFUL;
+
+	switch (size) {
+	case 1:
+		pci_config_write8((u8 *)addr, value);
+		break;
+
+	case 2:
+		if (where & 0x01) {
+			printk("pci_write_config_word: misaligned reg [%x]\n",
+			       where);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		pci_config_write16((u16 *)addr, value);
+		break;
+
+	case 4:
+		if (where & 0x03) {
+			printk("pci_write_config_dword: misaligned reg [%x]\n",
+			       where);
+			return PCIBIOS_SUCCESSFUL;
+		}
+		pci_config_write32(addr, value);
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops sun4u_pci_ops = {
+	.read =		sun4u_read_pci_cfg,
+	.write =	sun4u_write_pci_cfg,
+};
+
+static int sun4v_read_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			      int where, int size, u32 *value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	u32 devhandle = pbm->devhandle;
+	unsigned int bus = bus_dev->number;
+	unsigned int device = PCI_SLOT(devfn);
+	unsigned int func = PCI_FUNC(devfn);
+	unsigned long ret;
+
+	if (config_out_of_range(pbm, bus, devfn, where)) {
+		ret = ~0UL;
+	} else {
+		ret = pci_sun4v_config_get(devhandle,
+				HV_PCI_DEVICE_BUILD(bus, device, func),
+				where, size);
+	}
+	switch (size) {
+	case 1:
+		*value = ret & 0xff;
+		break;
+	case 2:
+		*value = ret & 0xffff;
+		break;
+	case 4:
+		*value = ret & 0xffffffff;
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int sun4v_write_pci_cfg(struct pci_bus *bus_dev, unsigned int devfn,
+			       int where, int size, u32 value)
+{
+	struct pci_pbm_info *pbm = bus_dev->sysdata;
+	u32 devhandle = pbm->devhandle;
+	unsigned int bus = bus_dev->number;
+	unsigned int device = PCI_SLOT(devfn);
+	unsigned int func = PCI_FUNC(devfn);
+
+	if (config_out_of_range(pbm, bus, devfn, where)) {
+		/* Do nothing. */
+	} else {
+		/* We don't check for hypervisor errors here, but perhaps
+		 * we should and influence our return value depending upon
+		 * what kind of error is thrown.
+		 */
+		pci_sun4v_config_put(devhandle,
+				     HV_PCI_DEVICE_BUILD(bus, device, func),
+				     where, size, value);
+	}
+	return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops sun4v_pci_ops = {
+	.read =		sun4v_read_pci_cfg,
+	.write =	sun4v_write_pci_cfg,
+};
+
+void pci_get_pbm_props(struct pci_pbm_info *pbm)
+{
+	const u32 *val = of_get_property(pbm->op->dev.of_node, "bus-range", NULL);
+
+	pbm->pci_first_busno = val[0];
+	pbm->pci_last_busno = val[1];
+
+	val = of_get_property(pbm->op->dev.of_node, "ino-bitmap", NULL);
+	if (val) {
+		pbm->ino_bitmap = (((u64)val[1] << 32UL) |
+				   ((u64)val[0] <<  0UL));
+	}
+}
+
+static void pci_register_iommu_region(struct pci_pbm_info *pbm)
+{
+	const u32 *vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma",
+					  NULL);
+
+	if (vdma) {
+		struct resource *rp = kzalloc(sizeof(*rp), GFP_KERNEL);
+
+		if (!rp) {
+			pr_info("%s: Cannot allocate IOMMU resource.\n",
+				pbm->name);
+			return;
+		}
+		rp->name = "IOMMU";
+		rp->start = pbm->mem_space.start + (unsigned long) vdma[0];
+		rp->end = rp->start + (unsigned long) vdma[1] - 1UL;
+		rp->flags = IORESOURCE_BUSY;
+		if (request_resource(&pbm->mem_space, rp)) {
+			pr_info("%s: Unable to request IOMMU resource.\n",
+				pbm->name);
+			kfree(rp);
+		}
+	}
+}
+
+void pci_determine_mem_io_space(struct pci_pbm_info *pbm)
+{
+	const struct linux_prom_pci_ranges *pbm_ranges;
+	int i, saw_mem, saw_io;
+	int num_pbm_ranges;
+
+	/* Corresponding generic code in of_pci_get_host_bridge_resources() */
+
+	saw_mem = saw_io = 0;
+	pbm_ranges = of_get_property(pbm->op->dev.of_node, "ranges", &i);
+	if (!pbm_ranges) {
+		prom_printf("PCI: Fatal error, missing PBM ranges property "
+			    " for %s\n",
+			    pbm->name);
+		prom_halt();
+	}
+
+	num_pbm_ranges = i / sizeof(*pbm_ranges);
+	memset(&pbm->mem64_space, 0, sizeof(struct resource));
+
+	for (i = 0; i < num_pbm_ranges; i++) {
+		const struct linux_prom_pci_ranges *pr = &pbm_ranges[i];
+		unsigned long a, size, region_a;
+		u32 parent_phys_hi, parent_phys_lo;
+		u32 child_phys_mid, child_phys_lo;
+		u32 size_hi, size_lo;
+		int type;
+
+		parent_phys_hi = pr->parent_phys_hi;
+		parent_phys_lo = pr->parent_phys_lo;
+		child_phys_mid = pr->child_phys_mid;
+		child_phys_lo = pr->child_phys_lo;
+		if (tlb_type == hypervisor)
+			parent_phys_hi &= 0x0fffffff;
+
+		size_hi = pr->size_hi;
+		size_lo = pr->size_lo;
+
+		type = (pr->child_phys_hi >> 24) & 0x3;
+		a = (((unsigned long)parent_phys_hi << 32UL) |
+		     ((unsigned long)parent_phys_lo  <<  0UL));
+		region_a = (((unsigned long)child_phys_mid << 32UL) |
+		     ((unsigned long)child_phys_lo  <<  0UL));
+		size = (((unsigned long)size_hi << 32UL) |
+			((unsigned long)size_lo  <<  0UL));
+
+		switch (type) {
+		case 0:
+			/* PCI config space, 16MB */
+			pbm->config_space = a;
+			break;
+
+		case 1:
+			/* 16-bit IO space, 16MB */
+			pbm->io_space.start = a;
+			pbm->io_space.end = a + size - 1UL;
+			pbm->io_space.flags = IORESOURCE_IO;
+			pbm->io_offset = a - region_a;
+			saw_io = 1;
+			break;
+
+		case 2:
+			/* 32-bit MEM space, 2GB */
+			pbm->mem_space.start = a;
+			pbm->mem_space.end = a + size - 1UL;
+			pbm->mem_space.flags = IORESOURCE_MEM;
+			pbm->mem_offset = a - region_a;
+			saw_mem = 1;
+			break;
+
+		case 3:
+			/* 64-bit MEM handling */
+			pbm->mem64_space.start = a;
+			pbm->mem64_space.end = a + size - 1UL;
+			pbm->mem64_space.flags = IORESOURCE_MEM;
+			pbm->mem64_offset = a - region_a;
+			saw_mem = 1;
+			break;
+
+		default:
+			break;
+		}
+	}
+
+	if (!saw_io || !saw_mem) {
+		prom_printf("%s: Fatal error, missing %s PBM range.\n",
+			    pbm->name,
+			    (!saw_io ? "IO" : "MEM"));
+		prom_halt();
+	}
+
+	if (pbm->io_space.flags)
+		printk("%s: PCI IO %pR offset %llx\n",
+		       pbm->name, &pbm->io_space, pbm->io_offset);
+	if (pbm->mem_space.flags)
+		printk("%s: PCI MEM %pR offset %llx\n",
+		       pbm->name, &pbm->mem_space, pbm->mem_offset);
+	if (pbm->mem64_space.flags && pbm->mem_space.flags) {
+		if (pbm->mem64_space.start <= pbm->mem_space.end)
+			pbm->mem64_space.start = pbm->mem_space.end + 1;
+		if (pbm->mem64_space.start > pbm->mem64_space.end)
+			pbm->mem64_space.flags = 0;
+	}
+
+	if (pbm->mem64_space.flags)
+		printk("%s: PCI MEM64 %pR offset %llx\n",
+		       pbm->name, &pbm->mem64_space, pbm->mem64_offset);
+
+	pbm->io_space.name = pbm->mem_space.name = pbm->name;
+	pbm->mem64_space.name = pbm->name;
+
+	request_resource(&ioport_resource, &pbm->io_space);
+	request_resource(&iomem_resource, &pbm->mem_space);
+	if (pbm->mem64_space.flags)
+		request_resource(&iomem_resource, &pbm->mem64_space);
+
+	pci_register_iommu_region(pbm);
+}
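+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * The range type switched on above is the "ss" space code taken from the
+ * child phys.hi cell:
+ *
+ *	(0x01000000 >> 24) & 0x3 == 1	// 16-bit I/O range
+ *	(0x02000000 >> 24) & 0x3 == 2	// 32-bit MEM range
+ *	(0x03000000 >> 24) & 0x3 == 3	// 64-bit MEM range
+ */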
+
+/* Generic helper routines for PCI error reporting. */
+void pci_scan_for_target_abort(struct pci_pbm_info *pbm,
+			       struct pci_bus *pbus)
+{
+	struct pci_dev *pdev;
+	struct pci_bus *bus;
+
+	list_for_each_entry(pdev, &pbus->devices, bus_list) {
+		u16 status, error_bits;
+
+		pci_read_config_word(pdev, PCI_STATUS, &status);
+		error_bits =
+			(status & (PCI_STATUS_SIG_TARGET_ABORT |
+				   PCI_STATUS_REC_TARGET_ABORT));
+		if (error_bits) {
+			pci_write_config_word(pdev, PCI_STATUS, error_bits);
+			pci_info(pdev, "%s: Device saw Target Abort [%016x]\n",
+				 pbm->name, status);
+		}
+	}
+
+	list_for_each_entry(bus, &pbus->children, node)
+		pci_scan_for_target_abort(pbm, bus);
+}
+
+void pci_scan_for_master_abort(struct pci_pbm_info *pbm,
+			       struct pci_bus *pbus)
+{
+	struct pci_dev *pdev;
+	struct pci_bus *bus;
+
+	list_for_each_entry(pdev, &pbus->devices, bus_list) {
+		u16 status, error_bits;
+
+		pci_read_config_word(pdev, PCI_STATUS, &status);
+		error_bits =
+			(status & (PCI_STATUS_REC_MASTER_ABORT));
+		if (error_bits) {
+			pci_write_config_word(pdev, PCI_STATUS, error_bits);
+			pci_info(pdev, "%s: Device received Master Abort "
+				 "[%016x]\n", pbm->name, status);
+		}
+	}
+
+	list_for_each_entry(bus, &pbus->children, node)
+		pci_scan_for_master_abort(pbm, bus);
+}
+
+void pci_scan_for_parity_error(struct pci_pbm_info *pbm,
+			       struct pci_bus *pbus)
+{
+	struct pci_dev *pdev;
+	struct pci_bus *bus;
+
+	list_for_each_entry(pdev, &pbus->devices, bus_list) {
+		u16 status, error_bits;
+
+		pci_read_config_word(pdev, PCI_STATUS, &status);
+		error_bits =
+			(status & (PCI_STATUS_PARITY |
+				   PCI_STATUS_DETECTED_PARITY));
+		if (error_bits) {
+			pci_write_config_word(pdev, PCI_STATUS, error_bits);
+			pci_info(pdev, "%s: Device saw Parity Error [%016x]\n",
+				 pbm->name, status);
+		}
+	}
+
+	list_for_each_entry(bus, &pbus->children, node)
+		pci_scan_for_parity_error(pbm, bus);
+}
diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c
new file mode 100644
index 0000000..be71ae0
--- /dev/null
+++ b/arch/sparc/kernel/pci_fire.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_fire.c: Sun4u platform PCI-E controller support.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/irq.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/irq.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+
+#define DRIVER_NAME	"fire"
+#define PFX		DRIVER_NAME ": "
+
+#define FIRE_IOMMU_CONTROL	0x40000UL
+#define FIRE_IOMMU_TSBBASE	0x40008UL
+#define FIRE_IOMMU_FLUSH	0x40100UL
+#define FIRE_IOMMU_FLUSHINV	0x40108UL
+
+static int pci_fire_pbm_iommu_init(struct pci_pbm_info *pbm)
+{
+	struct iommu *iommu = pbm->iommu;
+	u32 vdma[2], dma_mask;
+	u64 control;
+	int tsbsize, err;
+
+	/* No virtual-dma property on these guys, use largest size.  */
+	vdma[0] = 0xc0000000; /* base */
+	vdma[1] = 0x40000000; /* size */
+	dma_mask = 0xffffffff;
+	tsbsize = 128;
+
+	/* Register addresses. */
+	iommu->iommu_control  = pbm->pbm_regs + FIRE_IOMMU_CONTROL;
+	iommu->iommu_tsbbase  = pbm->pbm_regs + FIRE_IOMMU_TSBBASE;
+	iommu->iommu_flush    = pbm->pbm_regs + FIRE_IOMMU_FLUSH;
+	iommu->iommu_flushinv = pbm->pbm_regs + FIRE_IOMMU_FLUSHINV;
+
+	/* We use the main control/status register of FIRE as the write
+	 * completion register.
+	 */
+	iommu->write_complete_reg = pbm->controller_regs + 0x410000UL;
+
+	/*
+	 * Invalidate TLB Entries.
+	 */
+	upa_writeq(~(u64)0, iommu->iommu_flushinv);
+
+	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+			       pbm->numa_node);
+	if (err)
+		return err;
+
+	upa_writeq(__pa(iommu->page_table) | 0x7UL, iommu->iommu_tsbbase);
+
+	control = upa_readq(iommu->iommu_control);
+	control |= (0x00000400 /* TSB cache snoop enable */	|
+		    0x00000300 /* Cache mode */			|
+		    0x00000002 /* Bypass enable */		|
+		    0x00000001 /* Translation enable */);
+	upa_writeq(control, iommu->iommu_control);
+
+	return 0;
+}
+
+#ifdef CONFIG_PCI_MSI
+struct pci_msiq_entry {
+	u64		word0;
+#define MSIQ_WORD0_RESV			0x8000000000000000UL
+#define MSIQ_WORD0_FMT_TYPE		0x7f00000000000000UL
+#define MSIQ_WORD0_FMT_TYPE_SHIFT	56
+#define MSIQ_WORD0_LEN			0x00ffc00000000000UL
+#define MSIQ_WORD0_LEN_SHIFT		46
+#define MSIQ_WORD0_ADDR0		0x00003fff00000000UL
+#define MSIQ_WORD0_ADDR0_SHIFT		32
+#define MSIQ_WORD0_RID			0x00000000ffff0000UL
+#define MSIQ_WORD0_RID_SHIFT		16
+#define MSIQ_WORD0_DATA0		0x000000000000ffffUL
+#define MSIQ_WORD0_DATA0_SHIFT		0
+
+#define MSIQ_TYPE_MSG			0x6
+#define MSIQ_TYPE_MSI32			0xb
+#define MSIQ_TYPE_MSI64			0xf
+
+	u64		word1;
+#define MSIQ_WORD1_ADDR1		0xffffffffffff0000UL
+#define MSIQ_WORD1_ADDR1_SHIFT		16
+#define MSIQ_WORD1_DATA1		0x000000000000ffffUL
+#define MSIQ_WORD1_DATA1_SHIFT		0
+
+	u64		resv[6];
+};
+
+/* All MSI registers are offset from pbm->pbm_regs */
+#define EVENT_QUEUE_BASE_ADDR_REG	0x010000UL
+#define  EVENT_QUEUE_BASE_ADDR_ALL_ONES	0xfffc000000000000UL
+
+#define EVENT_QUEUE_CONTROL_SET(EQ)	(0x011000UL + (EQ) * 0x8UL)
+#define  EVENT_QUEUE_CONTROL_SET_OFLOW	0x0200000000000000UL
+#define  EVENT_QUEUE_CONTROL_SET_EN	0x0000100000000000UL
+
+#define EVENT_QUEUE_CONTROL_CLEAR(EQ)	(0x011200UL + (EQ) * 0x8UL)
+#define  EVENT_QUEUE_CONTROL_CLEAR_OF	0x0200000000000000UL
+#define  EVENT_QUEUE_CONTROL_CLEAR_E2I	0x0000800000000000UL
+#define  EVENT_QUEUE_CONTROL_CLEAR_DIS	0x0000100000000000UL
+
+#define EVENT_QUEUE_STATE(EQ)		(0x011400UL + (EQ) * 0x8UL)
+#define  EVENT_QUEUE_STATE_MASK		0x0000000000000007UL
+#define  EVENT_QUEUE_STATE_IDLE		0x0000000000000001UL
+#define  EVENT_QUEUE_STATE_ACTIVE	0x0000000000000002UL
+#define  EVENT_QUEUE_STATE_ERROR	0x0000000000000004UL
+
+#define EVENT_QUEUE_TAIL(EQ)		(0x011600UL + (EQ) * 0x8UL)
+#define  EVENT_QUEUE_TAIL_OFLOW		0x0200000000000000UL
+#define  EVENT_QUEUE_TAIL_VAL		0x000000000000007fUL
+
+#define EVENT_QUEUE_HEAD(EQ)		(0x011800UL + (EQ) * 0x8UL)
+#define  EVENT_QUEUE_HEAD_VAL		0x000000000000007fUL
+
+#define MSI_MAP(MSI)			(0x020000UL + (MSI) * 0x8UL)
+#define  MSI_MAP_VALID			0x8000000000000000UL
+#define  MSI_MAP_EQWR_N			0x4000000000000000UL
+#define  MSI_MAP_EQNUM			0x000000000000003fUL
+
+#define MSI_CLEAR(MSI)			(0x028000UL + (MSI) * 0x8UL)
+#define  MSI_CLEAR_EQWR_N		0x4000000000000000UL
+
+#define IMONDO_DATA0			0x02C000UL
+#define  IMONDO_DATA0_DATA		0xffffffffffffffc0UL
+
+#define IMONDO_DATA1			0x02C008UL
+#define  IMONDO_DATA1_DATA		0xffffffffffffffffUL
+
+#define MSI_32BIT_ADDR			0x034000UL
+#define  MSI_32BIT_ADDR_VAL		0x00000000ffff0000UL
+
+#define MSI_64BIT_ADDR			0x034008UL
+#define  MSI_64BIT_ADDR_VAL		0xffffffffffff0000UL
+
+static int pci_fire_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+			     unsigned long *head)
+{
+	*head = upa_readq(pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+	return 0;
+}
+
+static int pci_fire_dequeue_msi(struct pci_pbm_info *pbm, unsigned long msiqid,
+				unsigned long *head, unsigned long *msi)
+{
+	unsigned long type_fmt, type, msi_num;
+	struct pci_msiq_entry *base, *ep;
+
+	base = (pbm->msi_queues + ((msiqid - pbm->msiq_first) * 8192));
+	ep = &base[*head];
+
+	if ((ep->word0 & MSIQ_WORD0_FMT_TYPE) == 0)
+		return 0;
+
+	type_fmt = ((ep->word0 & MSIQ_WORD0_FMT_TYPE) >>
+		    MSIQ_WORD0_FMT_TYPE_SHIFT);
+	type = (type_fmt >> 3);
+	if (unlikely(type != MSIQ_TYPE_MSI32 &&
+		     type != MSIQ_TYPE_MSI64))
+		return -EINVAL;
+
+	*msi = msi_num = ((ep->word0 & MSIQ_WORD0_DATA0) >>
+			  MSIQ_WORD0_DATA0_SHIFT);
+
+	upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi_num));
+
+	/* Clear the entry.  */
+	ep->word0 &= ~MSIQ_WORD0_FMT_TYPE;
+
+	/* Go to next entry in ring.  */
+	(*head)++;
+	if (*head >= pbm->msiq_ent_count)
+		*head = 0;
+
+	return 1;
+}
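+
+/* Editor's note -- an illustrative sketch, not part of the original file.
+ * For a 32-bit MSI the 7-bit FMT_TYPE field of word0 carries
+ * MSIQ_TYPE_MSI32 (0xb) in its upper four bits, so the decode above runs:
+ *
+ *	u64 word0 = 0x5800000000001234UL;
+ *	type_fmt = (word0 & MSIQ_WORD0_FMT_TYPE) >> 56;	// 0x58
+ *	type	 = type_fmt >> 3;			// 0xb == MSIQ_TYPE_MSI32
+ *	msi	 = word0 & MSIQ_WORD0_DATA0;		// 0x1234
+ */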
+
+static int pci_fire_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+			     unsigned long head)
+{
+	upa_writeq(head, pbm->pbm_regs + EVENT_QUEUE_HEAD(msiqid));
+	return 0;
+}
+
+static int pci_fire_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+			      unsigned long msi, int is_msi64)
+{
+	u64 val;
+
+	val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+	val &= ~(MSI_MAP_EQNUM);
+	val |= msiqid;
+	upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+	upa_writeq(MSI_CLEAR_EQWR_N, pbm->pbm_regs + MSI_CLEAR(msi));
+
+	val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+	val |= MSI_MAP_VALID;
+	upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+	return 0;
+}
+
+static int pci_fire_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
+{
+	u64 val;
+
+	val = upa_readq(pbm->pbm_regs + MSI_MAP(msi));
+
+	val &= ~MSI_MAP_VALID;
+
+	upa_writeq(val, pbm->pbm_regs + MSI_MAP(msi));
+
+	return 0;
+}
+
+static int pci_fire_msiq_alloc(struct pci_pbm_info *pbm)
+{
+	unsigned long pages, order, i;
+
+	order = get_order(512 * 1024);
+	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+	if (pages == 0UL) {
+		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+		       order);
+		return -ENOMEM;
+	}
+	memset((char *)pages, 0, PAGE_SIZE << order);
+	pbm->msi_queues = (void *) pages;
+
+	upa_writeq((EVENT_QUEUE_BASE_ADDR_ALL_ONES |
+		    __pa(pbm->msi_queues)),
+		   pbm->pbm_regs + EVENT_QUEUE_BASE_ADDR_REG);
+
+	upa_writeq(pbm->portid << 6, pbm->pbm_regs + IMONDO_DATA0);
+	upa_writeq(0, pbm->pbm_regs + IMONDO_DATA1);
+
+	upa_writeq(pbm->msi32_start, pbm->pbm_regs + MSI_32BIT_ADDR);
+	upa_writeq(pbm->msi64_start, pbm->pbm_regs + MSI_64BIT_ADDR);
+
+	for (i = 0; i < pbm->msiq_num; i++) {
+		upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_HEAD(i));
+		upa_writeq(0, pbm->pbm_regs + EVENT_QUEUE_TAIL(i));
+	}
+
+	return 0;
+}
+
+static void pci_fire_msiq_free(struct pci_pbm_info *pbm)
+{
+	unsigned long pages, order;
+
+	order = get_order(512 * 1024);
+	pages = (unsigned long) pbm->msi_queues;
+
+	free_pages(pages, order);
+
+	pbm->msi_queues = NULL;
+}
+
+static int pci_fire_msiq_build_irq(struct pci_pbm_info *pbm,
+				   unsigned long msiqid,
+				   unsigned long devino)
+{
+	unsigned long cregs = (unsigned long) pbm->pbm_regs;
+	unsigned long imap_reg, iclr_reg, int_ctrlr;
+	unsigned int irq;
+	int fixup;
+	u64 val;
+
+	imap_reg = cregs + (0x001000UL + (devino * 0x08UL));
+	iclr_reg = cregs + (0x001400UL + (devino * 0x08UL));
+
+	/* XXX iterate amongst the 4 IRQ controllers XXX */
+	int_ctrlr = (1UL << 6);
+
+	val = upa_readq(imap_reg);
+	val |= (1UL << 63) | int_ctrlr;
+	upa_writeq(val, imap_reg);
+
+	fixup = ((pbm->portid << 6) | devino) - int_ctrlr;
+
+	irq = build_irq(fixup, iclr_reg, imap_reg);
+	if (!irq)
+		return -ENOMEM;
+
+	upa_writeq(EVENT_QUEUE_CONTROL_SET_EN,
+		   pbm->pbm_regs + EVENT_QUEUE_CONTROL_SET(msiqid));
+
+	return irq;
+}
+
+static const struct sparc64_msiq_ops pci_fire_msiq_ops = {
+	.get_head	=	pci_fire_get_head,
+	.dequeue_msi	=	pci_fire_dequeue_msi,
+	.set_head	=	pci_fire_set_head,
+	.msi_setup	=	pci_fire_msi_setup,
+	.msi_teardown	=	pci_fire_msi_teardown,
+	.msiq_alloc	=	pci_fire_msiq_alloc,
+	.msiq_free	=	pci_fire_msiq_free,
+	.msiq_build_irq	=	pci_fire_msiq_build_irq,
+};
+
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+	sparc64_pbm_msi_init(pbm, &pci_fire_msiq_ops);
+}
+#else /* CONFIG_PCI_MSI */
+static void pci_fire_msi_init(struct pci_pbm_info *pbm)
+{
+}
+#endif /* !(CONFIG_PCI_MSI) */
+
+/* Based at pbm->controller_regs */
+#define FIRE_PARITY_CONTROL	0x470010UL
+#define  FIRE_PARITY_ENAB	0x8000000000000000UL
+#define FIRE_FATAL_RESET_CTL	0x471028UL
+#define  FIRE_FATAL_RESET_SPARE	0x0000000004000000UL
+#define  FIRE_FATAL_RESET_MB	0x0000000002000000UL
+#define  FIRE_FATAL_RESET_CPE	0x0000000000008000UL
+#define  FIRE_FATAL_RESET_APE	0x0000000000004000UL
+#define  FIRE_FATAL_RESET_PIO	0x0000000000000040UL
+#define  FIRE_FATAL_RESET_JW	0x0000000000000004UL
+#define  FIRE_FATAL_RESET_JI	0x0000000000000002UL
+#define  FIRE_FATAL_RESET_JR	0x0000000000000001UL
+#define FIRE_CORE_INTR_ENABLE	0x471800UL
+
+/* Based at pbm->pbm_regs */
+#define FIRE_TLU_CTRL		0x80000UL
+#define  FIRE_TLU_CTRL_TIM	0x00000000da000000UL
+#define  FIRE_TLU_CTRL_QDET	0x0000000000000100UL
+#define  FIRE_TLU_CTRL_CFG	0x0000000000000001UL
+#define FIRE_TLU_DEV_CTRL	0x90008UL
+#define FIRE_TLU_LINK_CTRL	0x90020UL
+#define FIRE_TLU_LINK_CTRL_CLK	0x0000000000000040UL
+#define FIRE_LPU_RESET		0xe2008UL
+#define FIRE_LPU_LLCFG		0xe2200UL
+#define  FIRE_LPU_LLCFG_VC0	0x0000000000000100UL
+#define FIRE_LPU_FCTRL_UCTRL	0xe2240UL
+#define  FIRE_LPU_FCTRL_UCTRL_N	0x0000000000000002UL
+#define  FIRE_LPU_FCTRL_UCTRL_P	0x0000000000000001UL
+#define FIRE_LPU_TXL_FIFOP	0xe2430UL
+#define FIRE_LPU_LTSSM_CFG2	0xe2788UL
+#define FIRE_LPU_LTSSM_CFG3	0xe2790UL
+#define FIRE_LPU_LTSSM_CFG4	0xe2798UL
+#define FIRE_LPU_LTSSM_CFG5	0xe27a0UL
+#define FIRE_DMC_IENAB		0x31800UL
+#define FIRE_DMC_DBG_SEL_A	0x53000UL
+#define FIRE_DMC_DBG_SEL_B	0x53008UL
+#define FIRE_PEC_IENAB		0x51800UL
+
+static void pci_fire_hw_init(struct pci_pbm_info *pbm)
+{
+	u64 val;
+
+	upa_writeq(FIRE_PARITY_ENAB,
+		   pbm->controller_regs + FIRE_PARITY_CONTROL);
+
+	upa_writeq((FIRE_FATAL_RESET_SPARE |
+		    FIRE_FATAL_RESET_MB |
+		    FIRE_FATAL_RESET_CPE |
+		    FIRE_FATAL_RESET_APE |
+		    FIRE_FATAL_RESET_PIO |
+		    FIRE_FATAL_RESET_JW |
+		    FIRE_FATAL_RESET_JI |
+		    FIRE_FATAL_RESET_JR),
+		   pbm->controller_regs + FIRE_FATAL_RESET_CTL);
+
+	upa_writeq(~(u64)0, pbm->controller_regs + FIRE_CORE_INTR_ENABLE);
+
+	val = upa_readq(pbm->pbm_regs + FIRE_TLU_CTRL);
+	val |= (FIRE_TLU_CTRL_TIM |
+		FIRE_TLU_CTRL_QDET |
+		FIRE_TLU_CTRL_CFG);
+	upa_writeq(val, pbm->pbm_regs + FIRE_TLU_CTRL);
+	upa_writeq(0, pbm->pbm_regs + FIRE_TLU_DEV_CTRL);
+	upa_writeq(FIRE_TLU_LINK_CTRL_CLK,
+		   pbm->pbm_regs + FIRE_TLU_LINK_CTRL);
+
+	upa_writeq(0, pbm->pbm_regs + FIRE_LPU_RESET);
+	upa_writeq(FIRE_LPU_LLCFG_VC0, pbm->pbm_regs + FIRE_LPU_LLCFG);
+	upa_writeq((FIRE_LPU_FCTRL_UCTRL_N | FIRE_LPU_FCTRL_UCTRL_P),
+		   pbm->pbm_regs + FIRE_LPU_FCTRL_UCTRL);
+	upa_writeq(((0xffff << 16) | (0x0000 << 0)),
+		   pbm->pbm_regs + FIRE_LPU_TXL_FIFOP);
+	upa_writeq(3000000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG2);
+	upa_writeq(500000, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG3);
+	upa_writeq((2 << 16) | (140 << 8),
+		   pbm->pbm_regs + FIRE_LPU_LTSSM_CFG4);
+	upa_writeq(0, pbm->pbm_regs + FIRE_LPU_LTSSM_CFG5);
+
+	upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_DMC_IENAB);
+	upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_A);
+	upa_writeq(0, pbm->pbm_regs + FIRE_DMC_DBG_SEL_B);
+
+	upa_writeq(~(u64)0, pbm->pbm_regs + FIRE_PEC_IENAB);
+}
+
+static int pci_fire_pbm_init(struct pci_pbm_info *pbm,
+			     struct platform_device *op, u32 portid)
+{
+	const struct linux_prom64_registers *regs;
+	struct device_node *dp = op->dev.of_node;
+	int err;
+
+	pbm->numa_node = -1;
+
+	pbm->pci_ops = &sun4u_pci_ops;
+	pbm->config_space_reg_bits = 12;
+
+	pbm->index = pci_num_pbms++;
+
+	pbm->portid = portid;
+	pbm->op = op;
+	pbm->name = dp->full_name;
+
+	regs = of_get_property(dp, "reg", NULL);
+	pbm->pbm_regs = regs[0].phys_addr;
+	pbm->controller_regs = regs[1].phys_addr - 0x410000UL;
+
+	printk("%s: SUN4U PCIE Bus Module\n", pbm->name);
+
+	pci_determine_mem_io_space(pbm);
+
+	pci_get_pbm_props(pbm);
+
+	pci_fire_hw_init(pbm);
+
+	err = pci_fire_pbm_iommu_init(pbm);
+	if (err)
+		return err;
+
+	pci_fire_msi_init(pbm);
+
+	pbm->pci_bus = pci_scan_one_pbm(pbm, &op->dev);
+
+	/* XXX register error interrupt handlers XXX */
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	return 0;
+}
+
+static int fire_probe(struct platform_device *op)
+{
+	struct device_node *dp = op->dev.of_node;
+	struct pci_pbm_info *pbm;
+	struct iommu *iommu;
+	u32 portid;
+	int err;
+
+	portid = of_getintprop_default(dp, "portid", 0xff);
+
+	err = -ENOMEM;
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (!pbm) {
+		printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+		goto out_err;
+	}
+
+	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+	if (!iommu) {
+		printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+		goto out_free_controller;
+	}
+
+	pbm->iommu = iommu;
+
+	err = pci_fire_pbm_init(pbm, op, portid);
+	if (err)
+		goto out_free_iommu;
+
+	dev_set_drvdata(&op->dev, pbm);
+
+	return 0;
+
+out_free_iommu:
+	kfree(pbm->iommu);
+
+out_free_controller:
+	kfree(pbm);
+
+out_err:
+	return err;
+}
+
+static const struct of_device_id fire_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pciex108e,80f0",
+	},
+	{},
+};
+
+static struct platform_driver fire_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = fire_match,
+	},
+	.probe		= fire_probe,
+};
+
+static int __init fire_init(void)
+{
+	return platform_driver_register(&fire_driver);
+}
+
+subsys_initcall(fire_init);
diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h
new file mode 100644
index 0000000..4e3d151
--- /dev/null
+++ b/arch/sparc/kernel/pci_impl.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* pci_impl.h: Helper definitions for PCI controller support.
+ *
+ * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef PCI_IMPL_H
+#define PCI_IMPL_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/of_device.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/iommu.h>
+
+/* The abstraction used here is that there are PCI controllers,
+ * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
+ * underneath.  Each PCI bus module uses an IOMMU (shared by both
+ * PBMs of a controller, or per-PBM), and if a streaming buffer
+ * is present, each PCI bus module has its own (i.e. the IOMMU
+ * might be shared between PBMs, the STC is never shared).
+ * Furthermore, each PCI bus module controls its own autonomous
+ * PCI bus.
+ */
+
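+/* Streaming cache flush protocol: INIT clears the in-memory flush
+ * flag before a flush-sync is posted; the hardware writes the flag
+ * non-zero once the flush completes, which SET is used to poll for.
+ */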
+#define PCI_STC_FLUSHFLAG_INIT(STC) \
+	(*((STC)->strbuf_flushflag) = 0UL)
+#define PCI_STC_FLUSHFLAG_SET(STC) \
+	(*((STC)->strbuf_flushflag) != 0UL)
+
+#ifdef CONFIG_PCI_MSI
+struct pci_pbm_info;
+struct sparc64_msiq_ops {
+	int (*get_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+			unsigned long *head);
+	int (*dequeue_msi)(struct pci_pbm_info *pbm, unsigned long msiqid,
+			   unsigned long *head, unsigned long *msi);
+	int (*set_head)(struct pci_pbm_info *pbm, unsigned long msiqid,
+			unsigned long head);
+	int (*msi_setup)(struct pci_pbm_info *pbm, unsigned long msiqid,
+			 unsigned long msi, int is_msi64);
+	int (*msi_teardown)(struct pci_pbm_info *pbm, unsigned long msi);
+	int (*msiq_alloc)(struct pci_pbm_info *pbm);
+	void (*msiq_free)(struct pci_pbm_info *pbm);
+	int (*msiq_build_irq)(struct pci_pbm_info *pbm, unsigned long msiqid,
+			      unsigned long devino);
+};
+
+void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+			  const struct sparc64_msiq_ops *ops);
+
+struct sparc64_msiq_cookie {
+	struct pci_pbm_info *pbm;
+	unsigned long msiqid;
+};
+#endif
+
+struct pci_pbm_info {
+	struct pci_pbm_info		*next;
+	struct pci_pbm_info		*sibling;
+	int				index;
+
+	/* Physical address base of controller registers. */
+	unsigned long			controller_regs;
+
+	/* Physical address base of PBM registers. */
+	unsigned long			pbm_regs;
+
+	/* Physical address of DMA sync register, if any.  */
+	unsigned long			sync_reg;
+
+	/* Opaque 32-bit system bus Port ID. */
+	u32				portid;
+
+	/* Opaque 32-bit handle used for hypervisor calls.  */
+	u32				devhandle;
+
+	/* Chipset version information. */
+	int				chip_type;
+#define PBM_CHIP_TYPE_SABRE		1
+#define PBM_CHIP_TYPE_PSYCHO		2
+#define PBM_CHIP_TYPE_SCHIZO		3
+#define PBM_CHIP_TYPE_SCHIZO_PLUS	4
+#define PBM_CHIP_TYPE_TOMATILLO		5
+	int				chip_version;
+	int				chip_revision;
+
+	/* Name used for top-level resources. */
+	const char			*name;
+
+	/* OBP specific information. */
+	struct platform_device		*op;
+	u64				ino_bitmap;
+
+	/* PBM I/O and Memory space resources. */
+	struct resource			io_space;
+	struct resource			mem_space;
+	struct resource			mem64_space;
+	struct resource			busn;
+	/* Offsets applied to the I/O, MEM and MEM64 resources above. */
+	resource_size_t			io_offset;
+	resource_size_t			mem_offset;
+	resource_size_t			mem64_offset;
+
+	/* Base of PCI Config space, can be per-PBM or shared. */
+	unsigned long			config_space;
+
+	/* This will be 12 on PCI-E controllers, 8 elsewhere.  */
+	unsigned long			config_space_reg_bits;
+
+	unsigned long			pci_afsr;
+	unsigned long			pci_afar;
+	unsigned long			pci_csr;
+
+	/* State of 66MHz capabilities on this PBM. */
+	int				is_66mhz_capable;
+	int				all_devs_66mhz;
+
+#ifdef CONFIG_PCI_MSI
+	/* MSI info.  */
+	u32				msiq_num;
+	u32				msiq_ent_count;
+	u32				msiq_first;
+	u32				msiq_first_devino;
+	u32				msiq_rotor;
+	struct sparc64_msiq_cookie	*msiq_irq_cookies;
+	u32				msi_num;
+	u32				msi_first;
+	u32				msi_data_mask;
+	u32				msix_data_width;
+	u64				msi32_start;
+	u64				msi64_start;
+	u32				msi32_len;
+	u32				msi64_len;
+	void				*msi_queues;
+	unsigned long			*msi_bitmap;
+	unsigned int			*msi_irq_table;
+	int (*setup_msi_irq)(unsigned int *irq_p, struct pci_dev *pdev,
+			     struct msi_desc *entry);
+	void (*teardown_msi_irq)(unsigned int irq, struct pci_dev *pdev);
+	const struct sparc64_msiq_ops	*msi_ops;
+#endif /* CONFIG_PCI_MSI */
+
+	/* This PBM's streaming buffer. */
+	struct strbuf			stc;
+
+	/* IOMMU state, potentially shared by both PBM segments. */
+	struct iommu			*iommu;
+
+	/* Now things for the actual PCI bus probes. */
+	unsigned int			pci_first_busno;
+	unsigned int			pci_last_busno;
+	struct pci_bus			*pci_bus;
+	struct pci_ops			*pci_ops;
+
+	int				numa_node;
+};
+
+extern struct pci_pbm_info *pci_pbm_root;
+
+extern int pci_num_pbms;
+
+/* PCI bus scanning and fixup support. */
+void pci_get_pbm_props(struct pci_pbm_info *pbm);
+struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm,
+				 struct device *parent);
+void pci_determine_mem_io_space(struct pci_pbm_info *pbm);
+
+/* Error reporting support. */
+void pci_scan_for_target_abort(struct pci_pbm_info *, struct pci_bus *);
+void pci_scan_for_master_abort(struct pci_pbm_info *, struct pci_bus *);
+void pci_scan_for_parity_error(struct pci_pbm_info *, struct pci_bus *);
+
+/* Configuration space access. */
+void pci_config_read8(u8 *addr, u8 *ret);
+void pci_config_read16(u16 *addr, u16 *ret);
+void pci_config_read32(u32 *addr, u32 *ret);
+void pci_config_write8(u8 *addr, u8 val);
+void pci_config_write16(u16 *addr, u16 val);
+void pci_config_write32(u32 *addr, u32 val);
+
+extern struct pci_ops sun4u_pci_ops;
+extern struct pci_ops sun4v_pci_ops;
+
+extern volatile int pci_poke_in_progress;
+extern volatile int pci_poke_cpu;
+extern volatile int pci_poke_faulted;
+
+#endif /* !(PCI_IMPL_H) */
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
new file mode 100644
index 0000000..fb5899c
--- /dev/null
+++ b/arch/sparc/kernel/pci_msi.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_msi.c: Sparc64 MSI support common layer.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+
+#include "pci_impl.h"
+
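+/* MSI event queue interrupt: read the queue head, dequeue and dispatch
+ * every pending entry to its Linux IRQ, then write the head back so
+ * the hardware can reuse the consumed entries.
+ */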
+static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie)
+{
+	struct sparc64_msiq_cookie *msiq_cookie = cookie;
+	struct pci_pbm_info *pbm = msiq_cookie->pbm;
+	unsigned long msiqid = msiq_cookie->msiqid;
+	const struct sparc64_msiq_ops *ops;
+	unsigned long orig_head, head;
+	int err;
+
+	ops = pbm->msi_ops;
+
+	err = ops->get_head(pbm, msiqid, &head);
+	if (unlikely(err < 0))
+		goto err_get_head;
+
+	orig_head = head;
+	for (;;) {
+		unsigned long msi;
+
+		err = ops->dequeue_msi(pbm, msiqid, &head, &msi);
+		if (likely(err > 0)) {
+			unsigned int irq;
+
+			irq = pbm->msi_irq_table[msi - pbm->msi_first];
+			generic_handle_irq(irq);
+		}
+
+		if (unlikely(err < 0))
+			goto err_dequeue;
+
+		if (err == 0)
+			break;
+	}
+	if (likely(head != orig_head)) {
+		err = ops->set_head(pbm, msiqid, head);
+		if (unlikely(err < 0))
+			goto err_set_head;
+	}
+	return IRQ_HANDLED;
+
+err_get_head:
+	printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n",
+	       msiqid, err);
+	goto err_out;
+
+err_dequeue:
+	printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] "
+	       "gives error %d\n",
+	       head, msiqid, err);
+	goto err_out;
+
+err_set_head:
+	printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] "
+	       "gives error %d\n",
+	       head, msiqid, err);
+	goto err_out;
+
+err_out:
+	return IRQ_NONE;
+}
+
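+/* Distribute MSIs across the event queues round-robin.  The rotor is
+ * per-PBM; a single static lock suffices since this only runs at MSI
+ * setup time.
+ */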
+static u32 pick_msiq(struct pci_pbm_info *pbm)
+{
+	static DEFINE_SPINLOCK(rotor_lock);
+	unsigned long flags;
+	u32 ret, rotor;
+
+	spin_lock_irqsave(&rotor_lock, flags);
+
+	rotor = pbm->msiq_rotor;
+	ret = pbm->msiq_first + rotor;
+
+	if (++rotor >= pbm->msiq_num)
+		rotor = 0;
+	pbm->msiq_rotor = rotor;
+
+	spin_unlock_irqrestore(&rotor_lock, flags);
+
+	return ret;
+}
+
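+/* Simple bitmap allocator for MSI numbers; values are offset by
+ * msi_first, the first MSI number this PBM owns.
+ */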
+static int alloc_msi(struct pci_pbm_info *pbm)
+{
+	int i;
+
+	for (i = 0; i < pbm->msi_num; i++) {
+		if (!test_and_set_bit(i, pbm->msi_bitmap))
+			return i + pbm->msi_first;
+	}
+
+	return -ENOENT;
+}
+
+static void free_msi(struct pci_pbm_info *pbm, int msi_num)
+{
+	msi_num -= pbm->msi_first;
+	clear_bit(msi_num, pbm->msi_bitmap);
+}
+
+static struct irq_chip msi_irq = {
+	.name		= "PCI-MSI",
+	.irq_mask	= pci_msi_mask_irq,
+	.irq_unmask	= pci_msi_unmask_irq,
+	.irq_enable	= pci_msi_unmask_irq,
+	.irq_disable	= pci_msi_mask_irq,
+	/* XXX affinity XXX */
+};
+
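+/* MSI setup: allocate a virtual IRQ and an MSI number, bind the MSI
+ * to an event queue chosen by pick_msiq(), then compose the MSI
+ * message from the PBM's 32-bit or 64-bit MSI address range with the
+ * MSI number as the data payload.
+ */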
+static int sparc64_setup_msi_irq(unsigned int *irq_p,
+				 struct pci_dev *pdev,
+				 struct msi_desc *entry)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+	struct msi_msg msg;
+	int msi, err;
+	u32 msiqid;
+
+	*irq_p = irq_alloc(0, 0);
+	err = -ENOMEM;
+	if (!*irq_p)
+		goto out_err;
+
+	irq_set_chip_and_handler_name(*irq_p, &msi_irq, handle_simple_irq,
+				      "MSI");
+
+	err = alloc_msi(pbm);
+	if (unlikely(err < 0))
+		goto out_irq_free;
+
+	msi = err;
+
+	msiqid = pick_msiq(pbm);
+
+	err = ops->msi_setup(pbm, msiqid, msi,
+			     (entry->msi_attrib.is_64 ? 1 : 0));
+	if (err)
+		goto out_msi_free;
+
+	pbm->msi_irq_table[msi - pbm->msi_first] = *irq_p;
+
+	if (entry->msi_attrib.is_64) {
+		msg.address_hi = pbm->msi64_start >> 32;
+		msg.address_lo = pbm->msi64_start & 0xffffffff;
+	} else {
+		msg.address_hi = 0;
+		msg.address_lo = pbm->msi32_start;
+	}
+	msg.data = msi;
+
+	irq_set_msi_desc(*irq_p, entry);
+	pci_write_msi_msg(*irq_p, &msg);
+
+	return 0;
+
+out_msi_free:
+	free_msi(pbm, msi);
+
+out_irq_free:
+	irq_set_chip(*irq_p, NULL);
+	irq_free(*irq_p);
+	*irq_p = 0;
+
+out_err:
+	return err;
+}
+
+static void sparc64_teardown_msi_irq(unsigned int irq,
+				     struct pci_dev *pdev)
+{
+	struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
+	const struct sparc64_msiq_ops *ops = pbm->msi_ops;
+	unsigned int msi_num;
+	int i, err;
+
+	for (i = 0; i < pbm->msi_num; i++) {
+		if (pbm->msi_irq_table[i] == irq)
+			break;
+	}
+	if (i >= pbm->msi_num) {
+		pci_err(pdev, "%s: teardown: No MSI for irq %u\n", pbm->name,
+			irq);
+		return;
+	}
+
+	msi_num = pbm->msi_first + i;
+	pbm->msi_irq_table[i] = ~0U;
+
+	err = ops->msi_teardown(pbm, msi_num);
+	if (err) {
+		pci_err(pdev, "%s: teardown: ops->teardown() on MSI %u, "
+			"irq %u, gives error %d\n", pbm->name, msi_num, irq,
+			err);
+		return;
+	}
+
+	free_msi(pbm, msi_num);
+
+	irq_set_chip(irq, NULL);
+	irq_free(irq);
+}
+
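+/* The bitmap is sized by rounding msi_num up to a whole number of
+ * longs, e.g. 100 MSIs on a 64-bit kernel round up to 128 bits, a
+ * 16-byte allocation.
+ */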
+static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
+{
+	unsigned long size, bits_per_ulong;
+
+	bits_per_ulong = sizeof(unsigned long) * 8;
+	size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
+	size /= 8;
+	BUG_ON(size % sizeof(unsigned long));
+
+	pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
+	if (!pbm->msi_bitmap)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void msi_bitmap_free(struct pci_pbm_info *pbm)
+{
+	kfree(pbm->msi_bitmap);
+	pbm->msi_bitmap = NULL;
+}
+
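+/* Two lookup tables: one cookie per event queue, handed to the
+ * queue's interrupt handler to identify (pbm, msiqid), and one Linux
+ * IRQ number per MSI for dispatch from sparc64_msiq_interrupt().
+ */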
+static int msi_table_alloc(struct pci_pbm_info *pbm)
+{
+	int size, i;
+
+	size = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie);
+	pbm->msiq_irq_cookies = kzalloc(size, GFP_KERNEL);
+	if (!pbm->msiq_irq_cookies)
+		return -ENOMEM;
+
+	for (i = 0; i < pbm->msiq_num; i++) {
+		struct sparc64_msiq_cookie *p;
+
+		p = &pbm->msiq_irq_cookies[i];
+		p->pbm = pbm;
+		p->msiqid = pbm->msiq_first + i;
+	}
+
+	size = pbm->msi_num * sizeof(unsigned int);
+	pbm->msi_irq_table = kzalloc(size, GFP_KERNEL);
+	if (!pbm->msi_irq_table) {
+		kfree(pbm->msiq_irq_cookies);
+		pbm->msiq_irq_cookies = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void msi_table_free(struct pci_pbm_info *pbm)
+{
+	kfree(pbm->msiq_irq_cookies);
+	pbm->msiq_irq_cookies = NULL;
+
+	kfree(pbm->msi_irq_table);
+	pbm->msi_irq_table = NULL;
+}
+
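+/* Each event queue gets its own interrupt, with the matching cookie
+ * as the handler argument.  When the PBM has a known NUMA node, the
+ * IRQ affinity is pinned to that node's CPUs.
+ */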
+static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
+				 const struct sparc64_msiq_ops *ops,
+				 unsigned long msiqid,
+				 unsigned long devino)
+{
+	int irq = ops->msiq_build_irq(pbm, msiqid, devino);
+	int err, nid;
+
+	if (irq < 0)
+		return irq;
+
+	nid = pbm->numa_node;
+	if (nid != -1) {
+		cpumask_t numa_mask;
+
+		cpumask_copy(&numa_mask, cpumask_of_node(nid));
+		irq_set_affinity(irq, &numa_mask);
+	}
+	err = request_irq(irq, sparc64_msiq_interrupt, 0,
+			  "MSIQ",
+			  &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm,
+				      const struct sparc64_msiq_ops *ops)
+{
+	int i;
+
+	for (i = 0; i < pbm->msiq_num; i++) {
+		unsigned long msiqid = i + pbm->msiq_first;
+		unsigned long devino = i + pbm->msiq_first_devino;
+		int err;
+
+		err = bringup_one_msi_queue(pbm, ops, msiqid, devino);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
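+/* Common MSI discovery for sparc64 PCI controllers.  All parameters
+ * come from OBP properties: "#msi-eqs", "msi-eq-size",
+ * "msi-eq-to-devino" (or "msi-eq-devino"), "#msi", "msi-ranges",
+ * "msi-data-mask", "msix-data-width" and "msi-address-ranges".  A
+ * missing or malformed property disables MSI for the PBM entirely.
+ */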
+void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
+			  const struct sparc64_msiq_ops *ops)
+{
+	const u32 *val;
+	int len;
+
+	val = of_get_property(pbm->op->dev.of_node, "#msi-eqs", &len);
+	if (!val || len != 4)
+		goto no_msi;
+	pbm->msiq_num = *val;
+	if (pbm->msiq_num) {
+		const struct msiq_prop {
+			u32 first_msiq;
+			u32 num_msiq;
+			u32 first_devino;
+		} *mqp;
+		const struct msi_range_prop {
+			u32 first_msi;
+			u32 num_msi;
+		} *mrng;
+		const struct addr_range_prop {
+			u32 msi32_high;
+			u32 msi32_low;
+			u32 msi32_len;
+			u32 msi64_high;
+			u32 msi64_low;
+			u32 msi64_len;
+		} *arng;
+
+		val = of_get_property(pbm->op->dev.of_node, "msi-eq-size", &len);
+		if (!val || len != 4)
+			goto no_msi;
+
+		pbm->msiq_ent_count = *val;
+
+		mqp = of_get_property(pbm->op->dev.of_node,
+				      "msi-eq-to-devino", &len);
+		if (!mqp)
+			mqp = of_get_property(pbm->op->dev.of_node,
+					      "msi-eq-devino", &len);
+		if (!mqp || len != sizeof(struct msiq_prop))
+			goto no_msi;
+
+		pbm->msiq_first = mqp->first_msiq;
+		pbm->msiq_first_devino = mqp->first_devino;
+
+		val = of_get_property(pbm->op->dev.of_node, "#msi", &len);
+		if (!val || len != 4)
+			goto no_msi;
+		pbm->msi_num = *val;
+
+		mrng = of_get_property(pbm->op->dev.of_node, "msi-ranges", &len);
+		if (!mrng || len != sizeof(struct msi_range_prop))
+			goto no_msi;
+		pbm->msi_first = mrng->first_msi;
+
+		val = of_get_property(pbm->op->dev.of_node, "msi-data-mask", &len);
+		if (!val || len != 4)
+			goto no_msi;
+		pbm->msi_data_mask = *val;
+
+		val = of_get_property(pbm->op->dev.of_node, "msix-data-width", &len);
+		if (!val || len != 4)
+			goto no_msi;
+		pbm->msix_data_width = *val;
+
+		arng = of_get_property(pbm->op->dev.of_node, "msi-address-ranges",
+				       &len);
+		if (!arng || len != sizeof(struct addr_range_prop))
+			goto no_msi;
+		pbm->msi32_start = ((u64)arng->msi32_high << 32) |
+			(u64) arng->msi32_low;
+		pbm->msi64_start = ((u64)arng->msi64_high << 32) |
+			(u64) arng->msi64_low;
+		pbm->msi32_len = arng->msi32_len;
+		pbm->msi64_len = arng->msi64_len;
+
+		if (msi_bitmap_alloc(pbm))
+			goto no_msi;
+
+		if (msi_table_alloc(pbm)) {
+			msi_bitmap_free(pbm);
+			goto no_msi;
+		}
+
+		if (ops->msiq_alloc(pbm)) {
+			msi_table_free(pbm);
+			msi_bitmap_free(pbm);
+			goto no_msi;
+		}
+
+		if (sparc64_bringup_msi_queues(pbm, ops)) {
+			ops->msiq_free(pbm);
+			msi_table_free(pbm);
+			msi_bitmap_free(pbm);
+			goto no_msi;
+		}
+
+		printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
+		       "devino[0x%x]\n",
+		       pbm->name,
+		       pbm->msiq_first, pbm->msiq_num,
+		       pbm->msiq_ent_count,
+		       pbm->msiq_first_devino);
+		printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
+		       "width[%u]\n",
+		       pbm->name,
+		       pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
+		       pbm->msix_data_width);
+		printk(KERN_INFO "%s: MSI addr32[0x%llx:0x%x] "
+		       "addr64[0x%llx:0x%x]\n",
+		       pbm->name,
+		       pbm->msi32_start, pbm->msi32_len,
+		       pbm->msi64_start, pbm->msi64_len);
+		printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
+		       pbm->name,
+		       __pa(pbm->msi_queues));
+
+		pbm->msi_ops = ops;
+		pbm->setup_msi_irq = sparc64_setup_msi_irq;
+		pbm->teardown_msi_irq = sparc64_teardown_msi_irq;
+	}
+	return;
+
+no_msi:
+	pbm->msiq_num = 0;
+	printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
+}
diff --git a/arch/sparc/kernel/pci_psycho.c b/arch/sparc/kernel/pci_psycho.c
new file mode 100644
index 0000000..f413371
--- /dev/null
+++ b/arch/sparc/kernel/pci_psycho.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_psycho.c: PSYCHO/U2P specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost   (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek   (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/starfire.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define DRIVER_NAME	"psycho"
+#define PFX		DRIVER_NAME ": "
+
+/* Misc. PSYCHO PCI controller register offsets and definitions. */
+#define PSYCHO_CONTROL		0x0010UL
+#define  PSYCHO_CONTROL_IMPL	 0xf000000000000000UL /* Implementation of this PSYCHO */
+#define  PSYCHO_CONTROL_VER	 0x0f00000000000000UL /* Version of this PSYCHO       */
+#define  PSYCHO_CONTROL_MID	 0x00f8000000000000UL /* UPA Module ID of PSYCHO      */
+#define  PSYCHO_CONTROL_IGN	 0x0007c00000000000UL /* Interrupt Group Number       */
+#define  PSYCHO_CONTROL_RESV     0x00003ffffffffff0UL /* Reserved                     */
+#define  PSYCHO_CONTROL_APCKEN	 0x0000000000000008UL /* Address Parity Check Enable  */
+#define  PSYCHO_CONTROL_APERR	 0x0000000000000004UL /* Incoming System Addr Parerr  */
+#define  PSYCHO_CONTROL_IAP	 0x0000000000000002UL /* Invert UPA Parity            */
+#define  PSYCHO_CONTROL_MODE	 0x0000000000000001UL /* PSYCHO clock mode            */
+#define PSYCHO_PCIA_CTRL	0x2000UL
+#define PSYCHO_PCIB_CTRL	0x4000UL
+#define  PSYCHO_PCICTRL_RESV1	 0xfffffff000000000UL /* Reserved                     */
+#define  PSYCHO_PCICTRL_SBH_ERR	 0x0000000800000000UL /* Streaming byte hole error    */
+#define  PSYCHO_PCICTRL_SERR	 0x0000000400000000UL /* SERR signal asserted         */
+#define  PSYCHO_PCICTRL_SPEED	 0x0000000200000000UL /* PCI speed (1 is U2P clock)   */
+#define  PSYCHO_PCICTRL_RESV2	 0x00000001ffc00000UL /* Reserved                     */
+#define  PSYCHO_PCICTRL_ARB_PARK 0x0000000000200000UL /* PCI arbitration parking      */
+#define  PSYCHO_PCICTRL_RESV3	 0x00000000001ff800UL /* Reserved                     */
+#define  PSYCHO_PCICTRL_SBH_INT	 0x0000000000000400UL /* Streaming byte hole int enab */
+#define  PSYCHO_PCICTRL_WEN	 0x0000000000000200UL /* Power Mgmt Wake Enable       */
+#define  PSYCHO_PCICTRL_EEN	 0x0000000000000100UL /* PCI Error Interrupt Enable   */
+#define  PSYCHO_PCICTRL_RESV4	 0x00000000000000c0UL /* Reserved                     */
+#define  PSYCHO_PCICTRL_AEN	 0x000000000000003fUL /* PCI DVMA Arbitration Enable  */
+
+/* PSYCHO error handling support. */
+
+/* Helper function of IOMMU error checking, which checks out
+ * the state of the streaming buffers.  The IOMMU lock is
+ * held when this is called.
+ *
+ * For the PCI error case we know which PBM (and thus which
+ * streaming buffer) caused the error, but for the uncorrectable
+ * error case we do not.  So we always check both streaming caches.
+ */
+#define PSYCHO_STRBUF_CONTROL_A 0x2800UL
+#define PSYCHO_STRBUF_CONTROL_B 0x4800UL
+#define  PSYCHO_STRBUF_CTRL_LPTR    0x00000000000000f0UL /* LRU Lock Pointer */
+#define  PSYCHO_STRBUF_CTRL_LENAB   0x0000000000000008UL /* LRU Lock Enable */
+#define  PSYCHO_STRBUF_CTRL_RRDIS   0x0000000000000004UL /* Rerun Disable */
+#define  PSYCHO_STRBUF_CTRL_DENAB   0x0000000000000002UL /* Diagnostic Mode Enable */
+#define  PSYCHO_STRBUF_CTRL_ENAB    0x0000000000000001UL /* Streaming Buffer Enable */
+#define PSYCHO_STRBUF_FLUSH_A   0x2808UL
+#define PSYCHO_STRBUF_FLUSH_B   0x4808UL
+#define PSYCHO_STRBUF_FSYNC_A   0x2810UL
+#define PSYCHO_STRBUF_FSYNC_B   0x4810UL
+#define PSYCHO_STC_DATA_A	0xb000UL
+#define PSYCHO_STC_DATA_B	0xc000UL
+#define PSYCHO_STC_ERR_A	0xb400UL
+#define PSYCHO_STC_ERR_B	0xc400UL
+#define PSYCHO_STC_TAG_A	0xb800UL
+#define PSYCHO_STC_TAG_B	0xc800UL
+#define PSYCHO_STC_LINE_A	0xb900UL
+#define PSYCHO_STC_LINE_B	0xc900UL
+
+/* When an Uncorrectable Error or a PCI Error happens, we
+ * interrogate the IOMMU state to see if it is the cause.
+ */
+#define PSYCHO_IOMMU_CONTROL	0x0200UL
+#define  PSYCHO_IOMMU_CTRL_RESV     0xfffffffff9000000UL /* Reserved                      */
+#define  PSYCHO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status      */
+#define  PSYCHO_IOMMU_CTRL_XLTEERR  0x0000000001000000UL /* Translation Error encountered */
+#define  PSYCHO_IOMMU_CTRL_LCKEN    0x0000000000800000UL /* Enable translation locking    */
+#define  PSYCHO_IOMMU_CTRL_LCKPTR   0x0000000000780000UL /* Translation lock pointer      */
+#define  PSYCHO_IOMMU_CTRL_TSBSZ    0x0000000000070000UL /* TSB Size                      */
+#define  PSYCHO_IOMMU_TSBSZ_1K      0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+#define  PSYCHO_IOMMU_TSBSZ_2K      0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+#define  PSYCHO_IOMMU_TSBSZ_4K      0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+#define  PSYCHO_IOMMU_TSBSZ_8K      0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+#define  PSYCHO_IOMMU_TSBSZ_16K     0x0000000000040000UL /* TSB Table 16k 8-byte entries  */
+#define  PSYCHO_IOMMU_TSBSZ_32K     0x0000000000050000UL /* TSB Table 32k 8-byte entries  */
+#define  PSYCHO_IOMMU_TSBSZ_64K     0x0000000000060000UL /* TSB Table 64k 8-byte entries  */
+#define  PSYCHO_IOMMU_TSBSZ_128K    0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+#define  PSYCHO_IOMMU_CTRL_RESV2    0x000000000000fff8UL /* Reserved                      */
+#define  PSYCHO_IOMMU_CTRL_TBWSZ    0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+#define  PSYCHO_IOMMU_CTRL_DENAB    0x0000000000000002UL /* Diagnostic mode enable        */
+#define  PSYCHO_IOMMU_CTRL_ENAB     0x0000000000000001UL /* IOMMU Enable                  */
+#define PSYCHO_IOMMU_TSBBASE	0x0208UL
+#define PSYCHO_IOMMU_FLUSH	0x0210UL
+#define PSYCHO_IOMMU_TAG	0xa580UL
+#define PSYCHO_IOMMU_DATA	0xa600UL
+
+/* Uncorrectable Errors.  Cause of the error and the address are
+ * recorded in the UE_AFSR and UE_AFAR of PSYCHO.  They are errors
+ * relating to UPA interface transactions.
+ */
+#define PSYCHO_UE_AFSR	0x0030UL
+#define  PSYCHO_UEAFSR_PPIO	0x8000000000000000UL /* Primary PIO is cause         */
+#define  PSYCHO_UEAFSR_PDRD	0x4000000000000000UL /* Primary DVMA read is cause   */
+#define  PSYCHO_UEAFSR_PDWR	0x2000000000000000UL /* Primary DVMA write is cause  */
+#define  PSYCHO_UEAFSR_SPIO	0x1000000000000000UL /* Secondary PIO is cause       */
+#define  PSYCHO_UEAFSR_SDRD	0x0800000000000000UL /* Secondary DVMA read is cause */
+#define  PSYCHO_UEAFSR_SDWR	0x0400000000000000UL /* Secondary DVMA write is cause */
+#define  PSYCHO_UEAFSR_RESV1	0x03ff000000000000UL /* Reserved                     */
+#define  PSYCHO_UEAFSR_BMSK	0x0000ffff00000000UL /* Bytemask of failed transfer  */
+#define  PSYCHO_UEAFSR_DOFF	0x00000000e0000000UL /* Doubleword Offset            */
+#define  PSYCHO_UEAFSR_MID	0x000000001f000000UL /* UPA MID causing the fault    */
+#define  PSYCHO_UEAFSR_BLK	0x0000000000800000UL /* Trans was block operation    */
+#define  PSYCHO_UEAFSR_RESV2	0x00000000007fffffUL /* Reserved                     */
+#define PSYCHO_UE_AFAR	0x0038UL
+
+static irqreturn_t psycho_ue_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg = pbm->controller_regs + PSYCHO_UE_AFSR;
+	unsigned long afar_reg = pbm->controller_regs + PSYCHO_UE_AFAR;
+	unsigned long afsr, afar, error_bits;
+	int reported;
+
+	/* Latch uncorrectable error status. */
+	afar = upa_readq(afar_reg);
+	afsr = upa_readq(afsr_reg);
+
+	/* Clear the primary/secondary error status bits. */
+	error_bits = afsr &
+		(PSYCHO_UEAFSR_PPIO | PSYCHO_UEAFSR_PDRD | PSYCHO_UEAFSR_PDWR |
+		 PSYCHO_UEAFSR_SPIO | PSYCHO_UEAFSR_SDRD | PSYCHO_UEAFSR_SDWR);
+	if (!error_bits)
+		return IRQ_NONE;
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: Uncorrectable Error, primary error type[%s]\n",
+	       pbm->name,
+	       (((error_bits & PSYCHO_UEAFSR_PPIO) ?
+		 "PIO" :
+		 ((error_bits & PSYCHO_UEAFSR_PDRD) ?
+		  "DMA Read" :
+		  ((error_bits & PSYCHO_UEAFSR_PDWR) ?
+		   "DMA Write" : "???")))));
+	printk("%s: bytemask[%04lx] dword_offset[%lx] UPA_MID[%02lx] was_block(%d)\n",
+	       pbm->name,
+	       (afsr & PSYCHO_UEAFSR_BMSK) >> 32UL,
+	       (afsr & PSYCHO_UEAFSR_DOFF) >> 29UL,
+	       (afsr & PSYCHO_UEAFSR_MID) >> 24UL,
+	       ((afsr & PSYCHO_UEAFSR_BLK) ? 1 : 0));
+	printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+	printk("%s: UE Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & PSYCHO_UEAFSR_SPIO) {
+		reported++;
+		printk("(PIO)");
+	}
+	if (afsr & PSYCHO_UEAFSR_SDRD) {
+		reported++;
+		printk("(DMA Read)");
+	}
+	if (afsr & PSYCHO_UEAFSR_SDWR) {
+		reported++;
+		printk("(DMA Write)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	/* Interrogate both IOMMUs for error status. */
+	psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+	if (pbm->sibling)
+		psycho_check_iommu_error(pbm->sibling, afsr, afar, UE_ERR);
+
+	return IRQ_HANDLED;
+}
+
+/* Correctable Errors. */
+#define PSYCHO_CE_AFSR	0x0040UL
+#define  PSYCHO_CEAFSR_PPIO	0x8000000000000000UL /* Primary PIO is cause         */
+#define  PSYCHO_CEAFSR_PDRD	0x4000000000000000UL /* Primary DVMA read is cause   */
+#define  PSYCHO_CEAFSR_PDWR	0x2000000000000000UL /* Primary DVMA write is cause  */
+#define  PSYCHO_CEAFSR_SPIO	0x1000000000000000UL /* Secondary PIO is cause       */
+#define  PSYCHO_CEAFSR_SDRD	0x0800000000000000UL /* Secondary DVMA read is cause */
+#define  PSYCHO_CEAFSR_SDWR	0x0400000000000000UL /* Secondary DVMA write is cause */
+#define  PSYCHO_CEAFSR_RESV1	0x0300000000000000UL /* Reserved                     */
+#define  PSYCHO_CEAFSR_ESYND	0x00ff000000000000UL /* Syndrome Bits                */
+#define  PSYCHO_CEAFSR_BMSK	0x0000ffff00000000UL /* Bytemask of failed transfer  */
+#define  PSYCHO_CEAFSR_DOFF	0x00000000e0000000UL /* Doubleword Offset            */
+#define  PSYCHO_CEAFSR_MID	0x000000001f000000UL /* UPA MID causing the fault    */
+#define  PSYCHO_CEAFSR_BLK	0x0000000000800000UL /* Trans was block operation    */
+#define  PSYCHO_CEAFSR_RESV2	0x00000000007fffffUL /* Reserved                     */
+#define PSYCHO_CE_AFAR	0x0040UL
+
+static irqreturn_t psycho_ce_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg = pbm->controller_regs + PSYCHO_CE_AFSR;
+	unsigned long afar_reg = pbm->controller_regs + PSYCHO_CE_AFAR;
+	unsigned long afsr, afar, error_bits;
+	int reported;
+
+	/* Latch error status. */
+	afar = upa_readq(afar_reg);
+	afsr = upa_readq(afsr_reg);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(PSYCHO_CEAFSR_PPIO | PSYCHO_CEAFSR_PDRD | PSYCHO_CEAFSR_PDWR |
+		 PSYCHO_CEAFSR_SPIO | PSYCHO_CEAFSR_SDRD | PSYCHO_CEAFSR_SDWR);
+	if (!error_bits)
+		return IRQ_NONE;
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: Correctable Error, primary error type[%s]\n",
+	       pbm->name,
+	       (((error_bits & PSYCHO_CEAFSR_PPIO) ?
+		 "PIO" :
+		 ((error_bits & PSYCHO_CEAFSR_PDRD) ?
+		  "DMA Read" :
+		  ((error_bits & PSYCHO_CEAFSR_PDWR) ?
+		   "DMA Write" : "???")))));
+
+	/* XXX Use syndrome and afar to print out module string just like
+	 * XXX UDB CE trap handler does... -DaveM
+	 */
+	printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+	       "UPA_MID[%02lx] was_block(%d)\n",
+	       pbm->name,
+	       (afsr & PSYCHO_CEAFSR_ESYND) >> 48UL,
+	       (afsr & PSYCHO_CEAFSR_BMSK) >> 32UL,
+	       (afsr & PSYCHO_CEAFSR_DOFF) >> 29UL,
+	       (afsr & PSYCHO_CEAFSR_MID) >> 24UL,
+	       ((afsr & PSYCHO_CEAFSR_BLK) ? 1 : 0));
+	printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+	printk("%s: CE Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & PSYCHO_CEAFSR_SPIO) {
+		reported++;
+		printk("(PIO)");
+	}
+	if (afsr & PSYCHO_CEAFSR_SDRD) {
+		reported++;
+		printk("(DMA Read)");
+	}
+	if (afsr & PSYCHO_CEAFSR_SDWR) {
+		reported++;
+		printk("(DMA Write)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	return IRQ_HANDLED;
+}
+
+/* PCI Errors.  They are signalled by the PCI bus module since they
+ * are associated with a specific bus segment.
+ */
+#define PSYCHO_PCI_AFSR_A	0x2010UL
+#define PSYCHO_PCI_AFSR_B	0x4010UL
+#define PSYCHO_PCI_AFAR_A	0x2018UL
+#define PSYCHO_PCI_AFAR_B	0x4018UL
+
+/* XXX What about PowerFail/PowerManagement??? -DaveM */
+#define PSYCHO_ECC_CTRL		0x0020
+#define  PSYCHO_ECCCTRL_EE	 0x8000000000000000UL /* Enable ECC Checking */
+#define  PSYCHO_ECCCTRL_UE	 0x4000000000000000UL /* Enable UE Interrupts */
+#define  PSYCHO_ECCCTRL_CE	 0x2000000000000000UL /* Enable CE Interrupts */
+static void psycho_register_error_handlers(struct pci_pbm_info *pbm)
+{
+	struct platform_device *op = of_find_device_by_node(pbm->op->dev.of_node);
+	unsigned long base = pbm->controller_regs;
+	u64 tmp;
+	int err;
+
+	if (!op)
+		return;
+
+	/* Psycho interrupt property order is:
+	 * 0: PCIERR INO for this PBM
+	 * 1: UE ERR
+	 * 2: CE ERR
+	 * 3: POWER FAIL
+	 * 4: SPARE HARDWARE
+	 * 5: POWER MANAGEMENT
+	 */
+
+	if (op->archdata.num_irqs < 6)
+		return;
+
+	/* We really mean to ignore the return result here.  Two
+	 * PCI controllers share the same interrupt numbers and
+	 * drive the same front-end hardware.
+	 */
+	err = request_irq(op->archdata.irqs[1], psycho_ue_intr, IRQF_SHARED,
+			  "PSYCHO_UE", pbm);
+	err = request_irq(op->archdata.irqs[2], psycho_ce_intr, IRQF_SHARED,
+			  "PSYCHO_CE", pbm);
+
+	/* This one, however, ought not to fail.  We can just warn
+	 * about it since the system can still operate properly even
+	 * if this fails.
+	 */
+	err = request_irq(op->archdata.irqs[0], psycho_pcierr_intr, IRQF_SHARED,
+			  "PSYCHO_PCIERR", pbm);
+	if (err)
+		printk(KERN_WARNING "%s: Could not register PCIERR, "
+		       "err=%d\n", pbm->name, err);
+
+	/* Enable UE and CE interrupts for controller. */
+	upa_writeq((PSYCHO_ECCCTRL_EE |
+		    PSYCHO_ECCCTRL_UE |
+		    PSYCHO_ECCCTRL_CE), base + PSYCHO_ECC_CTRL);
+
+	/* Enable PCI Error interrupts and clear error
+	 * bits for each PBM.
+	 */
+	tmp = upa_readq(base + PSYCHO_PCIA_CTRL);
+	tmp |= (PSYCHO_PCICTRL_SERR |
+		PSYCHO_PCICTRL_SBH_ERR |
+		PSYCHO_PCICTRL_EEN);
+	tmp &= ~(PSYCHO_PCICTRL_SBH_INT);
+	upa_writeq(tmp, base + PSYCHO_PCIA_CTRL);
+
+	tmp = upa_readq(base + PSYCHO_PCIB_CTRL);
+	tmp |= (PSYCHO_PCICTRL_SERR |
+		PSYCHO_PCICTRL_SBH_ERR |
+		PSYCHO_PCICTRL_EEN);
+	tmp &= ~(PSYCHO_PCICTRL_SBH_INT);
+	upa_writeq(tmp, base + PSYCHO_PCIB_CTRL);
+}
+
+/* PSYCHO boot time probing and initialization. */
+static void pbm_config_busmastering(struct pci_pbm_info *pbm)
+{
+	u8 *addr;
+
+	/* Set cache-line size to 64 bytes, this is actually
+	 * a nop but I do it for completeness.
+	 */
+	addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+					0, PCI_CACHE_LINE_SIZE);
+	pci_config_write8(addr, 64 / sizeof(u32));
+
+	/* Set PBM latency timer to 64 PCI clocks. */
+	addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+					0, PCI_LATENCY_TIMER);
+	pci_config_write8(addr, 64);
+}
+
+static void psycho_scan_bus(struct pci_pbm_info *pbm,
+			    struct device *parent)
+{
+	pbm_config_busmastering(pbm);
+	pbm->is_66mhz_capable = 0;
+	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+	/* After the PCI bus scan is complete, we can register
+	 * the error interrupt handlers.
+	 */
+	psycho_register_error_handlers(pbm);
+}
+
+#define PSYCHO_IRQ_RETRY	0x1a00UL
+#define PSYCHO_PCIA_DIAG	0x2020UL
+#define PSYCHO_PCIB_DIAG	0x4020UL
+#define  PSYCHO_PCIDIAG_RESV	 0xffffffffffffff80UL /* Reserved                     */
+#define  PSYCHO_PCIDIAG_DRETRY	 0x0000000000000040UL /* Disable retry limit          */
+#define  PSYCHO_PCIDIAG_DISYNC	 0x0000000000000020UL /* Disable DMA wr / irq sync    */
+#define  PSYCHO_PCIDIAG_DDWSYNC	 0x0000000000000010UL /* Disable DMA wr / PIO rd sync */
+#define  PSYCHO_PCIDIAG_IDDPAR	 0x0000000000000008UL /* Invert DMA data parity       */
+#define  PSYCHO_PCIDIAG_IPDPAR	 0x0000000000000004UL /* Invert PIO data parity       */
+#define  PSYCHO_PCIDIAG_IPAPAR	 0x0000000000000002UL /* Invert PIO address parity    */
+#define  PSYCHO_PCIDIAG_LPBACK	 0x0000000000000001UL /* Enable loopback mode         */
+
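+/* Controller-wide init: bound the IRQ retry count, enable PCI
+ * arbitration for all slots on both bus segments, and apply the
+ * DMA-write/PIO-read desynchronization workaround for U2P erratum
+ * 1243770 to both segments.
+ */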
+static void psycho_controller_hwinit(struct pci_pbm_info *pbm)
+{
+	u64 tmp;
+
+	upa_writeq(5, pbm->controller_regs + PSYCHO_IRQ_RETRY);
+
+	/* Enable arbiter for all PCI slots. */
+	tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIA_CTRL);
+	tmp |= PSYCHO_PCICTRL_AEN;
+	upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIA_CTRL);
+
+	tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIB_CTRL);
+	tmp |= PSYCHO_PCICTRL_AEN;
+	upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIB_CTRL);
+
+	/* Disable DMA write / PIO read synchronization on
+	 * both PCI bus segments.
+	 * [ U2P Erratum 1243770, STP2223BGA data sheet ]
+	 */
+	tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIA_DIAG);
+	tmp |= PSYCHO_PCIDIAG_DDWSYNC;
+	upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIA_DIAG);
+
+	tmp = upa_readq(pbm->controller_regs + PSYCHO_PCIB_DIAG);
+	tmp |= PSYCHO_PCIDIAG_DDWSYNC;
+	upa_writeq(tmp, pbm->controller_regs + PSYCHO_PCIB_DIAG);
+}
+
+static void psycho_pbm_strbuf_init(struct pci_pbm_info *pbm,
+				   int is_pbm_a)
+{
+	unsigned long base = pbm->controller_regs;
+	u64 control;
+
+	if (is_pbm_a) {
+		pbm->stc.strbuf_control  = base + PSYCHO_STRBUF_CONTROL_A;
+		pbm->stc.strbuf_pflush   = base + PSYCHO_STRBUF_FLUSH_A;
+		pbm->stc.strbuf_fsync    = base + PSYCHO_STRBUF_FSYNC_A;
+		pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_A;
+		pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_A;
+		pbm->stc.strbuf_line_diag = base + PSYCHO_STC_LINE_A;
+	} else {
+		pbm->stc.strbuf_control  = base + PSYCHO_STRBUF_CONTROL_B;
+		pbm->stc.strbuf_pflush   = base + PSYCHO_STRBUF_FLUSH_B;
+		pbm->stc.strbuf_fsync    = base + PSYCHO_STRBUF_FSYNC_B;
+		pbm->stc.strbuf_err_stat = base + PSYCHO_STC_ERR_B;
+		pbm->stc.strbuf_tag_diag = base + PSYCHO_STC_TAG_B;
+		pbm->stc.strbuf_line_diag = base + PSYCHO_STC_LINE_B;
+	}
+	/* PSYCHO's streaming buffer lacks ctx flushing. */
+	pbm->stc.strbuf_ctxflush      = 0;
+	pbm->stc.strbuf_ctxmatch_base = 0;
+
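+	/* Align the flush flag to a 64-byte boundary within
+	 * __flushflag_buf and record its physical address for the
+	 * flush-sync hardware to write to.
+	 */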
+	pbm->stc.strbuf_flushflag = (volatile unsigned long *)
+		((((unsigned long)&pbm->stc.__flushflag_buf[0])
+		  + 63UL)
+		 & ~63UL);
+	pbm->stc.strbuf_flushflag_pa = (unsigned long)
+		__pa(pbm->stc.strbuf_flushflag);
+
+	/* Enable the streaming buffer.  We have to be careful
+	 * just in case OBP left it with LRU locking enabled.
+	 *
+	 * It is possible to control whether the PBM will be rerun on
+	 * line misses.  Currently I just retain whatever setting
+	 * OBP left us with.  All checks so far show it having
+	 * a value of zero.
+	 */
+#undef PSYCHO_STRBUF_RERUN_ENABLE
+#undef PSYCHO_STRBUF_RERUN_DISABLE
+	control = upa_readq(pbm->stc.strbuf_control);
+	control |= PSYCHO_STRBUF_CTRL_ENAB;
+	control &= ~(PSYCHO_STRBUF_CTRL_LENAB | PSYCHO_STRBUF_CTRL_LPTR);
+#ifdef PSYCHO_STRBUF_RERUN_ENABLE
+	control &= ~(PSYCHO_STRBUF_CTRL_RRDIS);
+#else
+#ifdef PSYCHO_STRBUF_RERUN_DISABLE
+	control |= PSYCHO_STRBUF_CTRL_RRDIS;
+#endif
+#endif
+	upa_writeq(control, pbm->stc.strbuf_control);
+
+	pbm->stc.strbuf_enabled = 1;
+}
+
+#define PSYCHO_IOSPACE_A	0x002000000UL
+#define PSYCHO_IOSPACE_B	0x002010000UL
+#define PSYCHO_IOSPACE_SIZE	0x00000ffffUL
+#define PSYCHO_MEMSPACE_A	0x100000000UL
+#define PSYCHO_MEMSPACE_B	0x180000000UL
+#define PSYCHO_MEMSPACE_SIZE	0x07fffffffUL
+
+static void psycho_pbm_init(struct pci_pbm_info *pbm,
+			    struct platform_device *op, int is_pbm_a)
+{
+	psycho_pbm_init_common(pbm, op, "PSYCHO", PBM_CHIP_TYPE_PSYCHO);
+	psycho_pbm_strbuf_init(pbm, is_pbm_a);
+	psycho_scan_bus(pbm, &op->dev);
+}
+
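+/* The two PBMs of a PSYCHO share one UPA port ID, so a previously
+ * probed PBM with the same portid is this PBM's sibling and already
+ * owns the shared IOMMU.
+ */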
+static struct pci_pbm_info *psycho_find_sibling(u32 upa_portid)
+{
+	struct pci_pbm_info *pbm;
+
+	for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
+		if (pbm->portid == upa_portid)
+			return pbm;
+	}
+	return NULL;
+}
+
+#define PSYCHO_CONFIGSPACE	0x001000000UL
+
+static int psycho_probe(struct platform_device *op)
+{
+	const struct linux_prom64_registers *pr_regs;
+	struct device_node *dp = op->dev.of_node;
+	struct pci_pbm_info *pbm;
+	struct iommu *iommu;
+	int is_pbm_a, err;
+	u32 upa_portid;
+
+	upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+	err = -ENOMEM;
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (!pbm) {
+		printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+		goto out_err;
+	}
+
+	pbm->sibling = psycho_find_sibling(upa_portid);
+	if (pbm->sibling) {
+		iommu = pbm->sibling->iommu;
+	} else {
+		iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+		if (!iommu) {
+			printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+			goto out_free_controller;
+		}
+	}
+
+	pbm->iommu = iommu;
+	pbm->portid = upa_portid;
+
+	pr_regs = of_get_property(dp, "reg", NULL);
+	err = -ENODEV;
+	if (!pr_regs) {
+		printk(KERN_ERR PFX "No reg property.\n");
+		goto out_free_iommu;
+	}
+
+	is_pbm_a = ((pr_regs[0].phys_addr & 0x6000) == 0x2000);
+
+	pbm->controller_regs = pr_regs[2].phys_addr;
+	pbm->config_space = (pr_regs[2].phys_addr + PSYCHO_CONFIGSPACE);
+
+	if (is_pbm_a) {
+		pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_A;
+		pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_A;
+		pbm->pci_csr  = pbm->controller_regs + PSYCHO_PCIA_CTRL;
+	} else {
+		pbm->pci_afsr = pbm->controller_regs + PSYCHO_PCI_AFSR_B;
+		pbm->pci_afar = pbm->controller_regs + PSYCHO_PCI_AFAR_B;
+		pbm->pci_csr  = pbm->controller_regs + PSYCHO_PCIB_CTRL;
+	}
+
+	psycho_controller_hwinit(pbm);
+	if (!pbm->sibling) {
+		err = psycho_iommu_init(pbm, 128, 0xc0000000,
+					0xffffffff, PSYCHO_CONTROL);
+		if (err)
+			goto out_free_iommu;
+
+		/* If necessary, hook us up for starfire IRQ translations. */
+		if (this_is_starfire)
+			starfire_hookup(pbm->portid);
+	}
+
+	psycho_pbm_init(pbm, op, is_pbm_a);
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	if (pbm->sibling)
+		pbm->sibling->sibling = pbm;
+
+	dev_set_drvdata(&op->dev, pbm);
+
+	return 0;
+
+out_free_iommu:
+	if (!pbm->sibling)
+		kfree(pbm->iommu);
+
+out_free_controller:
+	kfree(pbm);
+
+out_err:
+	return err;
+}
+
+static const struct of_device_id psycho_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pci108e,8000",
+	},
+	{},
+};
+
+static struct platform_driver psycho_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = psycho_match,
+	},
+	.probe		= psycho_probe,
+};
+
+static int __init psycho_init(void)
+{
+	return platform_driver_register(&psycho_driver);
+}
+
+subsys_initcall(psycho_init);
diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c
new file mode 100644
index 0000000..8107286
--- /dev/null
+++ b/arch/sparc/kernel/pci_sabre.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_sabre.c: Sabre specific PCI controller support.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998, 1999 Eddie C. Dost   (ecd@skynet.be)
+ * Copyright (C) 1999 Jakub Jelinek   (jakub@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/apb.h>
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define DRIVER_NAME	"sabre"
+#define PFX		DRIVER_NAME ": "
+
+/* SABRE PCI controller register offsets and definitions. */
+#define SABRE_UE_AFSR		0x0030UL
+#define  SABRE_UEAFSR_PDRD	 0x4000000000000000UL	/* Primary PCI DMA Read */
+#define  SABRE_UEAFSR_PDWR	 0x2000000000000000UL	/* Primary PCI DMA Write */
+#define  SABRE_UEAFSR_SDRD	 0x0800000000000000UL	/* Secondary PCI DMA Read */
+#define  SABRE_UEAFSR_SDWR	 0x0400000000000000UL	/* Secondary PCI DMA Write */
+#define  SABRE_UEAFSR_SDTE	 0x0200000000000000UL	/* Secondary DMA Translation Error */
+#define  SABRE_UEAFSR_PDTE	 0x0100000000000000UL	/* Primary DMA Translation Error */
+#define  SABRE_UEAFSR_BMSK	 0x0000ffff00000000UL	/* Bytemask */
+#define  SABRE_UEAFSR_OFF	 0x00000000e0000000UL	/* Offset (AFAR bits [5:3]) */
+#define  SABRE_UEAFSR_BLK	 0x0000000000800000UL	/* Was block operation */
+#define SABRE_UECE_AFAR		0x0038UL
+#define SABRE_CE_AFSR		0x0040UL
+#define  SABRE_CEAFSR_PDRD	 0x4000000000000000UL	/* Primary PCI DMA Read */
+#define  SABRE_CEAFSR_PDWR	 0x2000000000000000UL	/* Primary PCI DMA Write */
+#define  SABRE_CEAFSR_SDRD	 0x0800000000000000UL	/* Secondary PCI DMA Read */
+#define  SABRE_CEAFSR_SDWR	 0x0400000000000000UL	/* Secondary PCI DMA Write */
+#define  SABRE_CEAFSR_ESYND	 0x00ff000000000000UL	/* ECC Syndrome */
+#define  SABRE_CEAFSR_BMSK	 0x0000ffff00000000UL	/* Bytemask */
+#define  SABRE_CEAFSR_OFF	 0x00000000e0000000UL	/* Offset */
+#define  SABRE_CEAFSR_BLK	 0x0000000000800000UL	/* Was block operation */
+#define SABRE_UECE_AFAR_ALIAS	0x0048UL	/* Aliases to 0x0038 */
+#define SABRE_IOMMU_CONTROL	0x0200UL
+#define  SABRE_IOMMUCTRL_ERRSTS	 0x0000000006000000UL	/* Error status bits */
+#define  SABRE_IOMMUCTRL_ERR	 0x0000000001000000UL	/* Error present in IOTLB */
+#define  SABRE_IOMMUCTRL_LCKEN	 0x0000000000800000UL	/* IOTLB lock enable */
+#define  SABRE_IOMMUCTRL_LCKPTR	 0x0000000000780000UL	/* IOTLB lock pointer */
+#define  SABRE_IOMMUCTRL_TSBSZ	 0x0000000000070000UL	/* TSB Size */
+#define  SABRE_IOMMU_TSBSZ_1K   0x0000000000000000
+#define  SABRE_IOMMU_TSBSZ_2K   0x0000000000010000
+#define  SABRE_IOMMU_TSBSZ_4K   0x0000000000020000
+#define  SABRE_IOMMU_TSBSZ_8K   0x0000000000030000
+#define  SABRE_IOMMU_TSBSZ_16K  0x0000000000040000
+#define  SABRE_IOMMU_TSBSZ_32K  0x0000000000050000
+#define  SABRE_IOMMU_TSBSZ_64K  0x0000000000060000
+#define  SABRE_IOMMU_TSBSZ_128K 0x0000000000070000
+#define  SABRE_IOMMUCTRL_TBWSZ	 0x0000000000000004UL	/* TSB assumed page size */
+#define  SABRE_IOMMUCTRL_DENAB	 0x0000000000000002UL	/* Diagnostic Mode Enable */
+#define  SABRE_IOMMUCTRL_ENAB	 0x0000000000000001UL	/* IOMMU Enable */
+#define SABRE_IOMMU_TSBBASE	0x0208UL
+#define SABRE_IOMMU_FLUSH	0x0210UL
+#define SABRE_IMAP_A_SLOT0	0x0c00UL
+#define SABRE_IMAP_B_SLOT0	0x0c20UL
+#define SABRE_IMAP_SCSI		0x1000UL
+#define SABRE_IMAP_ETH		0x1008UL
+#define SABRE_IMAP_BPP		0x1010UL
+#define SABRE_IMAP_AU_REC	0x1018UL
+#define SABRE_IMAP_AU_PLAY	0x1020UL
+#define SABRE_IMAP_PFAIL	0x1028UL
+#define SABRE_IMAP_KMS		0x1030UL
+#define SABRE_IMAP_FLPY		0x1038UL
+#define SABRE_IMAP_SHW		0x1040UL
+#define SABRE_IMAP_KBD		0x1048UL
+#define SABRE_IMAP_MS		0x1050UL
+#define SABRE_IMAP_SER		0x1058UL
+#define SABRE_IMAP_UE		0x1070UL
+#define SABRE_IMAP_CE		0x1078UL
+#define SABRE_IMAP_PCIERR	0x1080UL
+#define SABRE_IMAP_GFX		0x1098UL
+#define SABRE_IMAP_EUPA		0x10a0UL
+#define SABRE_ICLR_A_SLOT0	0x1400UL
+#define SABRE_ICLR_B_SLOT0	0x1480UL
+#define SABRE_ICLR_SCSI		0x1800UL
+#define SABRE_ICLR_ETH		0x1808UL
+#define SABRE_ICLR_BPP		0x1810UL
+#define SABRE_ICLR_AU_REC	0x1818UL
+#define SABRE_ICLR_AU_PLAY	0x1820UL
+#define SABRE_ICLR_PFAIL	0x1828UL
+#define SABRE_ICLR_KMS		0x1830UL
+#define SABRE_ICLR_FLPY		0x1838UL
+#define SABRE_ICLR_SHW		0x1840UL
+#define SABRE_ICLR_KBD		0x1848UL
+#define SABRE_ICLR_MS		0x1850UL
+#define SABRE_ICLR_SER		0x1858UL
+#define SABRE_ICLR_UE		0x1870UL
+#define SABRE_ICLR_CE		0x1878UL
+#define SABRE_ICLR_PCIERR	0x1880UL
+#define SABRE_WRSYNC		0x1c20UL
+#define SABRE_PCICTRL		0x2000UL
+#define  SABRE_PCICTRL_MRLEN	 0x0000001000000000UL	/* Use MemoryReadLine for block loads/stores */
+#define  SABRE_PCICTRL_SERR	 0x0000000400000000UL	/* Set when SERR asserted on PCI bus */
+#define  SABRE_PCICTRL_ARBPARK	 0x0000000000200000UL	/* Bus Parking 0=Ultra-IIi 1=prev-bus-owner */
+#define  SABRE_PCICTRL_CPUPRIO	 0x0000000000100000UL	/* Ultra-IIi granted every other bus cycle */
+#define  SABRE_PCICTRL_ARBPRIO	 0x00000000000f0000UL	/* Slot which is granted every other bus cycle */
+#define  SABRE_PCICTRL_ERREN	 0x0000000000000100UL	/* PCI Error Interrupt Enable */
+#define  SABRE_PCICTRL_RTRYWE	 0x0000000000000080UL	/* DMA Flow Control 0=wait-if-possible 1=retry */
+#define  SABRE_PCICTRL_AEN	 0x000000000000000fUL	/* Slot PCI arbitration enables */
+#define SABRE_PIOAFSR		0x2010UL
+#define  SABRE_PIOAFSR_PMA	 0x8000000000000000UL	/* Primary Master Abort */
+#define  SABRE_PIOAFSR_PTA	 0x4000000000000000UL	/* Primary Target Abort */
+#define  SABRE_PIOAFSR_PRTRY	 0x2000000000000000UL	/* Primary Excessive Retries */
+#define  SABRE_PIOAFSR_PPERR	 0x1000000000000000UL	/* Primary Parity Error */
+#define  SABRE_PIOAFSR_SMA	 0x0800000000000000UL	/* Secondary Master Abort */
+#define  SABRE_PIOAFSR_STA	 0x0400000000000000UL	/* Secondary Target Abort */
+#define  SABRE_PIOAFSR_SRTRY	 0x0200000000000000UL	/* Secondary Excessive Retries */
+#define  SABRE_PIOAFSR_SPERR	 0x0100000000000000UL	/* Secondary Parity Error */
+#define  SABRE_PIOAFSR_BMSK	 0x0000ffff00000000UL	/* Byte Mask */
+#define  SABRE_PIOAFSR_BLK	 0x0000000080000000UL	/* Was Block Operation */
+#define SABRE_PIOAFAR		0x2018UL
+#define SABRE_PCIDIAG		0x2020UL
+#define  SABRE_PCIDIAG_DRTRY	 0x0000000000000040UL	/* Disable PIO Retry Limit */
+#define  SABRE_PCIDIAG_IPAPAR	 0x0000000000000008UL	/* Invert PIO Address Parity */
+#define  SABRE_PCIDIAG_IPDPAR	 0x0000000000000004UL	/* Invert PIO Data Parity */
+#define  SABRE_PCIDIAG_IDDPAR	 0x0000000000000002UL	/* Invert DMA Data Parity */
+#define  SABRE_PCIDIAG_ELPBK	 0x0000000000000001UL	/* Loopback Enable - not supported */
+#define SABRE_PCITASR		0x2028UL
+#define  SABRE_PCITASR_EF	 0x0000000000000080UL	/* Respond to 0xe0000000-0xffffffff */
+#define  SABRE_PCITASR_CD	 0x0000000000000040UL	/* Respond to 0xc0000000-0xdfffffff */
+#define  SABRE_PCITASR_AB	 0x0000000000000020UL	/* Respond to 0xa0000000-0xbfffffff */
+#define  SABRE_PCITASR_89	 0x0000000000000010UL	/* Respond to 0x80000000-0x9fffffff */
+#define  SABRE_PCITASR_67	 0x0000000000000008UL	/* Respond to 0x60000000-0x7fffffff */
+#define  SABRE_PCITASR_45	 0x0000000000000004UL	/* Respond to 0x40000000-0x5fffffff */
+#define  SABRE_PCITASR_23	 0x0000000000000002UL	/* Respond to 0x20000000-0x3fffffff */
+#define  SABRE_PCITASR_01	 0x0000000000000001UL	/* Respond to 0x00000000-0x1fffffff */
+#define SABRE_PIOBUF_DIAG	0x5000UL
+#define SABRE_DMABUF_DIAGLO	0x5100UL
+#define SABRE_DMABUF_DIAGHI	0x51c0UL
+#define SABRE_IMAP_GFX_ALIAS	0x6000UL	/* Aliases to 0x1098 */
+#define SABRE_IMAP_EUPA_ALIAS	0x8000UL	/* Aliases to 0x10a0 */
+#define SABRE_IOMMU_VADIAG	0xa400UL
+#define SABRE_IOMMU_TCDIAG	0xa408UL
+#define SABRE_IOMMU_TAG		0xa580UL
+#define  SABRE_IOMMUTAG_ERRSTS	 0x0000000001800000UL	/* Error status bits */
+#define  SABRE_IOMMUTAG_ERR	 0x0000000000400000UL	/* Error present */
+#define  SABRE_IOMMUTAG_WRITE	 0x0000000000200000UL	/* Page is writable */
+#define  SABRE_IOMMUTAG_STREAM	 0x0000000000100000UL	/* Streamable bit - unused */
+#define  SABRE_IOMMUTAG_SIZE	 0x0000000000080000UL	/* 0=8k 1=16k */
+#define  SABRE_IOMMUTAG_VPN	 0x000000000007ffffUL	/* Virtual Page Number [31:13] */
+#define SABRE_IOMMU_DATA	0xa600UL
+#define SABRE_IOMMUDATA_VALID	 0x0000000040000000UL	/* Valid */
+#define SABRE_IOMMUDATA_USED	 0x0000000020000000UL	/* Used (for LRU algorithm) */
+#define SABRE_IOMMUDATA_CACHE	 0x0000000010000000UL	/* Cacheable */
+#define SABRE_IOMMUDATA_PPN	 0x00000000001fffffUL	/* Physical Page Number [33:13] */
+#define SABRE_PCI_IRQSTATE	0xa800UL
+#define SABRE_OBIO_IRQSTATE	0xa808UL
+#define SABRE_FFBCFG		0xf000UL
+#define  SABRE_FFBCFG_SPRQS	 0x000000000f000000	/* Slave P_RQST queue size */
+#define  SABRE_FFBCFG_ONEREAD	 0x0000000000004000	/* Slave supports one outstanding read */
+#define SABRE_MCCTRL0		0xf010UL
+#define  SABRE_MCCTRL0_RENAB	 0x0000000080000000	/* Refresh Enable */
+#define  SABRE_MCCTRL0_EENAB	 0x0000000010000000	/* Enable all ECC functions */
+#define  SABRE_MCCTRL0_11BIT	 0x0000000000001000	/* Enable 11-bit column addressing */
+#define  SABRE_MCCTRL0_DPP	 0x0000000000000f00	/* DIMM Pair Present Bits */
+#define  SABRE_MCCTRL0_RINTVL	 0x00000000000000ff	/* Refresh Interval */
+#define SABRE_MCCTRL1		0xf018UL
+#define  SABRE_MCCTRL1_AMDC	 0x0000000038000000	/* Advance Memdata Clock */
+#define  SABRE_MCCTRL1_ARDC	 0x0000000007000000	/* Advance DRAM Read Data Clock */
+#define  SABRE_MCCTRL1_CSR	 0x0000000000e00000	/* CAS to RAS delay for CBR refresh */
+#define  SABRE_MCCTRL1_CASRW	 0x00000000001c0000	/* CAS length for read/write */
+#define  SABRE_MCCTRL1_RCD	 0x0000000000038000	/* RAS to CAS delay */
+#define  SABRE_MCCTRL1_CP	 0x0000000000007000	/* CAS Precharge */
+#define  SABRE_MCCTRL1_RP	 0x0000000000000e00	/* RAS Precharge */
+#define  SABRE_MCCTRL1_RAS	 0x00000000000001c0	/* Length of RAS for refresh */
+#define  SABRE_MCCTRL1_CASRW2	 0x0000000000000038	/* Must be same as CASRW */
+#define  SABRE_MCCTRL1_RSC	 0x0000000000000007	/* RAS after CAS hold time */
+#define SABRE_RESETCTRL		0xf020UL
+
+#define SABRE_CONFIGSPACE	0x001000000UL
+#define SABRE_IOSPACE		0x002000000UL
+#define SABRE_IOSPACE_SIZE	0x000ffffffUL
+#define SABRE_MEMSPACE		0x100000000UL
+#define SABRE_MEMSPACE_SIZE	0x07fffffffUL
+
+static int hummingbird_p;
+static struct pci_bus *sabre_root_bus;
+
+static irqreturn_t sabre_ue_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg = pbm->controller_regs + SABRE_UE_AFSR;
+	unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+	unsigned long afsr, afar, error_bits;
+	int reported;
+
+	/* Latch uncorrectable error status. */
+	afar = upa_readq(afar_reg);
+	afsr = upa_readq(afsr_reg);
+
+	/* Clear the primary/secondary error status bits. */
+	error_bits = afsr &
+		(SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+		 SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+		 SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE);
+	if (!error_bits)
+		return IRQ_NONE;
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: Uncorrectable Error, primary error type[%s%s]\n",
+	       pbm->name,
+	       ((error_bits & SABRE_UEAFSR_PDRD) ?
+		"DMA Read" :
+		((error_bits & SABRE_UEAFSR_PDWR) ?
+		 "DMA Write" : "???")),
+	       ((error_bits & SABRE_UEAFSR_PDTE) ?
+		":Translation Error" : ""));
+	printk("%s: bytemask[%04lx] dword_offset[%lx] was_block(%d)\n",
+	       pbm->name,
+	       (afsr & SABRE_UEAFSR_BMSK) >> 32UL,
+	       (afsr & SABRE_UEAFSR_OFF) >> 29UL,
+	       ((afsr & SABRE_UEAFSR_BLK) ? 1 : 0));
+	printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+	printk("%s: UE Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & SABRE_UEAFSR_SDRD) {
+		reported++;
+		printk("(DMA Read)");
+	}
+	if (afsr & SABRE_UEAFSR_SDWR) {
+		reported++;
+		printk("(DMA Write)");
+	}
+	if (afsr & SABRE_UEAFSR_SDTE) {
+		reported++;
+		printk("(Translation Error)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	/* Interrogate IOMMU for error status. */
+	psycho_check_iommu_error(pbm, afsr, afar, UE_ERR);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t sabre_ce_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg = pbm->controller_regs + SABRE_CE_AFSR;
+	unsigned long afar_reg = pbm->controller_regs + SABRE_UECE_AFAR;
+	unsigned long afsr, afar, error_bits;
+	int reported;
+
+	/* Latch error status. */
+	afar = upa_readq(afar_reg);
+	afsr = upa_readq(afsr_reg);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+		 SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR);
+	if (!error_bits)
+		return IRQ_NONE;
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: Correctable Error, primary error type[%s]\n",
+	       pbm->name,
+	       ((error_bits & SABRE_CEAFSR_PDRD) ?
+		"DMA Read" :
+		((error_bits & SABRE_CEAFSR_PDWR) ?
+		 "DMA Write" : "???")));
+
+	/* XXX Use syndrome and afar to print out module string just like
+	 * XXX UDB CE trap handler does... -DaveM
+	 */
+	printk("%s: syndrome[%02lx] bytemask[%04lx] dword_offset[%lx] "
+	       "was_block(%d)\n",
+	       pbm->name,
+	       (afsr & SABRE_CEAFSR_ESYND) >> 48UL,
+	       (afsr & SABRE_CEAFSR_BMSK) >> 32UL,
+	       (afsr & SABRE_CEAFSR_OFF) >> 29UL,
+	       ((afsr & SABRE_CEAFSR_BLK) ? 1 : 0));
+	printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+	printk("%s: CE Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & SABRE_CEAFSR_SDRD) {
+		reported++;
+		printk("(DMA Read)");
+	}
+	if (afsr & SABRE_CEAFSR_SDWR) {
+		reported++;
+		printk("(DMA Write)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	return IRQ_HANDLED;
+}
+
+static void sabre_register_error_handlers(struct pci_pbm_info *pbm)
+{
+	struct device_node *dp = pbm->op->dev.of_node;
+	struct platform_device *op;
+	unsigned long base = pbm->controller_regs;
+	u64 tmp;
+	int err;
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_SABRE)
+		dp = dp->parent;
+
+	op = of_find_device_by_node(dp);
+	if (!op)
+		return;
+
+	/* Sabre/Hummingbird IRQ property layout is:
+	 * 0: PCI ERR
+	 * 1: UE ERR
+	 * 2: CE ERR
+	 * 3: POWER FAIL
+	 */
+	if (op->archdata.num_irqs < 4)
+		return;
+
+	/* We clear the error bits in the appropriate AFSR before
+	 * registering the handler so that we don't get spurious
+	 * interrupts.
+	 */
+	upa_writeq((SABRE_UEAFSR_PDRD | SABRE_UEAFSR_PDWR |
+		    SABRE_UEAFSR_SDRD | SABRE_UEAFSR_SDWR |
+		    SABRE_UEAFSR_SDTE | SABRE_UEAFSR_PDTE),
+		   base + SABRE_UE_AFSR);
+
+	err = request_irq(op->archdata.irqs[1], sabre_ue_intr, 0, "SABRE_UE", pbm);
+	if (err)
+		printk(KERN_WARNING "%s: Couldn't register UE, err=%d.\n",
+		       pbm->name, err);
+
+	upa_writeq((SABRE_CEAFSR_PDRD | SABRE_CEAFSR_PDWR |
+		    SABRE_CEAFSR_SDRD | SABRE_CEAFSR_SDWR),
+		   base + SABRE_CE_AFSR);
+
+	err = request_irq(op->archdata.irqs[2], sabre_ce_intr, 0, "SABRE_CE", pbm);
+	if (err)
+		printk(KERN_WARNING "%s: Couldn't register CE, err=%d.\n",
+		       pbm->name, err);
+	err = request_irq(op->archdata.irqs[0], psycho_pcierr_intr, 0,
+			  "SABRE_PCIERR", pbm);
+	if (err)
+		printk(KERN_WARNING "%s: Couldn't register PCIERR, err=%d.\n",
+		       pbm->name, err);
+
+	tmp = upa_readq(base + SABRE_PCICTRL);
+	tmp |= SABRE_PCICTRL_ERREN;
+	upa_writeq(tmp, base + SABRE_PCICTRL);
+}
+
+static void apb_init(struct pci_bus *sabre_bus)
+{
+	struct pci_dev *pdev;
+
+	list_for_each_entry(pdev, &sabre_bus->devices, bus_list) {
+		if (pdev->vendor == PCI_VENDOR_ID_SUN &&
+		    pdev->device == PCI_DEVICE_ID_SUN_SIMBA) {
+			u16 word16;
+
+			pci_read_config_word(pdev, PCI_COMMAND, &word16);
+			word16 |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY |
+				PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY |
+				PCI_COMMAND_IO;
+			pci_write_config_word(pdev, PCI_COMMAND, word16);
+
+			/* Status register bits are "write 1 to clear". */
+			pci_write_config_word(pdev, PCI_STATUS, 0xffff);
+			pci_write_config_word(pdev, PCI_SEC_STATUS, 0xffff);
+
+			/* Use a primary/secondary latency timer value
+			 * of 64.
+			 */
+			pci_write_config_byte(pdev, PCI_LATENCY_TIMER, 64);
+			pci_write_config_byte(pdev, PCI_SEC_LATENCY_TIMER, 64);
+
+			/* Enable reporting/forwarding of master aborts,
+			 * parity, and SERR.
+			 */
+			pci_write_config_byte(pdev, PCI_BRIDGE_CONTROL,
+					      (PCI_BRIDGE_CTL_PARITY |
+					       PCI_BRIDGE_CTL_SERR |
+					       PCI_BRIDGE_CTL_MASTER_ABORT));
+		}
+	}
+}
+
+static void sabre_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
+{
+	static int once;
+
+	/* The APB bridge speaks to the Sabre host PCI bridge
+	 * at 66MHz, but the front side of APB runs at 33MHz
+	 * for both segments.
+	 *
+	 * Hummingbird systems do not use APB, so they run
+	 * at 66MHz.
+	 */
+	if (hummingbird_p)
+		pbm->is_66mhz_capable = 1;
+	else
+		pbm->is_66mhz_capable = 0;
+
+	/* This driver has not been verified to handle
+	 * multiple SABREs yet, so trap this.
+	 *
+	 * Also note that the SABRE host bridge is hardwired
+	 * to live at bus 0.
+	 */
+	if (once != 0) {
+		printk(KERN_ERR PFX "Multiple controllers unsupported.\n");
+		return;
+	}
+	once++;
+
+	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+	if (!pbm->pci_bus)
+		return;
+
+	sabre_root_bus = pbm->pci_bus;
+
+	apb_init(pbm->pci_bus);
+
+	sabre_register_error_handlers(pbm);
+}
+
+static void sabre_pbm_init(struct pci_pbm_info *pbm,
+			   struct platform_device *op)
+{
+	psycho_pbm_init_common(pbm, op, "SABRE", PBM_CHIP_TYPE_SABRE);
+	pbm->pci_afsr = pbm->controller_regs + SABRE_PIOAFSR;
+	pbm->pci_afar = pbm->controller_regs + SABRE_PIOAFAR;
+	pbm->pci_csr = pbm->controller_regs + SABRE_PCICTRL;
+	sabre_scan_bus(pbm, &op->dev);
+}
+
+static const struct of_device_id sabre_match[];
+static int sabre_probe(struct platform_device *op)
+{
+	const struct of_device_id *match;
+	const struct linux_prom64_registers *pr_regs;
+	struct device_node *dp = op->dev.of_node;
+	struct pci_pbm_info *pbm;
+	u32 upa_portid, dma_mask;
+	struct iommu *iommu;
+	int tsbsize, err;
+	const u32 *vdma;
+	u64 clear_irq;
+
+	match = of_match_device(sabre_match, &op->dev);
+	hummingbird_p = match && (match->data != NULL);
+	if (!hummingbird_p) {
+		struct device_node *cpu_dp;
+
+		/* Of course, Sun has to encode things a thousand
+		 * different ways, inconsistently.
+		 */
+		for_each_node_by_type(cpu_dp, "cpu") {
+			if (!strcmp(cpu_dp->name, "SUNW,UltraSPARC-IIe"))
+				hummingbird_p = 1;
+		}
+	}
+
+	err = -ENOMEM;
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (!pbm) {
+		printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+		goto out_err;
+	}
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu) {
+		printk(KERN_ERR PFX "Cannot allocate PBM iommu.\n");
+		goto out_free_controller;
+	}
+
+	pbm->iommu = iommu;
+
+	upa_portid = of_getintprop_default(dp, "upa-portid", 0xff);
+
+	pbm->portid = upa_portid;
+
+	/*
+	 * Map in SABRE register set and report the presence of this SABRE.
+	 */
+
+	pr_regs = of_get_property(dp, "reg", NULL);
+	err = -ENODEV;
+	if (!pr_regs) {
+		printk(KERN_ERR PFX "No reg property\n");
+		goto out_free_iommu;
+	}
+
+	/*
+	 * First REG in property is base of entire SABRE register space.
+	 */
+	pbm->controller_regs = pr_regs[0].phys_addr;
+
+	/* Clear interrupts */
+
+	/* PCI first */
+	for (clear_irq = SABRE_ICLR_A_SLOT0; clear_irq < SABRE_ICLR_B_SLOT0 + 0x80; clear_irq += 8)
+		upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+	/* Then OBIO */
+	for (clear_irq = SABRE_ICLR_SCSI; clear_irq < SABRE_ICLR_SCSI + 0x80; clear_irq += 8)
+		upa_writeq(0x0UL, pbm->controller_regs + clear_irq);
+
+	/* Error interrupts are enabled later after the bus scan. */
+	upa_writeq((SABRE_PCICTRL_MRLEN   | SABRE_PCICTRL_SERR |
+		    SABRE_PCICTRL_ARBPARK | SABRE_PCICTRL_AEN),
+		   pbm->controller_regs + SABRE_PCICTRL);
+
+	/* Now map in PCI config space for entire SABRE. */
+	pbm->config_space = pbm->controller_regs + SABRE_CONFIGSPACE;
+
+	vdma = of_get_property(dp, "virtual-dma", NULL);
+	if (!vdma) {
+		printk(KERN_ERR PFX "No virtual-dma property\n");
+		goto out_free_iommu;
+	}
+
+	dma_mask = vdma[0];
+	switch (vdma[1]) {
+		case 0x20000000:
+			dma_mask |= 0x1fffffff;
+			tsbsize = 64;
+			break;
+
+		case 0x40000000:
+			dma_mask |= 0x3fffffff;
+			tsbsize = 128;
+			break;
+
+		case 0x80000000:
+			dma_mask |= 0x7fffffff;
+			tsbsize = 128;
+			break;
+		default:
+			printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+			goto out_free_iommu;
+	}
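+
+	/* In every case dma_mask ends up as vdma[0] | (vdma[1] - 1), the
+	 * last address of the DVMA window, while tsbsize counts IOMMU TSB
+	 * entries in units of 1K: one entry per 8K page of the window,
+	 * except that the 2GB case stays at 128K entries, apparently the
+	 * largest TSB the hardware supports.
+	 */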
+
+	err = psycho_iommu_init(pbm, tsbsize, vdma[0], dma_mask, SABRE_WRSYNC);
+	if (err)
+		goto out_free_iommu;
+
+	/*
+	 * Look for APB underneath.
+	 */
+	sabre_pbm_init(pbm, op);
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	dev_set_drvdata(&op->dev, pbm);
+
+	return 0;
+
+out_free_iommu:
+	kfree(pbm->iommu);
+
+out_free_controller:
+	kfree(pbm);
+
+out_err:
+	return err;
+}
+
+static const struct of_device_id sabre_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pci108e,a001",
+		.data = (void *) 1,
+	},
+	{
+		.name = "pci",
+		.compatible = "pci108e,a000",
+	},
+	{},
+};
+
+static struct platform_driver sabre_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = sabre_match,
+	},
+	.probe		= sabre_probe,
+};
+
+static int __init sabre_init(void)
+{
+	return platform_driver_register(&sabre_driver);
+}
+
+subsys_initcall(sabre_init);
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
new file mode 100644
index 0000000..934b97c
--- /dev/null
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -0,0 +1,1509 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_schizo.c: SCHIZO/TOMATILLO specific PCI controller support.
+ *
+ * Copyright (C) 2001, 2002, 2003, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/pstate.h>
+#include <asm/prom.h>
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+
+#define DRIVER_NAME	"schizo"
+#define PFX		DRIVER_NAME ": "
+
+/* This is a convention that at least Excalibur and Merlin
+ * follow.  I suppose the SCHIZO used in Starcat and friends
+ * will do similar.
+ *
+ * The only way I could see this changing is if the newlink
+ * block requires more space in Schizo's address space than
+ * they predicted, thus requiring an address space reorg when
+ * the newer Schizo is taped out.
+ */
+
+/* Streaming buffer control register. */
+#define SCHIZO_STRBUF_CTRL_LPTR    0x00000000000000f0UL /* LRU Lock Pointer */
+#define SCHIZO_STRBUF_CTRL_LENAB   0x0000000000000008UL /* LRU Lock Enable */
+#define SCHIZO_STRBUF_CTRL_RRDIS   0x0000000000000004UL /* Rerun Disable */
+#define SCHIZO_STRBUF_CTRL_DENAB   0x0000000000000002UL /* Diagnostic Mode Enable */
+#define SCHIZO_STRBUF_CTRL_ENAB    0x0000000000000001UL /* Streaming Buffer Enable */
+
+/* IOMMU control register. */
+#define SCHIZO_IOMMU_CTRL_RESV     0xfffffffff9000000UL /* Reserved                      */
+#define SCHIZO_IOMMU_CTRL_XLTESTAT 0x0000000006000000UL /* Translation Error Status      */
+#define SCHIZO_IOMMU_CTRL_XLTEERR  0x0000000001000000UL /* Translation Error encountered */
+#define SCHIZO_IOMMU_CTRL_LCKEN    0x0000000000800000UL /* Enable translation locking    */
+#define SCHIZO_IOMMU_CTRL_LCKPTR   0x0000000000780000UL /* Translation lock pointer      */
+#define SCHIZO_IOMMU_CTRL_TSBSZ    0x0000000000070000UL /* TSB Size                      */
+#define SCHIZO_IOMMU_TSBSZ_1K      0x0000000000000000UL /* TSB Table 1024 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_2K      0x0000000000010000UL /* TSB Table 2048 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_4K      0x0000000000020000UL /* TSB Table 4096 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_8K      0x0000000000030000UL /* TSB Table 8192 8-byte entries */
+#define SCHIZO_IOMMU_TSBSZ_16K     0x0000000000040000UL /* TSB Table 16k 8-byte entries  */
+#define SCHIZO_IOMMU_TSBSZ_32K     0x0000000000050000UL /* TSB Table 32k 8-byte entries  */
+#define SCHIZO_IOMMU_TSBSZ_64K     0x0000000000060000UL /* TSB Table 64k 8-byte entries  */
+#define SCHIZO_IOMMU_TSBSZ_128K    0x0000000000070000UL /* TSB Table 128k 8-byte entries */
+#define SCHIZO_IOMMU_CTRL_RESV2    0x000000000000fff8UL /* Reserved                      */
+#define SCHIZO_IOMMU_CTRL_TBWSZ    0x0000000000000004UL /* Assumed page size, 0=8k 1=64k */
+#define SCHIZO_IOMMU_CTRL_DENAB    0x0000000000000002UL /* Diagnostic mode enable        */
+#define SCHIZO_IOMMU_CTRL_ENAB     0x0000000000000001UL /* IOMMU Enable                  */
+
+/* Schizo config space address format is nearly identical to
+ * that of PSYCHO:
+ *
+ *  32             24 23 16 15    11 10       8 7   2  1 0
+ * ---------------------------------------------------------
+ * |0 0 0 0 0 0 0 0 0| bus | device | function | reg | 0 0 |
+ * ---------------------------------------------------------
+ */
+#define SCHIZO_CONFIG_BASE(PBM)	((PBM)->config_space)
+#define SCHIZO_CONFIG_ENCODE(BUS, DEVFN, REG)	\
+	(((unsigned long)(BUS)   << 16) |	\
+	 ((unsigned long)(DEVFN) << 8)  |	\
+	 ((unsigned long)(REG)))
+
+static void *schizo_pci_config_mkaddr(struct pci_pbm_info *pbm,
+				      unsigned char bus,
+				      unsigned int devfn,
+				      int where)
+{
+	if (!pbm)
+		return NULL;
+	bus -= pbm->pci_first_busno;
+	return (void *)
+		(SCHIZO_CONFIG_BASE(pbm) |
+		 SCHIZO_CONFIG_ENCODE(bus, devfn, where));
+}
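+
+/* A worked example of the encoding above, assuming config_space is
+ * 0x7ffe0000000 and pci_first_busno is 0: a read of register 0x40 on
+ * bus 0x01, device 0x02, function 0x03 (devfn 0x13) resolves to
+ *
+ *	0x7ffe0000000 | (0x01 << 16) | (0x13 << 8) | 0x40 == 0x7ffe0011340
+ *
+ * with the low two bits left clear, as the diagram requires.
+ */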
+
+/* SCHIZO error handling support. */
+enum schizo_error_type {
+	UE_ERR, CE_ERR, PCI_ERR, SAFARI_ERR
+};
+
+static DEFINE_SPINLOCK(stc_buf_lock);
+static unsigned long stc_error_buf[128];
+static unsigned long stc_tag_buf[16];
+static unsigned long stc_line_buf[16];
+
+#define SCHIZO_UE_INO		0x30 /* Uncorrectable ECC error */
+#define SCHIZO_CE_INO		0x31 /* Correctable ECC error */
+#define SCHIZO_PCIERR_A_INO	0x32 /* PBM A PCI bus error */
+#define SCHIZO_PCIERR_B_INO	0x33 /* PBM B PCI bus error */
+#define SCHIZO_SERR_INO		0x34 /* Safari interface error */
+
+#define SCHIZO_STC_ERR	0xb800UL /* --> 0xba00 */
+#define SCHIZO_STC_TAG	0xba00UL /* --> 0xba80 */
+#define SCHIZO_STC_LINE	0xbb00UL /* --> 0xbb80 */
+
+#define SCHIZO_STCERR_WRITE	0x2UL
+#define SCHIZO_STCERR_READ	0x1UL
+
+#define SCHIZO_STCTAG_PPN	0x3fffffff00000000UL
+#define SCHIZO_STCTAG_VPN	0x00000000ffffe000UL
+#define SCHIZO_STCTAG_VALID	0x8000000000000000UL
+#define SCHIZO_STCTAG_READ	0x4000000000000000UL
+
+#define SCHIZO_STCLINE_LINDX	0x0000000007800000UL
+#define SCHIZO_STCLINE_SPTR	0x000000000007e000UL
+#define SCHIZO_STCLINE_LADDR	0x0000000000001fc0UL
+#define SCHIZO_STCLINE_EPTR	0x000000000000003fUL
+#define SCHIZO_STCLINE_VALID	0x0000000000600000UL
+#define SCHIZO_STCLINE_FOFN	0x0000000000180000UL
+
+static void __schizo_check_stc_error_pbm(struct pci_pbm_info *pbm,
+					 enum schizo_error_type type)
+{
+	struct strbuf *strbuf = &pbm->stc;
+	unsigned long regbase = pbm->pbm_regs;
+	unsigned long err_base, tag_base, line_base;
+	u64 control;
+	int i;
+
+	err_base = regbase + SCHIZO_STC_ERR;
+	tag_base = regbase + SCHIZO_STC_TAG;
+	line_base = regbase + SCHIZO_STC_LINE;
+
+	spin_lock(&stc_buf_lock);
+
+	/* This is __REALLY__ dangerous.  When we put the
+	 * streaming buffer into diagnostic mode to probe
+	 * its tags and error status, we _must_ clear all
+	 * of the line tag valid bits before re-enabling
+	 * the streaming buffer.  If any dirty data lives
+	 * in the STC when we do this, we will end up
+	 * invalidating it before it has a chance to reach
+	 * main memory.
+	 */
+	control = upa_readq(strbuf->strbuf_control);
+	upa_writeq((control | SCHIZO_STRBUF_CTRL_DENAB),
+		   strbuf->strbuf_control);
+	for (i = 0; i < 128; i++) {
+		unsigned long val;
+
+		val = upa_readq(err_base + (i * 8UL));
+		upa_writeq(0UL, err_base + (i * 8UL));
+		stc_error_buf[i] = val;
+	}
+	for (i = 0; i < 16; i++) {
+		stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL));
+		stc_line_buf[i] = upa_readq(line_base + (i * 8UL));
+		upa_writeq(0UL, tag_base + (i * 8UL));
+		upa_writeq(0UL, line_base + (i * 8UL));
+	}
+
+	/* OK, state is logged, exit diagnostic mode. */
+	upa_writeq(control, strbuf->strbuf_control);
+
+	for (i = 0; i < 16; i++) {
+		int j, saw_error, first, last;
+
+		saw_error = 0;
+		first = i * 8;
+		last = first + 8;
+		for (j = first; j < last; j++) {
+			unsigned long errval = stc_error_buf[j];
+			if (errval != 0) {
+				saw_error++;
+				printk("%s: STC_ERR(%d)[wr(%d)rd(%d)]\n",
+				       pbm->name,
+				       j,
+				       (errval & SCHIZO_STCERR_WRITE) ? 1 : 0,
+				       (errval & SCHIZO_STCERR_READ) ? 1 : 0);
+			}
+		}
+		if (saw_error != 0) {
+			unsigned long tagval = stc_tag_buf[i];
+			unsigned long lineval = stc_line_buf[i];
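+			/* On the shifts below: STCTAG_PPN occupies bits
+			 * 32-61 and names an 8K physical page, so
+			 * shifting right by 19 (= 32 - 13) turns it
+			 * directly into a physical address, while
+			 * STCTAG_VPN already sits at bits 13-31 and
+			 * prints unshifted.
+			 */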
+			printk("%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)V(%d)R(%d)]\n",
+			       pbm->name,
+			       i,
+			       ((tagval & SCHIZO_STCTAG_PPN) >> 19UL),
+			       (tagval & SCHIZO_STCTAG_VPN),
+			       ((tagval & SCHIZO_STCTAG_VALID) ? 1 : 0),
+			       ((tagval & SCHIZO_STCTAG_READ) ? 1 : 0));
+
+			/* XXX Should spit out per-bank error information... -DaveM */
+			printk("%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)LADDR(%lx)EP(%lx)"
+			       "V(%d)FOFN(%d)]\n",
+			       pbm->name,
+			       i,
+			       ((lineval & SCHIZO_STCLINE_LINDX) >> 23UL),
+			       ((lineval & SCHIZO_STCLINE_SPTR) >> 13UL),
+			       ((lineval & SCHIZO_STCLINE_LADDR) >> 6UL),
+			       ((lineval & SCHIZO_STCLINE_EPTR) >> 0UL),
+			       ((lineval & SCHIZO_STCLINE_VALID) ? 1 : 0),
+			       ((lineval & SCHIZO_STCLINE_FOFN) ? 1 : 0));
+		}
+	}
+
+	spin_unlock(&stc_buf_lock);
+}
+
+/* IOMMU is per-PBM in Schizo, so interrogate both for anonymous
+ * controller level errors.
+ */
+
+#define SCHIZO_IOMMU_TAG	0xa580UL
+#define SCHIZO_IOMMU_DATA	0xa600UL
+
+#define SCHIZO_IOMMU_TAG_CTXT	0x0000001ffe000000UL
+#define SCHIZO_IOMMU_TAG_ERRSTS	0x0000000001800000UL
+#define SCHIZO_IOMMU_TAG_ERR	0x0000000000400000UL
+#define SCHIZO_IOMMU_TAG_WRITE	0x0000000000200000UL
+#define SCHIZO_IOMMU_TAG_STREAM	0x0000000000100000UL
+#define SCHIZO_IOMMU_TAG_SIZE	0x0000000000080000UL
+#define SCHIZO_IOMMU_TAG_VPAGE	0x000000000007ffffUL
+
+#define SCHIZO_IOMMU_DATA_VALID	0x0000000100000000UL
+#define SCHIZO_IOMMU_DATA_CACHE	0x0000000040000000UL
+#define SCHIZO_IOMMU_DATA_PPAGE	0x000000003fffffffUL
+
+static void schizo_check_iommu_error_pbm(struct pci_pbm_info *pbm,
+					 enum schizo_error_type type)
+{
+	struct iommu *iommu = pbm->iommu;
+	unsigned long iommu_tag[16];
+	unsigned long iommu_data[16];
+	unsigned long flags;
+	u64 control;
+	int i;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+	control = upa_readq(iommu->iommu_control);
+	if (control & SCHIZO_IOMMU_CTRL_XLTEERR) {
+		unsigned long base;
+		char *type_string;
+
+		/* Clear the error encountered bit. */
+		control &= ~SCHIZO_IOMMU_CTRL_XLTEERR;
+		upa_writeq(control, iommu->iommu_control);
+
+		switch ((control & SCHIZO_IOMMU_CTRL_XLTESTAT) >> 25UL) {
+		case 0:
+			type_string = "Protection Error";
+			break;
+		case 1:
+			type_string = "Invalid Error";
+			break;
+		case 2:
+			type_string = "TimeOut Error";
+			break;
+		case 3:
+		default:
+			type_string = "ECC Error";
+			break;
+		}
+		printk("%s: IOMMU Error, type[%s]\n",
+		       pbm->name, type_string);
+
+		/* Put the IOMMU into diagnostic mode and probe
+		 * its TLB for entries with error status.
+		 *
+		 * It is very possible for another DVMA to occur
+		 * while we do this probe, and corrupt the system
+		 * further.  But we are so screwed at this point
+		 * that we are likely to crash hard anyway, so
+		 * get as much diagnostic information to the
+		 * console as we can.
+		 */
+		upa_writeq(control | SCHIZO_IOMMU_CTRL_DENAB,
+			   iommu->iommu_control);
+
+		base = pbm->pbm_regs;
+
+		for (i = 0; i < 16; i++) {
+			iommu_tag[i] =
+				upa_readq(base + SCHIZO_IOMMU_TAG + (i * 8UL));
+			iommu_data[i] =
+				upa_readq(base + SCHIZO_IOMMU_DATA + (i * 8UL));
+
+			/* Now clear out the entry. */
+			upa_writeq(0, base + SCHIZO_IOMMU_TAG + (i * 8UL));
+			upa_writeq(0, base + SCHIZO_IOMMU_DATA + (i * 8UL));
+		}
+
+		/* Leave diagnostic mode. */
+		upa_writeq(control, iommu->iommu_control);
+
+		for (i = 0; i < 16; i++) {
+			unsigned long tag, data;
+
+			tag = iommu_tag[i];
+			if (!(tag & SCHIZO_IOMMU_TAG_ERR))
+				continue;
+
+			data = iommu_data[i];
+			switch ((tag & SCHIZO_IOMMU_TAG_ERRSTS) >> 23UL) {
+			case 0:
+				type_string = "Protection Error";
+				break;
+			case 1:
+				type_string = "Invalid Error";
+				break;
+			case 2:
+				type_string = "TimeOut Error";
+				break;
+			case 3:
+			default:
+				type_string = "ECC Error";
+				break;
+			}
+			printk("%s: IOMMU TAG(%d)[error(%s) ctx(%x) wr(%d) str(%d) "
+			       "sz(%dK) vpg(%08lx)]\n",
+			       pbm->name, i, type_string,
+			       (int)((tag & SCHIZO_IOMMU_TAG_CTXT) >> 25UL),
+			       ((tag & SCHIZO_IOMMU_TAG_WRITE) ? 1 : 0),
+			       ((tag & SCHIZO_IOMMU_TAG_STREAM) ? 1 : 0),
+			       ((tag & SCHIZO_IOMMU_TAG_SIZE) ? 64 : 8),
+			       (tag & SCHIZO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT);
+			printk("%s: IOMMU DATA(%d)[valid(%d) cache(%d) ppg(%016lx)]\n",
+			       pbm->name, i,
+			       ((data & SCHIZO_IOMMU_DATA_VALID) ? 1 : 0),
+			       ((data & SCHIZO_IOMMU_DATA_CACHE) ? 1 : 0),
+			       (data & SCHIZO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT);
+		}
+	}
+	if (pbm->stc.strbuf_enabled)
+		__schizo_check_stc_error_pbm(pbm, type);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static void schizo_check_iommu_error(struct pci_pbm_info *pbm,
+				     enum schizo_error_type type)
+{
+	schizo_check_iommu_error_pbm(pbm, type);
+	if (pbm->sibling)
+		schizo_check_iommu_error_pbm(pbm->sibling, type);
+}
+
+/* Uncorrectable ECC error status gathering. */
+#define SCHIZO_UE_AFSR	0x10030UL
+#define SCHIZO_UE_AFAR	0x10038UL
+
+#define SCHIZO_UEAFSR_PPIO	0x8000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_PDRD	0x4000000000000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_PDWR	0x2000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_SPIO	0x1000000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_SDMA	0x0800000000000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_ERRPNDG	0x0300000000000000UL /* Safari */
+#define SCHIZO_UEAFSR_BMSK	0x000003ff00000000UL /* Safari */
+#define SCHIZO_UEAFSR_QOFF	0x00000000c0000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_AID	0x000000001f000000UL /* Safari/Tomatillo */
+#define SCHIZO_UEAFSR_PARTIAL	0x0000000000800000UL /* Safari */
+#define SCHIZO_UEAFSR_OWNEDIN	0x0000000000400000UL /* Safari */
+#define SCHIZO_UEAFSR_MTAGSYND	0x00000000000f0000UL /* Safari */
+#define SCHIZO_UEAFSR_MTAG	0x000000000000e000UL /* Safari */
+#define SCHIZO_UEAFSR_ECCSYND	0x00000000000001ffUL /* Safari */
+
+static irqreturn_t schizo_ue_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg = pbm->controller_regs + SCHIZO_UE_AFSR;
+	unsigned long afar_reg = pbm->controller_regs + SCHIZO_UE_AFAR;
+	unsigned long afsr, afar, error_bits;
+	int reported, limit;
+
+	/* Latch uncorrectable error status. */
+	afar = upa_readq(afar_reg);
+
+	/* If either of the error pending bits are set in the
+	 * AFSR, the error status is being actively updated by
+	 * the hardware and we must re-read to get a clean value.
+	 */
+	limit = 1000;
+	do {
+		afsr = upa_readq(afsr_reg);
+	} while ((afsr & SCHIZO_UEAFSR_ERRPNDG) != 0 && --limit);
+
+	/* Clear the primary/secondary error status bits. */
+	error_bits = afsr &
+		(SCHIZO_UEAFSR_PPIO | SCHIZO_UEAFSR_PDRD | SCHIZO_UEAFSR_PDWR |
+		 SCHIZO_UEAFSR_SPIO | SCHIZO_UEAFSR_SDMA);
+	if (!error_bits)
+		return IRQ_NONE;
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: Uncorrectable Error, primary error type[%s]\n",
+	       pbm->name,
+	       (((error_bits & SCHIZO_UEAFSR_PPIO) ?
+		 "PIO" :
+		 ((error_bits & SCHIZO_UEAFSR_PDRD) ?
+		  "DMA Read" :
+		  ((error_bits & SCHIZO_UEAFSR_PDWR) ?
+		   "DMA Write" : "???")))));
+	printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+	       pbm->name,
+	       (afsr & SCHIZO_UEAFSR_BMSK) >> 32UL,
+	       (afsr & SCHIZO_UEAFSR_QOFF) >> 30UL,
+	       (afsr & SCHIZO_UEAFSR_AID) >> 24UL);
+	printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_synd[%lx]\n",
+	       pbm->name,
+	       (afsr & SCHIZO_UEAFSR_PARTIAL) ? 1 : 0,
+	       (afsr & SCHIZO_UEAFSR_OWNEDIN) ? 1 : 0,
+	       (afsr & SCHIZO_UEAFSR_MTAG) >> 13UL,
+	       (afsr & SCHIZO_UEAFSR_MTAGSYND) >> 16UL,
+	       (afsr & SCHIZO_UEAFSR_ECCSYND) >> 0UL);
+	printk("%s: UE AFAR [%016lx]\n", pbm->name, afar);
+	printk("%s: UE Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & SCHIZO_UEAFSR_SPIO) {
+		reported++;
+		printk("(PIO)");
+	}
+	if (afsr & SCHIZO_UEAFSR_SDMA) {
+		reported++;
+		printk("(DMA)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	/* Interrogate IOMMU for error status. */
+	schizo_check_iommu_error(pbm, UE_ERR);
+
+	return IRQ_HANDLED;
+}
+
+#define SCHIZO_CE_AFSR	0x10040UL
+#define SCHIZO_CE_AFAR	0x10048UL
+
+#define SCHIZO_CEAFSR_PPIO	0x8000000000000000UL
+#define SCHIZO_CEAFSR_PDRD	0x4000000000000000UL
+#define SCHIZO_CEAFSR_PDWR	0x2000000000000000UL
+#define SCHIZO_CEAFSR_SPIO	0x1000000000000000UL
+#define SCHIZO_CEAFSR_SDMA	0x0800000000000000UL
+#define SCHIZO_CEAFSR_ERRPNDG	0x0300000000000000UL
+#define SCHIZO_CEAFSR_BMSK	0x000003ff00000000UL
+#define SCHIZO_CEAFSR_QOFF	0x00000000c0000000UL
+#define SCHIZO_CEAFSR_AID	0x000000001f000000UL
+#define SCHIZO_CEAFSR_PARTIAL	0x0000000000800000UL
+#define SCHIZO_CEAFSR_OWNEDIN	0x0000000000400000UL
+#define SCHIZO_CEAFSR_MTAGSYND	0x00000000000f0000UL
+#define SCHIZO_CEAFSR_MTAG	0x000000000000e000UL
+#define SCHIZO_CEAFSR_ECCSYND	0x00000000000001ffUL
+
+static irqreturn_t schizo_ce_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg = pbm->controller_regs + SCHIZO_CE_AFSR;
+	unsigned long afar_reg = pbm->controller_regs + SCHIZO_CE_AFAR;
+	unsigned long afsr, afar, error_bits;
+	int reported, limit;
+
+	/* Latch error status. */
+	afar = upa_readq(afar_reg);
+
+	/* If either of the error pending bits are set in the
+	 * AFSR, the error status is being actively updated by
+	 * the hardware and we must re-read to get a clean value.
+	 */
+	limit = 1000;
+	do {
+		afsr = upa_readq(afsr_reg);
+	} while ((afsr & SCHIZO_CEAFSR_ERRPNDG) != 0 && --limit);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(SCHIZO_CEAFSR_PPIO | SCHIZO_CEAFSR_PDRD | SCHIZO_CEAFSR_PDWR |
+		 SCHIZO_CEAFSR_SPIO | SCHIZO_CEAFSR_SDMA);
+	if (!error_bits)
+		return IRQ_NONE;
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: Correctable Error, primary error type[%s]\n",
+	       pbm->name,
+	       (((error_bits & SCHIZO_CEAFSR_PPIO) ?
+		 "PIO" :
+		 ((error_bits & SCHIZO_CEAFSR_PDRD) ?
+		  "DMA Read" :
+		  ((error_bits & SCHIZO_CEAFSR_PDWR) ?
+		   "DMA Write" : "???")))));
+
+	/* XXX Use syndrome and afar to print out module string just like
+	 * XXX UDB CE trap handler does... -DaveM
+	 */
+	printk("%s: bytemask[%04lx] qword_offset[%lx] SAFARI_AID[%02lx]\n",
+	       pbm->name,
+	       (afsr & SCHIZO_CEAFSR_BMSK) >> 32UL,
+	       (afsr & SCHIZO_CEAFSR_QOFF) >> 30UL,
+	       (afsr & SCHIZO_CEAFSR_AID) >> 24UL);
+	printk("%s: partial[%d] owned_in[%d] mtag[%lx] mtag_synd[%lx] ecc_synd[%lx]\n",
+	       pbm->name,
+	       (afsr & SCHIZO_CEAFSR_PARTIAL) ? 1 : 0,
+	       (afsr & SCHIZO_CEAFSR_OWNEDIN) ? 1 : 0,
+	       (afsr & SCHIZO_CEAFSR_MTAG) >> 13UL,
+	       (afsr & SCHIZO_CEAFSR_MTAGSYND) >> 16UL,
+	       (afsr & SCHIZO_CEAFSR_ECCSYND) >> 0UL);
+	printk("%s: CE AFAR [%016lx]\n", pbm->name, afar);
+	printk("%s: CE Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & SCHIZO_CEAFSR_SPIO) {
+		reported++;
+		printk("(PIO)");
+	}
+	if (afsr & SCHIZO_CEAFSR_SDMA) {
+		reported++;
+		printk("(DMA)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	return IRQ_HANDLED;
+}
+
+#define SCHIZO_PCI_AFSR	0x2010UL
+#define SCHIZO_PCI_AFAR	0x2018UL
+
+#define SCHIZO_PCIAFSR_PMA	0x8000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PTA	0x4000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PRTRY	0x2000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PPERR	0x1000000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PTTO	0x0800000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_PUNUS	0x0400000000000000UL /* Schizo */
+#define SCHIZO_PCIAFSR_SMA	0x0200000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_STA	0x0100000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_SRTRY	0x0080000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_SPERR	0x0040000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_STTO	0x0020000000000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_SUNUS	0x0010000000000000UL /* Schizo */
+#define SCHIZO_PCIAFSR_BMSK	0x000003ff00000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_BLK	0x0000000080000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_CFG	0x0000000040000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_MEM	0x0000000020000000UL /* Schizo/Tomatillo */
+#define SCHIZO_PCIAFSR_IO	0x0000000010000000UL /* Schizo/Tomatillo */
+
+#define SCHIZO_PCI_CTRL		(0x2000UL)
+#define SCHIZO_PCICTRL_BUS_UNUS	(1UL << 63UL) /* Safari */
+#define SCHIZO_PCICTRL_DTO_INT	(1UL << 61UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_ARB_PRIO (0x1ffUL << 52UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_ESLCK	(1UL << 51UL) /* Safari */
+#define SCHIZO_PCICTRL_ERRSLOT	(7UL << 48UL) /* Safari */
+#define SCHIZO_PCICTRL_TTO_ERR	(1UL << 38UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_RTRY_ERR	(1UL << 37UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_DTO_ERR	(1UL << 36UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_SBH_ERR	(1UL << 35UL) /* Safari */
+#define SCHIZO_PCICTRL_SERR	(1UL << 34UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_PCISPD	(1UL << 33UL) /* Safari */
+#define SCHIZO_PCICTRL_MRM_PREF	(1UL << 30UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_RDO_PREF	(1UL << 29UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_RDL_PREF	(1UL << 28UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_PTO	(3UL << 24UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_PTO_SHIFT 24UL
+#define SCHIZO_PCICTRL_TRWSW	(7UL << 21UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_F_TGT_A	(1UL << 20UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_S_DTO_INT (1UL << 19UL) /* Safari */
+#define SCHIZO_PCICTRL_F_TGT_RT	(1UL << 19UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_SBH_INT	(1UL << 18UL) /* Safari */
+#define SCHIZO_PCICTRL_T_DTO_INT (1UL << 18UL) /* Tomatillo */
+#define SCHIZO_PCICTRL_EEN	(1UL << 17UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_PARK	(1UL << 16UL) /* Safari/Tomatillo */
+#define SCHIZO_PCICTRL_PCIRST	(1UL <<  8UL) /* Safari */
+#define SCHIZO_PCICTRL_ARB_S	(0x3fUL << 0UL) /* Safari */
+#define SCHIZO_PCICTRL_ARB_T	(0xffUL << 0UL) /* Tomatillo */
+
+static irqreturn_t schizo_pcierr_intr_other(struct pci_pbm_info *pbm)
+{
+	unsigned long csr_reg, csr, csr_error_bits;
+	irqreturn_t ret = IRQ_NONE;
+	u32 stat;
+
+	csr_reg = pbm->pbm_regs + SCHIZO_PCI_CTRL;
+	csr = upa_readq(csr_reg);
+	csr_error_bits =
+		csr & (SCHIZO_PCICTRL_BUS_UNUS |
+		       SCHIZO_PCICTRL_TTO_ERR |
+		       SCHIZO_PCICTRL_RTRY_ERR |
+		       SCHIZO_PCICTRL_DTO_ERR |
+		       SCHIZO_PCICTRL_SBH_ERR |
+		       SCHIZO_PCICTRL_SERR);
+	if (csr_error_bits) {
+		/* Clear the errors.  */
+		upa_writeq(csr, csr_reg);
+
+		/* Log 'em.  */
+		if (csr_error_bits & SCHIZO_PCICTRL_BUS_UNUS)
+			printk("%s: Bus unusable error asserted.\n",
+			       pbm->name);
+		if (csr_error_bits & SCHIZO_PCICTRL_TTO_ERR)
+			printk("%s: PCI TRDY# timeout error asserted.\n",
+			       pbm->name);
+		if (csr_error_bits & SCHIZO_PCICTRL_RTRY_ERR)
+			printk("%s: PCI excessive retry error asserted.\n",
+			       pbm->name);
+		if (csr_error_bits & SCHIZO_PCICTRL_DTO_ERR)
+			printk("%s: PCI discard timeout error asserted.\n",
+			       pbm->name);
+		if (csr_error_bits & SCHIZO_PCICTRL_SBH_ERR)
+			printk("%s: PCI streaming byte hole error asserted.\n",
+			       pbm->name);
+		if (csr_error_bits & SCHIZO_PCICTRL_SERR)
+			printk("%s: PCI SERR signal asserted.\n",
+			       pbm->name);
+		ret = IRQ_HANDLED;
+	}
+	pbm->pci_ops->read(pbm->pci_bus, 0, PCI_STATUS, 2, &stat);
+	if (stat & (PCI_STATUS_PARITY |
+		    PCI_STATUS_SIG_TARGET_ABORT |
+		    PCI_STATUS_REC_TARGET_ABORT |
+		    PCI_STATUS_REC_MASTER_ABORT |
+		    PCI_STATUS_SIG_SYSTEM_ERROR)) {
+		printk("%s: PCI bus error, PCI_STATUS[%04x]\n",
+		       pbm->name, stat);
+		pbm->pci_ops->write(pbm->pci_bus, 0, PCI_STATUS, 2, 0xffff);
+		ret = IRQ_HANDLED;
+	}
+	return ret;
+}
+
+static irqreturn_t schizo_pcierr_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	unsigned long afsr_reg, afar_reg, base;
+	unsigned long afsr, afar, error_bits;
+	int reported;
+
+	base = pbm->pbm_regs;
+
+	afsr_reg = base + SCHIZO_PCI_AFSR;
+	afar_reg = base + SCHIZO_PCI_AFAR;
+
+	/* Latch error status. */
+	afar = upa_readq(afar_reg);
+	afsr = upa_readq(afsr_reg);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+		 SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+		 SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS |
+		 SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+		 SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+		 SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS);
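+	/* An empty AFSR does not mean the interrupt was spurious; the
+	 * PBM control register or a device's PCI_STATUS may still hold
+	 * the reason, so give the fallback handler a look.
+	 */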
+	if (!error_bits)
+		return schizo_pcierr_intr_other(pbm);
+	upa_writeq(error_bits, afsr_reg);
+
+	/* Log the error. */
+	printk("%s: PCI Error, primary error type[%s]\n",
+	       pbm->name,
+	       (((error_bits & SCHIZO_PCIAFSR_PMA) ?
+		 "Master Abort" :
+		 ((error_bits & SCHIZO_PCIAFSR_PTA) ?
+		  "Target Abort" :
+		  ((error_bits & SCHIZO_PCIAFSR_PRTRY) ?
+		   "Excessive Retries" :
+		   ((error_bits & SCHIZO_PCIAFSR_PPERR) ?
+		    "Parity Error" :
+		    ((error_bits & SCHIZO_PCIAFSR_PTTO) ?
+		     "Timeout" :
+		     ((error_bits & SCHIZO_PCIAFSR_PUNUS) ?
+		      "Bus Unusable" : "???"))))))));
+	printk("%s: bytemask[%04lx] was_block(%d) space(%s)\n",
+	       pbm->name,
+	       (afsr & SCHIZO_PCIAFSR_BMSK) >> 32UL,
+	       (afsr & SCHIZO_PCIAFSR_BLK) ? 1 : 0,
+	       ((afsr & SCHIZO_PCIAFSR_CFG) ?
+		"Config" :
+		((afsr & SCHIZO_PCIAFSR_MEM) ?
+		 "Memory" :
+		 ((afsr & SCHIZO_PCIAFSR_IO) ?
+		  "I/O" : "???"))));
+	printk("%s: PCI AFAR [%016lx]\n",
+	       pbm->name, afar);
+	printk("%s: PCI Secondary errors [",
+	       pbm->name);
+	reported = 0;
+	if (afsr & SCHIZO_PCIAFSR_SMA) {
+		reported++;
+		printk("(Master Abort)");
+	}
+	if (afsr & SCHIZO_PCIAFSR_STA) {
+		reported++;
+		printk("(Target Abort)");
+	}
+	if (afsr & SCHIZO_PCIAFSR_SRTRY) {
+		reported++;
+		printk("(Excessive Retries)");
+	}
+	if (afsr & SCHIZO_PCIAFSR_SPERR) {
+		reported++;
+		printk("(Parity Error)");
+	}
+	if (afsr & SCHIZO_PCIAFSR_STTO) {
+		reported++;
+		printk("(Timeout)");
+	}
+	if (afsr & SCHIZO_PCIAFSR_SUNUS) {
+		reported++;
+		printk("(Bus Unusable)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	/* For the error types shown, scan PBM's PCI bus for devices
+	 * which have logged that error type.
+	 */
+
+	/* If we see a Target Abort, this could be the result of an
+	 * IOMMU translation error of some sort.  It is extremely
+	 * useful to log this information as usually it indicates
+	 * a bug in the IOMMU support code or a PCI device driver.
+	 */
+	if (error_bits & (SCHIZO_PCIAFSR_PTA | SCHIZO_PCIAFSR_STA)) {
+		schizo_check_iommu_error(pbm, PCI_ERR);
+		pci_scan_for_target_abort(pbm, pbm->pci_bus);
+	}
+	if (error_bits & (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_SMA))
+		pci_scan_for_master_abort(pbm, pbm->pci_bus);
+
+	/* For excessive retries, PSYCHO/PBM will abort the device
+	 * and there is no way to specifically check for excessive
+	 * retries in the config space status registers.  So what
+	 * we hope is that we'll catch it via the master/target
+	 * abort events.
+	 */
+
+	if (error_bits & (SCHIZO_PCIAFSR_PPERR | SCHIZO_PCIAFSR_SPERR))
+		pci_scan_for_parity_error(pbm, pbm->pci_bus);
+
+	return IRQ_HANDLED;
+}
+
+#define SCHIZO_SAFARI_ERRLOG	0x10018UL
+
+#define SAFARI_ERRLOG_ERROUT	0x8000000000000000UL
+
+#define BUS_ERROR_BADCMD	0x4000000000000000UL /* Schizo/Tomatillo */
+#define BUS_ERROR_SSMDIS	0x2000000000000000UL /* Safari */
+#define BUS_ERROR_BADMA		0x1000000000000000UL /* Safari */
+#define BUS_ERROR_BADMB		0x0800000000000000UL /* Safari */
+#define BUS_ERROR_BADMC		0x0400000000000000UL /* Safari */
+#define BUS_ERROR_SNOOP_GR	0x0000000000200000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_PCI	0x0000000000100000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RD	0x0000000000080000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RDS	0x0000000000020000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RDSA	0x0000000000010000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_OWN	0x0000000000008000UL /* Tomatillo */
+#define BUS_ERROR_SNOOP_RDO	0x0000000000004000UL /* Tomatillo */
+#define BUS_ERROR_CPU1PS	0x0000000000002000UL /* Safari */
+#define BUS_ERROR_WDATA_PERR	0x0000000000002000UL /* Tomatillo */
+#define BUS_ERROR_CPU1PB	0x0000000000001000UL /* Safari */
+#define BUS_ERROR_CTRL_PERR	0x0000000000001000UL /* Tomatillo */
+#define BUS_ERROR_CPU0PS	0x0000000000000800UL /* Safari */
+#define BUS_ERROR_SNOOP_ERR	0x0000000000000800UL /* Tomatillo */
+#define BUS_ERROR_CPU0PB	0x0000000000000400UL /* Safari */
+#define BUS_ERROR_JBUS_ILL_B	0x0000000000000400UL /* Tomatillo */
+#define BUS_ERROR_CIQTO		0x0000000000000200UL /* Safari */
+#define BUS_ERROR_LPQTO		0x0000000000000100UL /* Safari */
+#define BUS_ERROR_JBUS_ILL_C	0x0000000000000100UL /* Tomatillo */
+#define BUS_ERROR_SFPQTO	0x0000000000000080UL /* Safari */
+#define BUS_ERROR_UFPQTO	0x0000000000000040UL /* Safari */
+#define BUS_ERROR_RD_PERR	0x0000000000000040UL /* Tomatillo */
+#define BUS_ERROR_APERR		0x0000000000000020UL /* Safari/Tomatillo */
+#define BUS_ERROR_UNMAP		0x0000000000000010UL /* Safari/Tomatillo */
+#define BUS_ERROR_BUSERR	0x0000000000000004UL /* Safari/Tomatillo */
+#define BUS_ERROR_TIMEOUT	0x0000000000000002UL /* Safari/Tomatillo */
+#define BUS_ERROR_ILL		0x0000000000000001UL /* Safari */
+
+/* We only expect UNMAP errors here.  The rest of the Safari errors
+ * are marked fatal and thus cause a system reset.
+ */
+static irqreturn_t schizo_safarierr_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	u64 errlog;
+
+	errlog = upa_readq(pbm->controller_regs + SCHIZO_SAFARI_ERRLOG);
+	upa_writeq(errlog & ~(SAFARI_ERRLOG_ERROUT),
+		   pbm->controller_regs + SCHIZO_SAFARI_ERRLOG);
+
+	if (!(errlog & BUS_ERROR_UNMAP)) {
+		printk("%s: Unexpected Safari/JBUS error interrupt, errlog[%016llx]\n",
+		       pbm->name, errlog);
+
+		return IRQ_HANDLED;
+	}
+
+	printk("%s: Safari/JBUS interrupt, UNMAPPED error, interrogating IOMMUs.\n",
+	       pbm->name);
+	schizo_check_iommu_error(pbm, SAFARI_ERR);
+
+	return IRQ_HANDLED;
+}
+
+/* Nearly identical to PSYCHO equivalents... */
+#define SCHIZO_ECC_CTRL		0x10020UL
+#define  SCHIZO_ECCCTRL_EE	 0x8000000000000000UL /* Enable ECC Checking */
+#define  SCHIZO_ECCCTRL_UE	 0x4000000000000000UL /* Enable UE Interrupts */
+#define  SCHIZO_ECCCTRL_CE	 0x2000000000000000UL /* Enable CE Interrupts */
+
+#define SCHIZO_SAFARI_ERRCTRL	0x10008UL
+#define  SCHIZO_SAFERRCTRL_EN	 0x8000000000000000UL
+#define SCHIZO_SAFARI_IRQCTRL	0x10010UL
+#define  SCHIZO_SAFIRQCTRL_EN	 0x8000000000000000UL
+
+static int pbm_routes_this_ino(struct pci_pbm_info *pbm, u32 ino)
+{
+	ino &= IMAP_INO;
+
+	if (pbm->ino_bitmap & (1UL << ino))
+		return 1;
+
+	return 0;
+}
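+
+/* For example, SCHIZO_UE_INO is 0x30, so only the PBM whose ino_bitmap
+ * has bit 0x30 set routes the UE interrupt, and only that PBM registers
+ * a UE handler in the functions below.
+ */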
+
+/* How the Tomatillo IRQs are routed around is pure guesswork here.
+ *
+ * All the Tomatillo devices I see in prtconf dumps seem to have only
+ * a single PCI bus unit attached to them.  It would seem they are
+ * separate devices because their PortID (i.e. JBUS ID) values are all
+ * different and thus the registers are mapped to totally different
+ * locations.
+ *
+ * However, two Tomatillos look "similar" in that the only difference
+ * in their PortID is the lowest bit.
+ *
+ * So if we were to ignore this lower bit, it certainly looks like two
+ * PCI bus units of the same Tomatillo.  I still have not really
+ * figured this out...
+ */
+static void tomatillo_register_error_handlers(struct pci_pbm_info *pbm)
+{
+	struct platform_device *op = of_find_device_by_node(pbm->op->dev.of_node);
+	u64 tmp, err_mask, err_no_mask;
+	int err;
+
+	/* Tomatillo IRQ property layout is:
+	 * 0: PCIERR
+	 * 1: UE ERR
+	 * 2: CE ERR
+	 * 3: SERR
+	 * 4: POWER FAIL?
+	 */
+
+	if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) {
+		err = request_irq(op->archdata.irqs[1], schizo_ue_intr, 0,
+				  "TOMATILLO_UE", pbm);
+		if (err)
+			printk(KERN_WARNING "%s: Could not register UE, "
+			       "err=%d\n", pbm->name, err);
+	}
+	if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) {
+		err = request_irq(op->archdata.irqs[2], schizo_ce_intr, 0,
+				  "TOMATILLO_CE", pbm);
+		if (err)
+			printk(KERN_WARNING "%s: Could not register CE, "
+			       "err=%d\n", pbm->name, err);
+	}
+	err = 0;
+	if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO)) {
+		err = request_irq(op->archdata.irqs[0], schizo_pcierr_intr, 0,
+				  "TOMATILLO_PCIERR", pbm);
+	} else if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) {
+		err = request_irq(op->archdata.irqs[0], schizo_pcierr_intr, 0,
+				  "TOMATILLO_PCIERR", pbm);
+	}
+	if (err)
+		printk(KERN_WARNING "%s: Could not register PCIERR, "
+		       "err=%d\n", pbm->name, err);
+
+	if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) {
+		err = request_irq(op->archdata.irqs[3], schizo_safarierr_intr, 0,
+				  "TOMATILLO_SERR", pbm);
+		if (err)
+			printk(KERN_WARNING "%s: Could not register SERR, "
+			       "err=%d\n", pbm->name, err);
+	}
+
+	/* Enable UE and CE interrupts for controller. */
+	upa_writeq((SCHIZO_ECCCTRL_EE |
+		    SCHIZO_ECCCTRL_UE |
+		    SCHIZO_ECCCTRL_CE), pbm->controller_regs + SCHIZO_ECC_CTRL);
+
+	/* Enable PCI Error interrupts and clear error
+	 * bits.
+	 */
+	err_mask = (SCHIZO_PCICTRL_BUS_UNUS |
+		    SCHIZO_PCICTRL_TTO_ERR |
+		    SCHIZO_PCICTRL_RTRY_ERR |
+		    SCHIZO_PCICTRL_SERR |
+		    SCHIZO_PCICTRL_EEN);
+
+	err_no_mask = SCHIZO_PCICTRL_DTO_ERR;
+
+	tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+	tmp |= err_mask;
+	tmp &= ~err_no_mask;
+	upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+	err_mask = (SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+		    SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+		    SCHIZO_PCIAFSR_PTTO |
+		    SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+		    SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+		    SCHIZO_PCIAFSR_STTO);
+
+	upa_writeq(err_mask, pbm->pbm_regs + SCHIZO_PCI_AFSR);
+
+	err_mask = (BUS_ERROR_BADCMD | BUS_ERROR_SNOOP_GR |
+		    BUS_ERROR_SNOOP_PCI | BUS_ERROR_SNOOP_RD |
+		    BUS_ERROR_SNOOP_RDS | BUS_ERROR_SNOOP_RDSA |
+		    BUS_ERROR_SNOOP_OWN | BUS_ERROR_SNOOP_RDO |
+		    BUS_ERROR_WDATA_PERR | BUS_ERROR_CTRL_PERR |
+		    BUS_ERROR_SNOOP_ERR | BUS_ERROR_JBUS_ILL_B |
+		    BUS_ERROR_JBUS_ILL_C | BUS_ERROR_RD_PERR |
+		    BUS_ERROR_APERR | BUS_ERROR_UNMAP |
+		    BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT);
+
+	upa_writeq((SCHIZO_SAFERRCTRL_EN | err_mask),
+		   pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL);
+
+	upa_writeq((SCHIZO_SAFIRQCTRL_EN | (BUS_ERROR_UNMAP)),
+		   pbm->controller_regs + SCHIZO_SAFARI_IRQCTRL);
+}
+
+static void schizo_register_error_handlers(struct pci_pbm_info *pbm)
+{
+	struct platform_device *op = of_find_device_by_node(pbm->op->dev.of_node);
+	u64 tmp, err_mask, err_no_mask;
+	int err;
+
+	/* Schizo IRQ property layout is:
+	 * 0: PCIERR
+	 * 1: UE ERR
+	 * 2: CE ERR
+	 * 3: SERR
+	 * 4: POWER FAIL?
+	 */
+
+	if (pbm_routes_this_ino(pbm, SCHIZO_UE_INO)) {
+		err = request_irq(op->archdata.irqs[1], schizo_ue_intr, 0,
+				  "SCHIZO_UE", pbm);
+		if (err)
+			printk(KERN_WARNING "%s: Could not register UE, "
+			       "err=%d\n", pbm->name, err);
+	}
+	if (pbm_routes_this_ino(pbm, SCHIZO_CE_INO)) {
+		err = request_irq(op->archdata.irqs[2], schizo_ce_intr, 0,
+				  "SCHIZO_CE", pbm);
+		if (err)
+			printk(KERN_WARNING "%s: Could not register CE, "
+			       "err=%d\n", pbm->name, err);
+	}
+	err = 0;
+	if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_A_INO)) {
+		err = request_irq(op->archdata.irqs[0], schizo_pcierr_intr, 0,
+				  "SCHIZO_PCIERR", pbm);
+	} else if (pbm_routes_this_ino(pbm, SCHIZO_PCIERR_B_INO)) {
+		err = request_irq(op->archdata.irqs[0], schizo_pcierr_intr, 0,
+				  "SCHIZO_PCIERR", pbm);
+	}
+	if (err)
+		printk(KERN_WARNING "%s: Could not register PCIERR, "
+		       "err=%d\n", pbm->name, err);
+
+	if (pbm_routes_this_ino(pbm, SCHIZO_SERR_INO)) {
+		err = request_irq(op->archdata.irqs[3], schizo_safarierr_intr, 0,
+				  "SCHIZO_SERR", pbm);
+		if (err)
+			printk(KERN_WARNING "%s: Could not register SERR, "
+			       "err=%d\n", pbm->name, err);
+	}
+
+	/* Enable UE and CE interrupts for controller. */
+	upa_writeq((SCHIZO_ECCCTRL_EE |
+		    SCHIZO_ECCCTRL_UE |
+		    SCHIZO_ECCCTRL_CE), pbm->controller_regs + SCHIZO_ECC_CTRL);
+
+	err_mask = (SCHIZO_PCICTRL_BUS_UNUS |
+		    SCHIZO_PCICTRL_ESLCK |
+		    SCHIZO_PCICTRL_TTO_ERR |
+		    SCHIZO_PCICTRL_RTRY_ERR |
+		    SCHIZO_PCICTRL_SBH_ERR |
+		    SCHIZO_PCICTRL_SERR |
+		    SCHIZO_PCICTRL_EEN);
+
+	err_no_mask = (SCHIZO_PCICTRL_DTO_ERR |
+		       SCHIZO_PCICTRL_SBH_INT);
+
+	/* Enable PCI Error interrupts and clear error
+	 * bits for each PBM.
+	 */
+	tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+	tmp |= err_mask;
+	tmp &= ~err_no_mask;
+	upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+	upa_writeq((SCHIZO_PCIAFSR_PMA | SCHIZO_PCIAFSR_PTA |
+		    SCHIZO_PCIAFSR_PRTRY | SCHIZO_PCIAFSR_PPERR |
+		    SCHIZO_PCIAFSR_PTTO | SCHIZO_PCIAFSR_PUNUS |
+		    SCHIZO_PCIAFSR_SMA | SCHIZO_PCIAFSR_STA |
+		    SCHIZO_PCIAFSR_SRTRY | SCHIZO_PCIAFSR_SPERR |
+		    SCHIZO_PCIAFSR_STTO | SCHIZO_PCIAFSR_SUNUS),
+		   pbm->pbm_regs + SCHIZO_PCI_AFSR);
+
+	/* Make all Safari error conditions fatal except unmapped
+	 * errors, which we instead have generate an interrupt.
+	 */
+	err_mask = (BUS_ERROR_BADCMD | BUS_ERROR_SSMDIS |
+		    BUS_ERROR_BADMA | BUS_ERROR_BADMB |
+		    BUS_ERROR_BADMC |
+		    BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB |
+		    BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB |
+		    BUS_ERROR_CIQTO |
+		    BUS_ERROR_LPQTO | BUS_ERROR_SFPQTO |
+		    BUS_ERROR_UFPQTO | BUS_ERROR_APERR |
+		    BUS_ERROR_BUSERR | BUS_ERROR_TIMEOUT |
+		    BUS_ERROR_ILL);
+#if 1
+	/* XXX Something wrong with some Excalibur systems
+	 * XXX Sun is shipping.  The behavior on a 2-cpu
+	 * XXX machine is that both CPU1 parity error bits
+	 * XXX are set and are immediately set again when
+	 * XXX their error status bits are cleared.  Just
+	 * XXX ignore them for now.  -DaveM
+	 */
+	err_mask &= ~(BUS_ERROR_CPU1PS | BUS_ERROR_CPU1PB |
+		      BUS_ERROR_CPU0PS | BUS_ERROR_CPU0PB);
+#endif
+
+	upa_writeq((SCHIZO_SAFERRCTRL_EN | err_mask),
+		   pbm->controller_regs + SCHIZO_SAFARI_ERRCTRL);
+}
+
+static void pbm_config_busmastering(struct pci_pbm_info *pbm)
+{
+	u8 *addr;
+
+	/* Set the cache line size to 64 bytes; this is actually
+	 * a nop, but I do it for completeness.
+	 */
+	addr = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+					0, PCI_CACHE_LINE_SIZE);
+	pci_config_write8(addr, 64 / sizeof(u32));
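+	/* (The register counts 32-bit words, hence 64 / sizeof(u32)
+	 * above for a 64-byte line.)
+	 */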
+
+	/* Set PBM latency timer to 64 PCI clocks. */
+	addr = schizo_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+					0, PCI_LATENCY_TIMER);
+	pci_config_write8(addr, 64);
+}
+
+static void schizo_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
+{
+	pbm_config_busmastering(pbm);
+	pbm->is_66mhz_capable =
+		(of_find_property(pbm->op->dev.of_node, "66mhz-capable", NULL)
+		 != NULL);
+
+	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO)
+		tomatillo_register_error_handlers(pbm);
+	else
+		schizo_register_error_handlers(pbm);
+}
+
+#define SCHIZO_STRBUF_CONTROL		(0x02800UL)
+#define SCHIZO_STRBUF_FLUSH		(0x02808UL)
+#define SCHIZO_STRBUF_FSYNC		(0x02810UL)
+#define SCHIZO_STRBUF_CTXFLUSH		(0x02818UL)
+#define SCHIZO_STRBUF_CTXMATCH		(0x10000UL)
+
+static void schizo_pbm_strbuf_init(struct pci_pbm_info *pbm)
+{
+	unsigned long base = pbm->pbm_regs;
+	u64 control;
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) {
+		/* TOMATILLO lacks streaming cache.  */
+		return;
+	}
+
+	/* SCHIZO has context flushing. */
+	pbm->stc.strbuf_control		= base + SCHIZO_STRBUF_CONTROL;
+	pbm->stc.strbuf_pflush		= base + SCHIZO_STRBUF_FLUSH;
+	pbm->stc.strbuf_fsync		= base + SCHIZO_STRBUF_FSYNC;
+	pbm->stc.strbuf_ctxflush	= base + SCHIZO_STRBUF_CTXFLUSH;
+	pbm->stc.strbuf_ctxmatch_base	= base + SCHIZO_STRBUF_CTXMATCH;
+
+	pbm->stc.strbuf_flushflag = (volatile unsigned long *)
+		((((unsigned long)&pbm->stc.__flushflag_buf[0])
+		  + 63UL)
+		 & ~63UL);
+	pbm->stc.strbuf_flushflag_pa = (unsigned long)
+		__pa(pbm->stc.strbuf_flushflag);
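+	/* (x + 63) & ~63 rounds the flag address up to the next 64-byte
+	 * boundary, e.g. an address ending in 0x01 becomes one ending in
+	 * 0x40, so the completion flag written at flush-sync time lands
+	 * in a cache line of its own inside __flushflag_buf.
+	 */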
+
+	/* Turn off LRU locking and diag mode, enable the
+	 * streaming buffer and leave the rerun-disable
+	 * setting however OBP set it.
+	 */
+	control = upa_readq(pbm->stc.strbuf_control);
+	control &= ~(SCHIZO_STRBUF_CTRL_LPTR |
+		     SCHIZO_STRBUF_CTRL_LENAB |
+		     SCHIZO_STRBUF_CTRL_DENAB);
+	control |= SCHIZO_STRBUF_CTRL_ENAB;
+	upa_writeq(control, pbm->stc.strbuf_control);
+
+	pbm->stc.strbuf_enabled = 1;
+}
+
+#define SCHIZO_IOMMU_CONTROL		(0x00200UL)
+#define SCHIZO_IOMMU_TSBBASE		(0x00208UL)
+#define SCHIZO_IOMMU_FLUSH		(0x00210UL)
+#define SCHIZO_IOMMU_CTXFLUSH		(0x00218UL)
+
+static int schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
+{
+	static const u32 vdma_default[] = { 0xc0000000, 0x40000000 };
+	unsigned long i, tagbase, database;
+	struct iommu *iommu = pbm->iommu;
+	int tsbsize, err;
+	const u32 *vdma;
+	u32 dma_mask;
+	u64 control;
+
+	vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
+	if (!vdma)
+		vdma = vdma_default;
+
+	dma_mask = vdma[0];
+	switch (vdma[1]) {
+		case 0x20000000:
+			dma_mask |= 0x1fffffff;
+			tsbsize = 64;
+			break;
+
+		case 0x40000000:
+			dma_mask |= 0x3fffffff;
+			tsbsize = 128;
+			break;
+
+		case 0x80000000:
+			dma_mask |= 0x7fffffff;
+			tsbsize = 128;
+			break;
+
+		default:
+			printk(KERN_ERR PFX "Strange virtual-dma size.\n");
+			return -EINVAL;
+	}
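+
+	/* With the default window above, { 0xc0000000, 0x40000000 }, this
+	 * gives dma_mask = 0xc0000000 | 0x3fffffff = 0xffffffff and a
+	 * 128K-entry TSB; 128K entries x 8K pages exactly covers the 1GB
+	 * DVMA range [0xc0000000, 0xffffffff].
+	 */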
+
+	/* Register addresses, SCHIZO has iommu ctx flushing. */
+	iommu->iommu_control  = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;
+	iommu->iommu_tsbbase  = pbm->pbm_regs + SCHIZO_IOMMU_TSBBASE;
+	iommu->iommu_flush    = pbm->pbm_regs + SCHIZO_IOMMU_FLUSH;
+	iommu->iommu_tags     = iommu->iommu_flush + (0xa580UL - 0x0210UL);
+	iommu->iommu_ctxflush = pbm->pbm_regs + SCHIZO_IOMMU_CTXFLUSH;
+
+	/* We use the main control/status register of SCHIZO as the write
+	 * completion register.
+	 */
+	iommu->write_complete_reg = pbm->controller_regs + 0x10000UL;
+
+	/*
+	 * Invalidate TLB Entries.
+	 */
+	control = upa_readq(iommu->iommu_control);
+	control |= SCHIZO_IOMMU_CTRL_DENAB;
+	upa_writeq(control, iommu->iommu_control);
+
+	tagbase = SCHIZO_IOMMU_TAG;
+	database = SCHIZO_IOMMU_DATA;
+
+	for (i = 0; i < 16; i++) {
+		upa_writeq(0, pbm->pbm_regs + tagbase + (i * 8UL));
+		upa_writeq(0, pbm->pbm_regs + database + (i * 8UL));
+	}
+
+	/* Leave diag mode enabled for full-flushing done
+	 * in pci_iommu.c
+	 */
+	err = iommu_table_init(iommu, tsbsize * 8 * 1024, vdma[0], dma_mask,
+			       pbm->numa_node);
+	if (err) {
+		printk(KERN_ERR PFX "iommu_table_init() fails with %d\n", err);
+		return err;
+	}
+
+	upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase);
+
+	control = upa_readq(iommu->iommu_control);
+	control &= ~(SCHIZO_IOMMU_CTRL_TSBSZ | SCHIZO_IOMMU_CTRL_TBWSZ);
+	switch (tsbsize) {
+	case 64:
+		control |= SCHIZO_IOMMU_TSBSZ_64K;
+		break;
+	case 128:
+		control |= SCHIZO_IOMMU_TSBSZ_128K;
+		break;
+	}
+
+	control |= SCHIZO_IOMMU_CTRL_ENAB;
+	upa_writeq(control, iommu->iommu_control);
+
+	return 0;
+}
+
+#define SCHIZO_PCI_IRQ_RETRY	(0x1a00UL)
+#define  SCHIZO_IRQ_RETRY_INF	 0xffUL
+
+#define SCHIZO_PCI_DIAG			(0x2020UL)
+#define  SCHIZO_PCIDIAG_D_BADECC	(1UL << 10UL) /* Disable BAD ECC errors (Schizo) */
+#define  SCHIZO_PCIDIAG_D_BYPASS	(1UL <<  9UL) /* Disable MMU bypass mode (Schizo/Tomatillo) */
+#define  SCHIZO_PCIDIAG_D_TTO		(1UL <<  8UL) /* Disable TTO errors (Schizo/Tomatillo) */
+#define  SCHIZO_PCIDIAG_D_RTRYARB	(1UL <<  7UL) /* Disable retry arbitration (Schizo) */
+#define  SCHIZO_PCIDIAG_D_RETRY		(1UL <<  6UL) /* Disable retry limit (Schizo/Tomatillo) */
+#define  SCHIZO_PCIDIAG_D_INTSYNC	(1UL <<  5UL) /* Disable interrupt/DMA synch (Schizo/Tomatillo) */
+#define  SCHIZO_PCIDIAG_I_DMA_PARITY	(1UL <<  3UL) /* Invert DMA parity (Schizo/Tomatillo) */
+#define  SCHIZO_PCIDIAG_I_PIOD_PARITY	(1UL <<  2UL) /* Invert PIO data parity (Schizo/Tomatillo) */
+#define  SCHIZO_PCIDIAG_I_PIOA_PARITY	(1UL <<  1UL) /* Invert PIO address parity (Schizo/Tomatillo) */
+
+#define TOMATILLO_PCI_IOC_CSR		(0x2248UL)
+#define TOMATILLO_IOC_PART_WPENAB	0x0000000000080000UL
+#define TOMATILLO_IOC_RDMULT_PENAB	0x0000000000040000UL
+#define TOMATILLO_IOC_RDONE_PENAB	0x0000000000020000UL
+#define TOMATILLO_IOC_RDLINE_PENAB	0x0000000000010000UL
+#define TOMATILLO_IOC_RDMULT_PLEN	0x000000000000c000UL
+#define TOMATILLO_IOC_RDMULT_PLEN_SHIFT	14UL
+#define TOMATILLO_IOC_RDONE_PLEN	0x0000000000003000UL
+#define TOMATILLO_IOC_RDONE_PLEN_SHIFT	12UL
+#define TOMATILLO_IOC_RDLINE_PLEN	0x0000000000000c00UL
+#define TOMATILLO_IOC_RDLINE_PLEN_SHIFT	10UL
+#define TOMATILLO_IOC_PREF_OFF		0x00000000000003f8UL
+#define TOMATILLO_IOC_PREF_OFF_SHIFT	3UL
+#define TOMATILLO_IOC_RDMULT_CPENAB	0x0000000000000004UL
+#define TOMATILLO_IOC_RDONE_CPENAB	0x0000000000000002UL
+#define TOMATILLO_IOC_RDLINE_CPENAB	0x0000000000000001UL
+
+#define TOMATILLO_PCI_IOC_TDIAG		(0x2250UL)
+#define TOMATILLO_PCI_IOC_DDIAG		(0x2290UL)
+
+static void schizo_pbm_hw_init(struct pci_pbm_info *pbm)
+{
+	u64 tmp;
+
+	upa_writeq(5, pbm->pbm_regs + SCHIZO_PCI_IRQ_RETRY);
+
+	tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+	/* Enable arbiter for all PCI slots.  */
+	tmp |= 0xff;
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO &&
+	    pbm->chip_version >= 0x2)
+		tmp |= 0x3UL << SCHIZO_PCICTRL_PTO_SHIFT;
+
+	if (!of_find_property(pbm->op->dev.of_node, "no-bus-parking", NULL))
+		tmp |= SCHIZO_PCICTRL_PARK;
+	else
+		tmp &= ~SCHIZO_PCICTRL_PARK;
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO &&
+	    pbm->chip_version <= 0x1)
+		tmp |= SCHIZO_PCICTRL_DTO_INT;
+	else
+		tmp &= ~SCHIZO_PCICTRL_DTO_INT;
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO)
+		tmp |= (SCHIZO_PCICTRL_MRM_PREF |
+			SCHIZO_PCICTRL_RDO_PREF |
+			SCHIZO_PCICTRL_RDL_PREF);
+
+	upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_CTRL);
+
+	tmp = upa_readq(pbm->pbm_regs + SCHIZO_PCI_DIAG);
+	tmp &= ~(SCHIZO_PCIDIAG_D_RTRYARB |
+		 SCHIZO_PCIDIAG_D_RETRY |
+		 SCHIZO_PCIDIAG_D_INTSYNC);
+	upa_writeq(tmp, pbm->pbm_regs + SCHIZO_PCI_DIAG);
+
+	if (pbm->chip_type == PBM_CHIP_TYPE_TOMATILLO) {
+		/* Clear prefetch lengths to work around a bug in
+		 * Jalapeno...
+		 */
+		tmp = (TOMATILLO_IOC_PART_WPENAB |
+		       (1 << TOMATILLO_IOC_PREF_OFF_SHIFT) |
+		       TOMATILLO_IOC_RDMULT_CPENAB |
+		       TOMATILLO_IOC_RDONE_CPENAB |
+		       TOMATILLO_IOC_RDLINE_CPENAB);
+
+		upa_writeq(tmp, pbm->pbm_regs + TOMATILLO_PCI_IOC_CSR);
+	}
+}
+
+static int schizo_pbm_init(struct pci_pbm_info *pbm,
+			   struct platform_device *op, u32 portid,
+			   int chip_type)
+{
+	const struct linux_prom64_registers *regs;
+	struct device_node *dp = op->dev.of_node;
+	const char *chipset_name;
+	int err;
+
+	switch (chip_type) {
+	case PBM_CHIP_TYPE_TOMATILLO:
+		chipset_name = "TOMATILLO";
+		break;
+
+	case PBM_CHIP_TYPE_SCHIZO_PLUS:
+		chipset_name = "SCHIZO+";
+		break;
+
+	case PBM_CHIP_TYPE_SCHIZO:
+	default:
+		chipset_name = "SCHIZO";
+		break;
+	}
+
+	/* For SCHIZO, three OBP regs:
+	 * 1) PBM controller regs
+	 * 2) Schizo front-end controller regs (same for both PBMs)
+	 * 3) PBM PCI config space
+	 *
+	 * For TOMATILLO, four OBP regs:
+	 * 1) PBM controller regs
+	 * 2) Tomatillo front-end controller regs
+	 * 3) PBM PCI config space
+	 * 4) Ichip regs
+	 */
+	regs = of_get_property(dp, "reg", NULL);
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	pbm->numa_node = -1;
+
+	pbm->pci_ops = &sun4u_pci_ops;
+	pbm->config_space_reg_bits = 8;
+
+	pbm->index = pci_num_pbms++;
+
+	pbm->portid = portid;
+	pbm->op = op;
+
+	pbm->chip_type = chip_type;
+	pbm->chip_version = of_getintprop_default(dp, "version#", 0);
+	pbm->chip_revision = of_getintprop_default(dp, "module-version#", 0);
+
+	pbm->pbm_regs = regs[0].phys_addr;
+	pbm->controller_regs = regs[1].phys_addr - 0x10000UL;
+
+	if (chip_type == PBM_CHIP_TYPE_TOMATILLO)
+		pbm->sync_reg = regs[3].phys_addr + 0x1a18UL;
+
+	pbm->name = dp->full_name;
+
+	printk("%s: %s PCI Bus Module ver[%x:%x]\n",
+	       pbm->name, chipset_name,
+	       pbm->chip_version, pbm->chip_revision);
+
+	schizo_pbm_hw_init(pbm);
+
+	pci_determine_mem_io_space(pbm);
+
+	pci_get_pbm_props(pbm);
+
+	err = schizo_pbm_iommu_init(pbm);
+	if (err)
+		return err;
+
+	schizo_pbm_strbuf_init(pbm);
+
+	schizo_scan_bus(pbm, &op->dev);
+
+	return 0;
+}
+
+static inline int portid_compare(u32 x, u32 y, int chip_type)
+{
+	if (chip_type == PBM_CHIP_TYPE_TOMATILLO) {
+		if (x == (y ^ 1))
+			return 1;
+		return 0;
+	}
+	return (x == y);
+}
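+
+/* So two Tomatillo PBMs whose portids are 6 and 7 pair up as siblings,
+ * since 6 == (7 ^ 1), while Schizo PBMs pair only on an exact portid
+ * match.
+ */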
+
+static struct pci_pbm_info *schizo_find_sibling(u32 portid, int chip_type)
+{
+	struct pci_pbm_info *pbm;
+
+	for (pbm = pci_pbm_root; pbm; pbm = pbm->next) {
+		if (portid_compare(pbm->portid, portid, chip_type))
+			return pbm;
+	}
+	return NULL;
+}
+
+static int __schizo_init(struct platform_device *op, unsigned long chip_type)
+{
+	struct device_node *dp = op->dev.of_node;
+	struct pci_pbm_info *pbm;
+	struct iommu *iommu;
+	u32 portid;
+	int err;
+
+	portid = of_getintprop_default(dp, "portid", 0xff);
+
+	err = -ENOMEM;
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (!pbm) {
+		printk(KERN_ERR PFX "Cannot allocate pci_pbm_info.\n");
+		goto out_err;
+	}
+
+	pbm->sibling = schizo_find_sibling(portid, chip_type);
+
+	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+	if (!iommu) {
+		printk(KERN_ERR PFX "Cannot allocate PBM A iommu.\n");
+		goto out_free_pbm;
+	}
+
+	pbm->iommu = iommu;
+
+	if (schizo_pbm_init(pbm, op, portid, chip_type))
+		goto out_free_iommu;
+
+	if (pbm->sibling)
+		pbm->sibling->sibling = pbm;
+
+	dev_set_drvdata(&op->dev, pbm);
+
+	return 0;
+
+out_free_iommu:
+	kfree(pbm->iommu);
+
+out_free_pbm:
+	kfree(pbm);
+
+out_err:
+	return err;
+}
+
+static const struct of_device_id schizo_match[];
+static int schizo_probe(struct platform_device *op)
+{
+	const struct of_device_id *match;
+
+	match = of_match_device(schizo_match, &op->dev);
+	if (!match)
+		return -EINVAL;
+	return __schizo_init(op, (unsigned long)match->data);
+}
+
+/* The ordering of this table is very important.  Some Tomatillo
+ * nodes announce that they are compatible with both pci108e,a801
+ * and pci108e,8001.  So list the chips in reverse chronological
+ * order.
+ */
+static const struct of_device_id schizo_match[] = {
+	{
+		.name = "pci",
+		.compatible = "pci108e,a801",
+		.data = (void *) PBM_CHIP_TYPE_TOMATILLO,
+	},
+	{
+		.name = "pci",
+		.compatible = "pci108e,8002",
+		.data = (void *) PBM_CHIP_TYPE_SCHIZO_PLUS,
+	},
+	{
+		.name = "pci",
+		.compatible = "pci108e,8001",
+		.data = (void *) PBM_CHIP_TYPE_SCHIZO,
+	},
+	{},
+};
+
+static struct platform_driver schizo_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = schizo_match,
+	},
+	.probe		= schizo_probe,
+};
+
+static int __init schizo_init(void)
+{
+	return platform_driver_register(&schizo_driver);
+}
+
+subsys_initcall(schizo_init);
diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c
new file mode 100644
index 0000000..565d9ac
--- /dev/null
+++ b/arch/sparc/kernel/pci_sun4v.c
@@ -0,0 +1,1361 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pci_sun4v.c: SUN4V specific PCI controller support.
+ *
+ * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
+#include <linux/export.h>
+#include <linux/log2.h>
+#include <linux/of_device.h>
+#include <asm/iommu-common.h>
+
+#include <asm/iommu.h>
+#include <asm/irq.h>
+#include <asm/hypervisor.h>
+#include <asm/prom.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "kernel.h"
+
+#include "pci_sun4v.h"
+
+#define DRIVER_NAME	"pci_sun4v"
+#define PFX		DRIVER_NAME ": "
+
+static unsigned long vpci_major;
+static unsigned long vpci_minor;
+
+struct vpci_version {
+	unsigned long major;
+	unsigned long minor;
+};
+
+/* Ordered from highest major version to lowest */
+static struct vpci_version vpci_versions[] = {
+	{ .major = 2, .minor = 0 },
+	{ .major = 1, .minor = 1 },
+};
+
+static unsigned long vatu_major = 1;
+static unsigned long vatu_minor = 1;
+
+#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))
+
+struct iommu_batch {
+	struct device	*dev;		/* Device mapping is for.	*/
+	unsigned long	prot;		/* IOMMU page protections	*/
+	unsigned long	entry;		/* Index into IOTSB.		*/
+	u64		*pglist;	/* List of physical pages	*/
+	unsigned long	npages;		/* Number of pages in list.	*/
+};
+
+static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
+static int iommu_batch_initialized;
+
+/* Interrupts must be disabled.  */
+static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
+{
+	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
+
+	p->dev		= dev;
+	p->prot		= prot;
+	p->entry	= entry;
+	p->npages	= 0;
+}
+
+/* Interrupts must be disabled.  */
+static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
+{
+	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+	u64 *pglist = p->pglist;
+	u64 index_count;
+	unsigned long devhandle = pbm->devhandle;
+	unsigned long prot = p->prot;
+	unsigned long entry = p->entry;
+	unsigned long npages = p->npages;
+	unsigned long iotsb_num;
+	unsigned long ret;
+	long num;
+
+	/* VPCI maj=1, min=[0,1] only supports read and write */
+	if (vpci_major < 2)
+		prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
+
+	while (npages != 0) {
+		if (mask <= DMA_BIT_MASK(32)) {
+			num = pci_sun4v_iommu_map(devhandle,
+						  HV_PCI_TSBID(0, entry),
+						  npages,
+						  prot,
+						  __pa(pglist));
+			if (unlikely(num < 0)) {
+				pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
+						   __func__,
+						   devhandle,
+						   HV_PCI_TSBID(0, entry),
+						   npages, prot, __pa(pglist),
+						   num);
+				return -1;
+			}
+		} else {
+			index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry);
+			iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
+			ret = pci_sun4v_iotsb_map(devhandle,
+						  iotsb_num,
+						  index_count,
+						  prot,
+						  __pa(pglist),
+						  &num);
+			if (unlikely(ret != HV_EOK)) {
+				pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
+						   __func__,
+						   devhandle, iotsb_num,
+						   index_count, prot,
+						   __pa(pglist), ret);
+				return -1;
+			}
+		}
+		entry += num;
+		npages -= num;
+		pglist += num;
+	}
+
+	p->entry = entry;
+	p->npages = 0;
+
+	return 0;
+}
+
+static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
+{
+	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
+
+	if (p->entry + p->npages == entry)
+		return;
+	if (p->entry != ~0UL)
+		iommu_batch_flush(p, mask);
+	p->entry = entry;
+}
+
+/* Interrupts must be disabled.  */
+static inline long iommu_batch_add(u64 phys_page, u64 mask)
+{
+	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
+
+	BUG_ON(p->npages >= PGLIST_NENTS);
+
+	p->pglist[p->npages++] = phys_page;
+	if (p->npages == PGLIST_NENTS)
+		return iommu_batch_flush(p, mask);
+
+	return 0;
+}
+
+/* Interrupts must be disabled.  */
+static inline long iommu_batch_end(u64 mask)
+{
+	struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
+
+	BUG_ON(p->npages >= PGLIST_NENTS);
+
+	return iommu_batch_flush(p, mask);
+}
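+
+/* A minimal sketch of how the mapping paths below drive this batch API
+ * (with interrupts disabled by the caller, as required above):
+ *
+ *	iommu_batch_start(dev, prot, entry);
+ *	for (n = 0; n < npages; n++)
+ *		if (iommu_batch_add(paddr[n], mask) < 0)
+ *			goto fail;
+ *	if (iommu_batch_end(mask) < 0)
+ *		goto fail;
+ *
+ * Pages accumulate in the per-cpu pglist and are handed to the
+ * hypervisor in chunks of at most PGLIST_NENTS entries by
+ * iommu_batch_flush().
+ */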
+
+static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
+				   dma_addr_t *dma_addrp, gfp_t gfp,
+				   unsigned long attrs)
+{
+	u64 mask;
+	unsigned long flags, order, first_page, npages, n;
+	unsigned long prot = 0;
+	struct iommu *iommu;
+	struct atu *atu;
+	struct iommu_map_table *tbl;
+	struct page *page;
+	void *ret;
+	long entry;
+	int nid;
+
+	size = IO_PAGE_ALIGN(size);
+	order = get_order(size);
+	if (unlikely(order >= MAX_ORDER))
+		return NULL;
+
+	npages = size >> IO_PAGE_SHIFT;
+
+	if (attrs & DMA_ATTR_WEAK_ORDERING)
+		prot = HV_PCI_MAP_ATTR_RELAXED_ORDER;
+
+	nid = dev->archdata.numa_node;
+	page = alloc_pages_node(nid, gfp, order);
+	if (unlikely(!page))
+		return NULL;
+
+	first_page = (unsigned long) page_address(page);
+	memset((char *)first_page, 0, PAGE_SIZE << order);
+
+	iommu = dev->archdata.iommu;
+	atu = iommu->atu;
+
+	mask = dev->coherent_dma_mask;
+	if (mask <= DMA_BIT_MASK(32))
+		tbl = &iommu->tbl;
+	else
+		tbl = &atu->tbl;
+
+	entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
+				      (unsigned long)(-1), 0);
+
+	if (unlikely(entry == IOMMU_ERROR_CODE))
+		goto range_alloc_fail;
+
+	*dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
+	ret = (void *) first_page;
+	first_page = __pa(first_page);
+
+	local_irq_save(flags);
+
+	iommu_batch_start(dev,
+			  (HV_PCI_MAP_ATTR_READ | prot |
+			   HV_PCI_MAP_ATTR_WRITE),
+			  entry);
+
+	for (n = 0; n < npages; n++) {
+		long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
+		if (unlikely(err < 0L))
+			goto iommu_map_fail;
+	}
+
+	if (unlikely(iommu_batch_end(mask) < 0L))
+		goto iommu_map_fail;
+
+	local_irq_restore(flags);
+
+	return ret;
+
+iommu_map_fail:
+	local_irq_restore(flags);
+	iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+
+range_alloc_fail:
+	free_pages(first_page, order);
+	return NULL;
+}
+
+unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
+				unsigned long iotsb_num,
+				struct pci_bus *bus_dev)
+{
+	struct pci_dev *pdev;
+	unsigned long err;
+	unsigned int bus;
+	unsigned int device;
+	unsigned int fun;
+
+	list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
+		if (pdev->subordinate) {
+			/* No need to bind pci bridge */
+			dma_4v_iotsb_bind(devhandle, iotsb_num,
+					  pdev->subordinate);
+		} else {
+			bus = bus_dev->number;
+			device = PCI_SLOT(pdev->devfn);
+			fun = PCI_FUNC(pdev->devfn);
+			err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
+						   HV_PCI_DEVICE_BUILD(bus,
+								       device,
+								       fun));
+
+			/* If bind fails for one device it is going to fail
+			 * for the rest of the devices too, because they all
+			 * share the IOTSB.  So in case of failure simply
+			 * return the error.
+			 */
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
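+
+/* Illustrative note: HV_PCI_DEVICE_BUILD() packs the bus/device/function
+ * triple into the single "pci_device" argument the hypervisor expects.
+ * Judging by the MSIQ_REQID_* masks later in this file, the layout is
+ * bus in bits 15:8, device in bits 7:3 and function in bits 2:0; that
+ * correspondence is an inference from this file, not a statement of the
+ * hypervisor spec.
+ */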
+
+static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
+			       dma_addr_t dvma, unsigned long iotsb_num,
+			       unsigned long entry, unsigned long npages)
+{
+	unsigned long num, flags;
+	unsigned long ret;
+
+	local_irq_save(flags);
+	do {
+		if (dvma <= DMA_BIT_MASK(32)) {
+			num = pci_sun4v_iommu_demap(devhandle,
+						    HV_PCI_TSBID(0, entry),
+						    npages);
+		} else {
+			ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
+						    entry, npages, &num);
+			if (unlikely(ret != HV_EOK)) {
+				pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
+						   ret);
+			}
+		}
+		entry += num;
+		npages -= num;
+	} while (npages != 0);
+	local_irq_restore(flags);
+}
+
+static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
+				 dma_addr_t dvma, unsigned long attrs)
+{
+	struct pci_pbm_info *pbm;
+	struct iommu *iommu;
+	struct atu *atu;
+	struct iommu_map_table *tbl;
+	unsigned long order, npages, entry;
+	unsigned long iotsb_num;
+	u32 devhandle;
+
+	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
+	iommu = dev->archdata.iommu;
+	pbm = dev->archdata.host_controller;
+	atu = iommu->atu;
+	devhandle = pbm->devhandle;
+
+	if (dvma <= DMA_BIT_MASK(32)) {
+		tbl = &iommu->tbl;
+		iotsb_num = 0; /* irrelevant for the legacy iommu */
+	} else {
+		tbl = &atu->tbl;
+		iotsb_num = atu->iotsb->iotsb_num;
+	}
+	entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+	dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+	iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
+	order = get_order(size);
+	if (order < 10)
+		free_pages((unsigned long)cpu, order);
+}
+
+static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
+				  unsigned long offset, size_t sz,
+				  enum dma_data_direction direction,
+				  unsigned long attrs)
+{
+	struct iommu *iommu;
+	struct atu *atu;
+	struct iommu_map_table *tbl;
+	u64 mask;
+	unsigned long flags, npages, oaddr;
+	unsigned long i, base_paddr;
+	unsigned long prot;
+	dma_addr_t bus_addr, ret;
+	long entry;
+
+	iommu = dev->archdata.iommu;
+	atu = iommu->atu;
+
+	if (unlikely(direction == DMA_NONE))
+		goto bad;
+
+	oaddr = (unsigned long)(page_address(page) + offset);
+	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
+	npages >>= IO_PAGE_SHIFT;
+
+	mask = *dev->dma_mask;
+	if (mask <= DMA_BIT_MASK(32))
+		tbl = &iommu->tbl;
+	else
+		tbl = &atu->tbl;
+
+	entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
+				      (unsigned long)(-1), 0);
+
+	if (unlikely(entry == IOMMU_ERROR_CODE))
+		goto bad;
+
+	bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
+	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
+	base_paddr = __pa(oaddr & IO_PAGE_MASK);
+	prot = HV_PCI_MAP_ATTR_READ;
+	if (direction != DMA_TO_DEVICE)
+		prot |= HV_PCI_MAP_ATTR_WRITE;
+
+	if (attrs & DMA_ATTR_WEAK_ORDERING)
+		prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;
+
+	local_irq_save(flags);
+
+	iommu_batch_start(dev, prot, entry);
+
+	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
+		long err = iommu_batch_add(base_paddr, mask);
+		if (unlikely(err < 0L))
+			goto iommu_map_fail;
+	}
+	if (unlikely(iommu_batch_end(mask) < 0L))
+		goto iommu_map_fail;
+
+	local_irq_restore(flags);
+
+	return ret;
+
+bad:
+	if (printk_ratelimit())
+		WARN_ON(1);
+	return SPARC_MAPPING_ERROR;
+
+iommu_map_fail:
+	local_irq_restore(flags);
+	iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+	return SPARC_MAPPING_ERROR;
+}
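+
+/* Worked example for the npages computation above, assuming an 8K IO
+ * page (IO_PAGE_SHIFT == 13): for oaddr == 0x1f00 and sz == 0x2200 the
+ * buffer touches three IO pages, and indeed
+ * (IO_PAGE_ALIGN(0x1f00 + 0x2200) - (0x1f00 & IO_PAGE_MASK)) >> 13
+ * == (0x6000 - 0) >> 13 == 3.  The in-page offset is carried into the
+ * returned handle by "bus_addr | (oaddr & ~IO_PAGE_MASK)".
+ */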
+
+static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
+			      size_t sz, enum dma_data_direction direction,
+			      unsigned long attrs)
+{
+	struct pci_pbm_info *pbm;
+	struct iommu *iommu;
+	struct atu *atu;
+	struct iommu_map_table *tbl;
+	unsigned long npages;
+	unsigned long iotsb_num;
+	long entry;
+	u32 devhandle;
+
+	if (unlikely(direction == DMA_NONE)) {
+		if (printk_ratelimit())
+			WARN_ON(1);
+		return;
+	}
+
+	iommu = dev->archdata.iommu;
+	pbm = dev->archdata.host_controller;
+	atu = iommu->atu;
+	devhandle = pbm->devhandle;
+
+	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
+	npages >>= IO_PAGE_SHIFT;
+	bus_addr &= IO_PAGE_MASK;
+
+	if (bus_addr <= DMA_BIT_MASK(32)) {
+		iotsb_num = 0; /* irrelevant for the legacy iommu */
+		tbl = &iommu->tbl;
+	} else {
+		iotsb_num = atu->iotsb->iotsb_num;
+		tbl = &atu->tbl;
+	}
+	entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
+	dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
+	iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+}
+
+static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
+			 int nelems, enum dma_data_direction direction,
+			 unsigned long attrs)
+{
+	struct scatterlist *s, *outs, *segstart;
+	unsigned long flags, handle, prot;
+	dma_addr_t dma_next = 0, dma_addr;
+	unsigned int max_seg_size;
+	unsigned long seg_boundary_size;
+	int outcount, incount, i;
+	struct iommu *iommu;
+	struct atu *atu;
+	struct iommu_map_table *tbl;
+	u64 mask;
+	unsigned long base_shift;
+	long err;
+
+	BUG_ON(direction == DMA_NONE);
+
+	iommu = dev->archdata.iommu;
+	if (nelems == 0 || !iommu)
+		return 0;
+	atu = iommu->atu;
+
+	prot = HV_PCI_MAP_ATTR_READ;
+	if (direction != DMA_TO_DEVICE)
+		prot |= HV_PCI_MAP_ATTR_WRITE;
+
+	if (attrs & DMA_ATTR_WEAK_ORDERING)
+		prot |= HV_PCI_MAP_ATTR_RELAXED_ORDER;
+
+	outs = s = segstart = &sglist[0];
+	outcount = 1;
+	incount = nelems;
+	handle = 0;
+
+	/* Init first segment length for backout at failure */
+	outs->dma_length = 0;
+
+	local_irq_save(flags);
+
+	iommu_batch_start(dev, prot, ~0UL);
+
+	max_seg_size = dma_get_max_seg_size(dev);
+	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
+
+	mask = *dev->dma_mask;
+	if (mask <= DMA_BIT_MASK(32))
+		tbl = &iommu->tbl;
+	else
+		tbl = &atu->tbl;
+
+	base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;
+
+	for_each_sg(sglist, s, nelems, i) {
+		unsigned long paddr, npages, entry, out_entry = 0, slen;
+
+		slen = s->length;
+		/* Sanity check */
+		if (slen == 0) {
+			dma_next = 0;
+			continue;
+		}
+		/* Allocate iommu entries for that segment */
+		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
+		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
+		entry = iommu_tbl_range_alloc(dev, tbl, npages,
+					      &handle, (unsigned long)(-1), 0);
+
+		/* Handle failure */
+		if (unlikely(entry == IOMMU_ERROR_CODE)) {
+			pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
+					   tbl, paddr, npages);
+			goto iommu_map_failed;
+		}
+
+		iommu_batch_new_entry(entry, mask);
+
+		/* Convert entry to a dma_addr_t */
+		dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
+		dma_addr |= (s->offset & ~IO_PAGE_MASK);
+
+		/* Insert into HW table */
+		paddr &= IO_PAGE_MASK;
+		while (npages--) {
+			err = iommu_batch_add(paddr, mask);
+			if (unlikely(err < 0L))
+				goto iommu_map_failed;
+			paddr += IO_PAGE_SIZE;
+		}
+
+		/* If we are in an open segment, try merging */
+		if (segstart != s) {
+			/* We cannot merge if:
+			 * - the allocated dma_addr isn't contiguous with the
+			 *   previous allocation,
+			 * - the merged length would exceed max_seg_size, or
+			 * - the merged segment would span a segment boundary.
+			 */
+			if ((dma_addr != dma_next) ||
+			    (outs->dma_length + s->length > max_seg_size) ||
+			    (is_span_boundary(out_entry, base_shift,
+					      seg_boundary_size, outs, s))) {
+				/* Can't merge: create a new segment */
+				segstart = s;
+				outcount++;
+				outs = sg_next(outs);
+			} else {
+				outs->dma_length += s->length;
+			}
+		}
+
+		if (segstart == s) {
+			/* This is a new segment, fill entries */
+			outs->dma_address = dma_addr;
+			outs->dma_length = slen;
+			out_entry = entry;
+		}
+
+		/* Calculate next page pointer for contiguous check */
+		dma_next = dma_addr + slen;
+	}
+
+	err = iommu_batch_end(mask);
+
+	if (unlikely(err < 0L))
+		goto iommu_map_failed;
+
+	local_irq_restore(flags);
+
+	if (outcount < incount) {
+		outs = sg_next(outs);
+		outs->dma_address = SPARC_MAPPING_ERROR;
+		outs->dma_length = 0;
+	}
+
+	return outcount;
+
+iommu_map_failed:
+	for_each_sg(sglist, s, nelems, i) {
+		if (s->dma_length != 0) {
+			unsigned long vaddr, npages;
+
+			vaddr = s->dma_address & IO_PAGE_MASK;
+			npages = iommu_num_pages(s->dma_address, s->dma_length,
+						 IO_PAGE_SIZE);
+			iommu_tbl_range_free(tbl, vaddr, npages,
+					     IOMMU_ERROR_CODE);
+			/* XXX demap? XXX */
+			s->dma_address = SPARC_MAPPING_ERROR;
+			s->dma_length = 0;
+		}
+		if (s == outs)
+			break;
+	}
+	local_irq_restore(flags);
+
+	return 0;
+}
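+
+/* Summary of the merge logic above: consecutive scatterlist entries are
+ * coalesced into one DMA segment as long as the freshly allocated
+ * dma_addr continues exactly where the previous entry ended (dma_next),
+ * the merged length stays within dma_get_max_seg_size(), and the merged
+ * segment does not cross the device's boundary per is_span_boundary();
+ * otherwise a new output segment is started.
+ */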
+
+static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
+			    int nelems, enum dma_data_direction direction,
+			    unsigned long attrs)
+{
+	struct pci_pbm_info *pbm;
+	struct scatterlist *sg;
+	struct iommu *iommu;
+	struct atu *atu;
+	unsigned long flags, entry;
+	unsigned long iotsb_num;
+	u32 devhandle;
+
+	BUG_ON(direction == DMA_NONE);
+
+	iommu = dev->archdata.iommu;
+	pbm = dev->archdata.host_controller;
+	atu = iommu->atu;
+	devhandle = pbm->devhandle;
+
+	local_irq_save(flags);
+
+	sg = sglist;
+	while (nelems--) {
+		dma_addr_t dma_handle = sg->dma_address;
+		unsigned int len = sg->dma_length;
+		unsigned long npages;
+		struct iommu_map_table *tbl;
+		unsigned long shift = IO_PAGE_SHIFT;
+
+		if (!len)
+			break;
+		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+
+		if (dma_handle <= DMA_BIT_MASK(32)) {
+			iotsb_num = 0; /* irrelevant for the legacy iommu */
+			tbl = &iommu->tbl;
+		} else {
+			iotsb_num = atu->iotsb->iotsb_num;
+			tbl = &atu->tbl;
+		}
+		entry = ((dma_handle - tbl->table_map_base) >> shift);
+		dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
+				   entry, npages);
+		iommu_tbl_range_free(tbl, dma_handle, npages,
+				     IOMMU_ERROR_CODE);
+		sg = sg_next(sg);
+	}
+
+	local_irq_restore(flags);
+}
+
+static int dma_4v_supported(struct device *dev, u64 device_mask)
+{
+	struct iommu *iommu = dev->archdata.iommu;
+	u64 dma_addr_mask = iommu->dma_addr_mask;
+
+	if (device_mask > DMA_BIT_MASK(32)) {
+		if (iommu->atu)
+			dma_addr_mask = iommu->atu->dma_addr_mask;
+		else
+			return 0;
+	}
+
+	if ((device_mask & dma_addr_mask) == dma_addr_mask)
+		return 1;
+	return pci64_dma_supported(to_pci_dev(dev), device_mask);
+}
+
+static int dma_4v_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr == SPARC_MAPPING_ERROR;
+}
+
+static const struct dma_map_ops sun4v_dma_ops = {
+	.alloc				= dma_4v_alloc_coherent,
+	.free				= dma_4v_free_coherent,
+	.map_page			= dma_4v_map_page,
+	.unmap_page			= dma_4v_unmap_page,
+	.map_sg				= dma_4v_map_sg,
+	.unmap_sg			= dma_4v_unmap_sg,
+	.dma_supported			= dma_4v_supported,
+	.mapping_error			= dma_4v_mapping_error,
+};
+
+static void pci_sun4v_scan_bus(struct pci_pbm_info *pbm, struct device *parent)
+{
+	struct property *prop;
+	struct device_node *dp;
+
+	dp = pbm->op->dev.of_node;
+	prop = of_find_property(dp, "66mhz-capable", NULL);
+	pbm->is_66mhz_capable = (prop != NULL);
+	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
+
+	/* XXX register error interrupt handlers XXX */
+}
+
+static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
+					    struct iommu_map_table *iommu)
+{
+	struct iommu_pool *pool;
+	unsigned long i, pool_nr, cnt = 0;
+	u32 devhandle;
+
+	devhandle = pbm->devhandle;
+	for (pool_nr = 0; pool_nr < iommu->nr_pools; pool_nr++) {
+		pool = &(iommu->pools[pool_nr]);
+		for (i = pool->start; i <= pool->end; i++) {
+			unsigned long ret, io_attrs, ra;
+
+			ret = pci_sun4v_iommu_getmap(devhandle,
+						     HV_PCI_TSBID(0, i),
+						     &io_attrs, &ra);
+			if (ret == HV_EOK) {
+				if (page_in_phys_avail(ra)) {
+					pci_sun4v_iommu_demap(devhandle,
+							      HV_PCI_TSBID(0,
+							      i), 1);
+				} else {
+					cnt++;
+					__set_bit(i, iommu->map);
+				}
+			}
+		}
+	}
+	return cnt;
+}
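+
+/* Rationale sketch: a TSB entry the hypervisor reports as mapped to a
+ * real address outside the kernel's available physical memory is
+ * presumably a live firmware mapping, so its bit is set in the allocator
+ * map to keep it reserved; entries whose target does pass
+ * page_in_phys_avail() are treated as stale and demapped.
+ */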
+
+static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
+{
+	struct atu *atu = pbm->iommu->atu;
+	struct atu_iotsb *iotsb;
+	void *table;
+	u64 table_size;
+	u64 iotsb_num;
+	unsigned long order;
+	unsigned long err;
+
+	iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
+	if (!iotsb) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+	atu->iotsb = iotsb;
+
+	/* calculate size of IOTSB */
+	table_size = (atu->size / IO_PAGE_SIZE) * 8;
+	order = get_order(table_size);
+	table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!table) {
+		err = -ENOMEM;
+		goto table_failed;
+	}
+	iotsb->table = table;
+	iotsb->ra = __pa(table);
+	iotsb->dvma_size = atu->size;
+	iotsb->dvma_base = atu->base;
+	iotsb->table_size = table_size;
+	iotsb->page_size = IO_PAGE_SIZE;
+
+	/* configure and register IOTSB with HV */
+	err = pci_sun4v_iotsb_conf(pbm->devhandle,
+				   iotsb->ra,
+				   iotsb->table_size,
+				   iotsb->page_size,
+				   iotsb->dvma_base,
+				   &iotsb_num);
+	if (err) {
+		pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
+		goto iotsb_conf_failed;
+	}
+	iotsb->iotsb_num = iotsb_num;
+
+	err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
+	if (err) {
+		pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
+		goto iotsb_conf_failed;
+	}
+
+	return 0;
+
+iotsb_conf_failed:
+	free_pages((unsigned long)table, order);
+table_failed:
+	kfree(iotsb);
+out_err:
+	return err;
+}
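+
+/* Sizing example for the IOTSB above, assuming 8K IO pages and the 32G
+ * ATU_64_SPACE_SIZE window: table_size = (32G / 8K) * 8 = 32M bytes,
+ * i.e. one 8-byte IOTTE per IO page in the window, which get_order()
+ * then rounds up to a power-of-two page allocation.
+ */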
+
+static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
+{
+	struct atu *atu = pbm->iommu->atu;
+	unsigned long err;
+	const u64 *ranges;
+	u64 map_size, num_iotte;
+	u64 dma_mask;
+	const u32 *page_size;
+	int len;
+
+	ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
+				 &len);
+	if (!ranges) {
+		pr_err(PFX "No iommu-address-ranges\n");
+		return -EINVAL;
+	}
+
+	page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
+				    NULL);
+	if (!page_size) {
+		pr_err(PFX "No iommu-pagesizes\n");
+		return -EINVAL;
+	}
+
+	/* Four iommu-address-ranges are supported.  Each range is a
+	 * {base, size} pair.  ranges[0] and ranges[1] cover the 32-bit
+	 * address space, while ranges[2] and ranges[3] cover the 64-bit
+	 * space.  We want a 64-bit range to support 64-bit addressing.
+	 * Since ranges[2] and ranges[3] have the same 'size', either could
+	 * be selected for mapping.  However, that 'size' is too large for
+	 * the OS to allocate an IOTSB for, so we use a fixed size of 32G
+	 * (ATU_64_SPACE_SIZE), which is more than enough for all PCIe
+	 * devices to share.
+	 */
+	atu->ranges = (struct atu_ranges *)ranges;
+	atu->base = atu->ranges[3].base;
+	atu->size = ATU_64_SPACE_SIZE;
+
+	/* Create IOTSB */
+	err = pci_sun4v_atu_alloc_iotsb(pbm);
+	if (err) {
+		pr_err(PFX "Error creating ATU IOTSB\n");
+		return err;
+	}
+
+	/* Create ATU iommu map.
+	 * One bit represents one iotte in IOTSB table.
+	 */
+	dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
+	num_iotte = atu->size / IO_PAGE_SIZE;
+	map_size = num_iotte / 8;
+	atu->tbl.table_map_base = atu->base;
+	atu->dma_addr_mask = dma_mask;
+	atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
+	if (!atu->tbl.map)
+		return -ENOMEM;
+
+	iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
+			    NULL, false /* no large_pool */,
+			    0 /* default npools */,
+			    false /* want span boundary checking */);
+
+	return 0;
+}
+
+static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
+{
+	static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
+	struct iommu *iommu = pbm->iommu;
+	unsigned long num_tsb_entries, sz;
+	u32 dma_mask, dma_offset;
+	const u32 *vdma;
+
+	vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
+	if (!vdma)
+		vdma = vdma_default;
+
+	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
+		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
+		       vdma[0], vdma[1]);
+		return -EINVAL;
+	}
+
+	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
+	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
+
+	dma_offset = vdma[0];
+
+	/* Setup initial software IOMMU state. */
+	spin_lock_init(&iommu->lock);
+	iommu->ctx_lowest_free = 1;
+	iommu->tbl.table_map_base = dma_offset;
+	iommu->dma_addr_mask = dma_mask;
+
+	/* Allocate and initialize the free area map.  */
+	sz = (num_tsb_entries + 7) / 8;
+	sz = (sz + 7UL) & ~7UL;
+	iommu->tbl.map = kzalloc(sz, GFP_KERNEL);
+	if (!iommu->tbl.map) {
+		printk(KERN_ERR PFX "Error, kzalloc(iommu->tbl.map) failed.\n");
+		return -ENOMEM;
+	}
+	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
+			    NULL, false /* no large_pool */,
+			    0 /* default npools */,
+			    false /* want span boundary checking */);
+	sz = probe_existing_entries(pbm, &iommu->tbl);
+	if (sz)
+		printk("%s: Imported %lu TSB entries from OBP\n",
+		       pbm->name, sz);
+
+	return 0;
+}
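+
+/* Worked example with the vdma_default window { 0x80000000, 0x80000000 }:
+ * dma_mask = roundup_pow_of_two(0x80000000) - 1 = 0x7fffffff, and with
+ * 8K IO pages num_tsb_entries = 0x80000000 / 8192 = 262144, giving a
+ * free-area bitmap of 262144 / 8 = 32768 bytes (already an 8-byte
+ * multiple).
+ */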
+
+#ifdef CONFIG_PCI_MSI
+struct pci_sun4v_msiq_entry {
+	u64		version_type;
+#define MSIQ_VERSION_MASK		0xffffffff00000000UL
+#define MSIQ_VERSION_SHIFT		32
+#define MSIQ_TYPE_MASK			0x00000000000000ffUL
+#define MSIQ_TYPE_SHIFT			0
+#define MSIQ_TYPE_NONE			0x00
+#define MSIQ_TYPE_MSG			0x01
+#define MSIQ_TYPE_MSI32			0x02
+#define MSIQ_TYPE_MSI64			0x03
+#define MSIQ_TYPE_INTX			0x08
+#define MSIQ_TYPE_NONE2			0xff
+
+	u64		intx_sysino;
+	u64		reserved1;
+	u64		stick;
+	u64		req_id;  /* bus/device/func */
+#define MSIQ_REQID_BUS_MASK		0xff00UL
+#define MSIQ_REQID_BUS_SHIFT		8
+#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
+#define MSIQ_REQID_DEVICE_SHIFT		3
+#define MSIQ_REQID_FUNC_MASK		0x0007UL
+#define MSIQ_REQID_FUNC_SHIFT		0
+
+	u64		msi_address;
+
+	/* The format of this value is message type dependent.
+	 * For MSI, bits 15:0 are the data from the MSI packet.
+	 * For MSI-X, bits 31:0 are the data from the MSI packet.
+	 * For MSG, the value holds the message code and message routing
+	 * code, where:
+	 *	bits 39:32 are the bus/device/fn of the msg target-id
+	 *	bits 18:16 are the message routing code
+	 *	bits 7:0 are the message code
+	 * For INTx the low-order 2 bits are:
+	 *	00 - INTA
+	 *	01 - INTB
+	 *	10 - INTC
+	 *	11 - INTD
+	 */
+	u64		msi_data;
+
+	u64		reserved2;
+};
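+
+/* Illustrative decode of req_id using the masks above: for a req_id of
+ * 0x0643, bus = (0x0643 & MSIQ_REQID_BUS_MASK) >> MSIQ_REQID_BUS_SHIFT
+ * = 0x06, device = (0x0643 & MSIQ_REQID_DEVICE_MASK) >>
+ * MSIQ_REQID_DEVICE_SHIFT = 0x08, and function = 0x0643 &
+ * MSIQ_REQID_FUNC_MASK = 0x3.  (The value 0x0643 is made up.)
+ */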
+
+static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+			      unsigned long *head)
+{
+	unsigned long err, limit;
+
+	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
+	if (unlikely(err))
+		return -ENXIO;
+
+	limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+	if (unlikely(*head >= limit))
+		return -EFBIG;
+
+	return 0;
+}
+
+static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
+				 unsigned long msiqid, unsigned long *head,
+				 unsigned long *msi)
+{
+	struct pci_sun4v_msiq_entry *ep;
+	unsigned long err, type;
+
+	/* Note: void pointer arithmetic, 'head' is a byte offset  */
+	ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
+				 (pbm->msiq_ent_count *
+				  sizeof(struct pci_sun4v_msiq_entry))) +
+	      *head);
+
+	if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
+		return 0;
+
+	type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
+	if (unlikely(type != MSIQ_TYPE_MSI32 &&
+		     type != MSIQ_TYPE_MSI64))
+		return -EINVAL;
+
+	*msi = ep->msi_data;
+
+	err = pci_sun4v_msi_setstate(pbm->devhandle,
+				     ep->msi_data /* msi_num */,
+				     HV_MSISTATE_IDLE);
+	if (unlikely(err))
+		return -ENXIO;
+
+	/* Clear the entry.  */
+	ep->version_type &= ~MSIQ_TYPE_MASK;
+
+	(*head) += sizeof(struct pci_sun4v_msiq_entry);
+	if (*head >=
+	    (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
+		*head = 0;
+
+	return 1;
+}
+
+static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
+			      unsigned long head)
+{
+	unsigned long err;
+
+	err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
+	if (unlikely(err))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
+			       unsigned long msi, int is_msi64)
+{
+	if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
+				  (is_msi64 ?
+				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
+		return -ENXIO;
+	if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
+		return -ENXIO;
+	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
+		return -ENXIO;
+	return 0;
+}
+
+static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
+{
+	unsigned long err, msiqid;
+
+	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
+	if (err)
+		return -ENXIO;
+
+	pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);
+
+	return 0;
+}
+
+static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
+{
+	unsigned long q_size, alloc_size, pages, order;
+	int i;
+
+	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+	alloc_size = (pbm->msiq_num * q_size);
+	order = get_order(alloc_size);
+	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
+	if (pages == 0UL) {
+		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
+		       order);
+		return -ENOMEM;
+	}
+	memset((char *)pages, 0, PAGE_SIZE << order);
+	pbm->msi_queues = (void *) pages;
+
+	for (i = 0; i < pbm->msiq_num; i++) {
+		unsigned long err, base = __pa(pages + (i * q_size));
+		unsigned long ret1, ret2;
+
+		err = pci_sun4v_msiq_conf(pbm->devhandle,
+					  pbm->msiq_first + i,
+					  base, pbm->msiq_ent_count);
+		if (err) {
+			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
+			       err);
+			goto h_error;
+		}
+
+		err = pci_sun4v_msiq_info(pbm->devhandle,
+					  pbm->msiq_first + i,
+					  &ret1, &ret2);
+		if (err) {
+			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
+			       err);
+			goto h_error;
+		}
+		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
+			printk(KERN_ERR "MSI: Bogus qconf "
+			       "expected[%lx:%x] got[%lx:%lx]\n",
+			       base, pbm->msiq_ent_count,
+			       ret1, ret2);
+			goto h_error;
+		}
+	}
+
+	return 0;
+
+h_error:
+	free_pages(pages, order);
+	return -EINVAL;
+}
+
+static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
+{
+	unsigned long q_size, alloc_size, pages, order;
+	int i;
+
+	for (i = 0; i < pbm->msiq_num; i++) {
+		unsigned long msiqid = pbm->msiq_first + i;
+
+		(void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
+	}
+
+	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
+	alloc_size = (pbm->msiq_num * q_size);
+	order = get_order(alloc_size);
+
+	pages = (unsigned long) pbm->msi_queues;
+
+	free_pages(pages, order);
+
+	pbm->msi_queues = NULL;
+}
+
+static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
+				    unsigned long msiqid,
+				    unsigned long devino)
+{
+	unsigned int irq = sun4v_build_irq(pbm->devhandle, devino);
+
+	if (!irq)
+		return -ENOMEM;
+
+	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
+		return -EINVAL;
+	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
+		return -EINVAL;
+
+	return irq;
+}
+
+static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
+	.get_head	=	pci_sun4v_get_head,
+	.dequeue_msi	=	pci_sun4v_dequeue_msi,
+	.set_head	=	pci_sun4v_set_head,
+	.msi_setup	=	pci_sun4v_msi_setup,
+	.msi_teardown	=	pci_sun4v_msi_teardown,
+	.msiq_alloc	=	pci_sun4v_msiq_alloc,
+	.msiq_free	=	pci_sun4v_msiq_free,
+	.msiq_build_irq	=	pci_sun4v_msiq_build_irq,
+};
+
+static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
+{
+	sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
+}
+#else /* CONFIG_PCI_MSI */
+static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
+{
+}
+#endif /* !(CONFIG_PCI_MSI) */
+
+static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
+			      struct platform_device *op, u32 devhandle)
+{
+	struct device_node *dp = op->dev.of_node;
+	int err;
+
+	pbm->numa_node = of_node_to_nid(dp);
+
+	pbm->pci_ops = &sun4v_pci_ops;
+	pbm->config_space_reg_bits = 12;
+
+	pbm->index = pci_num_pbms++;
+
+	pbm->op = op;
+
+	pbm->devhandle = devhandle;
+
+	pbm->name = dp->full_name;
+
+	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
+	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
+
+	pci_determine_mem_io_space(pbm);
+
+	pci_get_pbm_props(pbm);
+
+	err = pci_sun4v_iommu_init(pbm);
+	if (err)
+		return err;
+
+	pci_sun4v_msi_init(pbm);
+
+	pci_sun4v_scan_bus(pbm, &op->dev);
+
+	/* If atu_init fails it is not a complete failure;
+	 * we can still continue using the legacy iommu.
+	 */
+	if (pbm->iommu->atu) {
+		err = pci_sun4v_atu_init(pbm);
+		if (err) {
+			kfree(pbm->iommu->atu);
+			pbm->iommu->atu = NULL;
+			pr_err(PFX "ATU init failed, err=%d\n", err);
+		}
+	}
+
+	pbm->next = pci_pbm_root;
+	pci_pbm_root = pbm;
+
+	return 0;
+}
+
+static int pci_sun4v_probe(struct platform_device *op)
+{
+	const struct linux_prom64_registers *regs;
+	static int hvapi_negotiated = 0;
+	struct pci_pbm_info *pbm;
+	struct device_node *dp;
+	struct iommu *iommu;
+	struct atu *atu;
+	u32 devhandle;
+	int i, err = -ENODEV;
+	static bool hv_atu = true;
+
+	dp = op->dev.of_node;
+
+	if (!hvapi_negotiated++) {
+		for (i = 0; i < ARRAY_SIZE(vpci_versions); i++) {
+			vpci_major = vpci_versions[i].major;
+			vpci_minor = vpci_versions[i].minor;
+
+			err = sun4v_hvapi_register(HV_GRP_PCI, vpci_major,
+						   &vpci_minor);
+			if (!err)
+				break;
+		}
+
+		if (err) {
+			pr_err(PFX "Could not register hvapi, err=%d\n", err);
+			return err;
+		}
+		pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
+			vpci_major, vpci_minor);
+
+		err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
+		if (err) {
+			/* don't return an error if we fail to register the
+			 * ATU group, but ATU hcalls won't be available.
+			 */
+			hv_atu = false;
+		} else {
+			pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
+				vatu_major, vatu_minor);
+		}
+
+		dma_ops = &sun4v_dma_ops;
+	}
+
+	regs = of_get_property(dp, "reg", NULL);
+	err = -ENODEV;
+	if (!regs) {
+		printk(KERN_ERR PFX "Could not find config registers\n");
+		goto out_err;
+	}
+	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;
+
+	err = -ENOMEM;
+	if (!iommu_batch_initialized) {
+		for_each_possible_cpu(i) {
+			unsigned long page = get_zeroed_page(GFP_KERNEL);
+
+			if (!page)
+				goto out_err;
+
+			per_cpu(iommu_batch, i).pglist = (u64 *) page;
+		}
+		iommu_batch_initialized = 1;
+	}
+
+	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
+	if (!pbm) {
+		printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
+		goto out_err;
+	}
+
+	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
+	if (!iommu) {
+		printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
+		goto out_free_controller;
+	}
+
+	pbm->iommu = iommu;
+	iommu->atu = NULL;
+	if (hv_atu) {
+		atu = kzalloc(sizeof(*atu), GFP_KERNEL);
+		if (!atu)
+			pr_err(PFX "Could not allocate atu\n");
+		else
+			iommu->atu = atu;
+	}
+
+	err = pci_sun4v_pbm_init(pbm, op, devhandle);
+	if (err)
+		goto out_free_iommu;
+
+	dev_set_drvdata(&op->dev, pbm);
+
+	return 0;
+
+out_free_iommu:
+	kfree(iommu->atu);
+	kfree(pbm->iommu);
+
+out_free_controller:
+	kfree(pbm);
+
+out_err:
+	return err;
+}
+
+static const struct of_device_id pci_sun4v_match[] = {
+	{
+		.name = "pci",
+		.compatible = "SUNW,sun4v-pci",
+	},
+	{},
+};
+
+static struct platform_driver pci_sun4v_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = pci_sun4v_match,
+	},
+	.probe		= pci_sun4v_probe,
+};
+
+static int __init pci_sun4v_init(void)
+{
+	return platform_driver_register(&pci_sun4v_driver);
+}
+
+subsys_initcall(pci_sun4v_init);
diff --git a/arch/sparc/kernel/pci_sun4v.h b/arch/sparc/kernel/pci_sun4v.h
new file mode 100644
index 0000000..d47263a
--- /dev/null
+++ b/arch/sparc/kernel/pci_sun4v.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* pci_sun4v.h: SUN4V specific PCI controller support.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#ifndef _PCI_SUN4V_H
+#define _PCI_SUN4V_H
+
+long pci_sun4v_iommu_map(unsigned long devhandle,
+			 unsigned long tsbid,
+			 unsigned long num_ttes,
+			 unsigned long io_attributes,
+			 unsigned long io_page_list_pa);
+unsigned long pci_sun4v_iommu_demap(unsigned long devhandle,
+				    unsigned long tsbid,
+				    unsigned long num_ttes);
+unsigned long pci_sun4v_iommu_getmap(unsigned long devhandle,
+				     unsigned long tsbid,
+				     unsigned long *io_attributes,
+				     unsigned long *real_address);
+unsigned long pci_sun4v_config_get(unsigned long devhandle,
+				   unsigned long pci_device,
+				   unsigned long config_offset,
+				   unsigned long size);
+int pci_sun4v_config_put(unsigned long devhandle,
+			 unsigned long pci_device,
+			 unsigned long config_offset,
+			 unsigned long size,
+			 unsigned long data);
+
+unsigned long pci_sun4v_msiq_conf(unsigned long devhandle,
+					 unsigned long msiqid,
+					 unsigned long msiq_paddr,
+					 unsigned long num_entries);
+unsigned long pci_sun4v_msiq_info(unsigned long devhandle,
+				  unsigned long msiqid,
+				  unsigned long *msiq_paddr,
+				  unsigned long *num_entries);
+unsigned long pci_sun4v_msiq_getvalid(unsigned long devhandle,
+				      unsigned long msiqid,
+				      unsigned long *valid);
+unsigned long pci_sun4v_msiq_setvalid(unsigned long devhandle,
+				      unsigned long msiqid,
+				      unsigned long valid);
+unsigned long pci_sun4v_msiq_getstate(unsigned long devhandle,
+				      unsigned long msiqid,
+				      unsigned long *state);
+unsigned long pci_sun4v_msiq_setstate(unsigned long devhandle,
+				      unsigned long msiqid,
+				      unsigned long state);
+unsigned long pci_sun4v_msiq_gethead(unsigned long devhandle,
+				     unsigned long msiqid,
+				     unsigned long *head);
+unsigned long pci_sun4v_msiq_sethead(unsigned long devhandle,
+				     unsigned long msiqid,
+				     unsigned long head);
+unsigned long pci_sun4v_msiq_gettail(unsigned long devhandle,
+				      unsigned long msiqid,
+				      unsigned long *head);
+unsigned long pci_sun4v_msi_getvalid(unsigned long devhandle,
+				     unsigned long msinum,
+				     unsigned long *valid);
+unsigned long pci_sun4v_msi_setvalid(unsigned long devhandle,
+				     unsigned long msinum,
+				     unsigned long valid);
+unsigned long pci_sun4v_msi_getmsiq(unsigned long devhandle,
+				    unsigned long msinum,
+				    unsigned long *msiq);
+unsigned long pci_sun4v_msi_setmsiq(unsigned long devhandle,
+				    unsigned long msinum,
+				    unsigned long msiq,
+				    unsigned long msitype);
+unsigned long pci_sun4v_msi_getstate(unsigned long devhandle,
+				     unsigned long msinum,
+				     unsigned long *state);
+unsigned long pci_sun4v_msi_setstate(unsigned long devhandle,
+				     unsigned long msinum,
+				     unsigned long state);
+unsigned long pci_sun4v_msg_getmsiq(unsigned long devhandle,
+				    unsigned long msinum,
+				    unsigned long *msiq);
+unsigned long pci_sun4v_msg_setmsiq(unsigned long devhandle,
+				    unsigned long msinum,
+				    unsigned long msiq);
+unsigned long pci_sun4v_msg_getvalid(unsigned long devhandle,
+				     unsigned long msinum,
+				     unsigned long *valid);
+unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
+				     unsigned long msinum,
+				     unsigned long valid);
+
+/* Sun4v HV IOMMU v2 APIs */
+unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
+				   unsigned long ra,
+				   unsigned long table_size,
+				   unsigned long page_size,
+				   unsigned long dvma_base,
+				   u64 *iotsb_num);
+unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
+				   unsigned long iotsb_num,
+				   unsigned int pci_device);
+unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
+				  unsigned long iotsb_num,
+				  unsigned long iotsb_index_iottes,
+				  unsigned long io_attributes,
+				  unsigned long io_page_list_pa,
+				  long *mapped);
+unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
+				    unsigned long iotsb_num,
+				    unsigned long iotsb_index,
+				    unsigned long iottes,
+				    unsigned long *demapped);
+#endif /* !(_PCI_SUN4V_H) */
diff --git a/arch/sparc/kernel/pci_sun4v_asm.S b/arch/sparc/kernel/pci_sun4v_asm.S
new file mode 100644
index 0000000..2b80518
--- /dev/null
+++ b/arch/sparc/kernel/pci_sun4v_asm.S
@@ -0,0 +1,431 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* pci_sun4v_asm: Hypervisor calls for PCI support.
+ *
+ * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/linkage.h>
+#include <asm/hypervisor.h>
+
+	/* %o0: devhandle
+	 * %o1:	tsbid
+	 * %o2:	num ttes
+	 * %o3:	io_attributes
+	 * %o4:	io_page_list phys address
+	 *
+	 * returns %o0:	-status if status was non-zero, else
+	 *         %o0:	num pages mapped
+	 */
+ENTRY(pci_sun4v_iommu_map)
+	mov	%o5, %g1
+	mov	HV_FAST_PCI_IOMMU_MAP, %o5
+	ta	HV_FAST_TRAP
+	brnz,pn %o0, 1f
+	 sub	%g0, %o0, %o0
+	mov	%o1, %o0
+1:	retl
+	 nop
+ENDPROC(pci_sun4v_iommu_map)
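+
+	/* Pattern note for the stubs in this file: the fast-trap function
+	 * number goes in %o5, the arguments sit in %o0-%o4, and
+	 * "ta HV_FAST_TRAP" enters the hypervisor, which returns the status
+	 * in %o0 and any result value in %o1.  Stubs returning a value
+	 * either store %o1 through a pointer argument or, as above, move it
+	 * into %o0 after checking the status.
+	 */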
+
+	/* %o0: devhandle
+	 * %o1:	tsbid
+	 * %o2:	num ttes
+	 *
+	 * returns %o0:	num ttes demapped
+	 */
+ENTRY(pci_sun4v_iommu_demap)
+	mov	HV_FAST_PCI_IOMMU_DEMAP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o1, %o0
+ENDPROC(pci_sun4v_iommu_demap)
+
+	/* %o0: devhandle
+	 * %o1:	tsbid
+	 * %o2:	&io_attributes
+	 * %o3:	&real_address
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(pci_sun4v_iommu_getmap)
+	mov	%o2, %o4
+	mov	HV_FAST_PCI_IOMMU_GETMAP, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	stx	%o2, [%o3]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_iommu_getmap)
+
+	/* %o0: devhandle
+	 * %o1:	pci_device
+	 * %o2:	pci_config_offset
+	 * %o3:	size
+	 *
+	 * returns %o0:	data
+	 *
+	 * If there is an error, the data will be returned
+	 * as all 1's.
+	 */
+ENTRY(pci_sun4v_config_get)
+	mov	HV_FAST_PCI_CONFIG_GET, %o5
+	ta	HV_FAST_TRAP
+	brnz,a,pn %o1, 1f
+	 mov	-1, %o2
+1:	retl
+	 mov	%o2, %o0
+ENDPROC(pci_sun4v_config_get)
+
+	/* %o0: devhandle
+	 * %o1:	pci_device
+	 * %o2:	pci_config_offset
+	 * %o3:	size
+	 * %o4:	data
+	 *
+	 * returns %o0:	status
+	 *
+	 * status will be zero if the operation completed
+	 * successfully, else -1 if not
+	 */
+ENTRY(pci_sun4v_config_put)
+	mov	HV_FAST_PCI_CONFIG_PUT, %o5
+	ta	HV_FAST_TRAP
+	brnz,a,pn %o1, 1f
+	 mov	-1, %o1
+1:	retl
+	 mov	%o1, %o0
+ENDPROC(pci_sun4v_config_put)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2: msiq phys address
+	 * %o3: num entries
+	 *
+	 * returns %o0: status
+	 *
+	 * status will be zero if the operation completed
+	 * successfully, else -1 if not
+	 */
+ENTRY(pci_sun4v_msiq_conf)
+	mov	HV_FAST_PCI_MSIQ_CONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_conf)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	&msiq_phys_addr
+	 * %o3:	&msiq_num_entries
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_info)
+	mov	%o2, %o4
+	mov	HV_FAST_PCI_MSIQ_INFO, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	stx	%o2, [%o3]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_info)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	&valid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_getvalid)
+	mov	HV_FAST_PCI_MSIQ_GETVALID, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_getvalid)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	valid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_setvalid)
+	mov	HV_FAST_PCI_MSIQ_SETVALID, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_setvalid)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	&state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_getstate)
+	mov	HV_FAST_PCI_MSIQ_GETSTATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_getstate)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_setstate)
+	mov	HV_FAST_PCI_MSIQ_SETSTATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_setstate)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	&head
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_gethead)
+	mov	HV_FAST_PCI_MSIQ_GETHEAD, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_gethead)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	head
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_sethead)
+	mov	HV_FAST_PCI_MSIQ_SETHEAD, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_sethead)
+
+	/* %o0: devhandle
+	 * %o1: msiqid
+	 * %o2:	&tail
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msiq_gettail)
+	mov	HV_FAST_PCI_MSIQ_GETTAIL, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msiq_gettail)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	&valid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msi_getvalid)
+	mov	HV_FAST_PCI_MSI_GETVALID, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msi_getvalid)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	valid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msi_setvalid)
+	mov	HV_FAST_PCI_MSI_SETVALID, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msi_setvalid)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	&msiq
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msi_getmsiq)
+	mov	HV_FAST_PCI_MSI_GETMSIQ, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msi_getmsiq)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	msitype
+	 * %o3:	msiq
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msi_setmsiq)
+	mov	HV_FAST_PCI_MSI_SETMSIQ, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msi_setmsiq)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	&state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msi_getstate)
+	mov	HV_FAST_PCI_MSI_GETSTATE, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msi_getstate)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	state
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msi_setstate)
+	mov	HV_FAST_PCI_MSI_SETSTATE, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msi_setstate)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	&msiq
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msg_getmsiq)
+	mov	HV_FAST_PCI_MSG_GETMSIQ, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msg_getmsiq)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	msiq
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msg_setmsiq)
+	mov	HV_FAST_PCI_MSG_SETMSIQ, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msg_setmsiq)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	&valid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msg_getvalid)
+	mov	HV_FAST_PCI_MSG_GETVALID, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o2]
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msg_getvalid)
+
+	/* %o0: devhandle
+	 * %o1: msinum
+	 * %o2:	valid
+	 *
+	 * returns %o0: status
+	 */
+ENTRY(pci_sun4v_msg_setvalid)
+	mov	HV_FAST_PCI_MSG_SETVALID, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 mov	%o0, %o0
+ENDPROC(pci_sun4v_msg_setvalid)
+
+	/*
+	 * %o0:	devhandle
+	 * %o1:	r_addr
+	 * %o2:	size
+	 * %o3:	pagesize
+	 * %o4:	virt
+	 * %o5: &iotsb_num/&iotsb_handle
+	 *
+	 * returns %o0:	status
+	 *         %o1:	iotsb_num/iotsb_handle
+	 */
+ENTRY(pci_sun4v_iotsb_conf)
+	mov	%o5, %g1
+	mov	HV_FAST_PCI_IOTSB_CONF, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 stx	%o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_conf)
+
+	/*
+	 * %o0:	devhandle
+	 * %o1:	iotsb_num/iotsb_handle
+	 * %o2:	pci_device
+	 *
+	 * returns %o0:	status
+	 */
+ENTRY(pci_sun4v_iotsb_bind)
+	mov	HV_FAST_PCI_IOTSB_BIND, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(pci_sun4v_iotsb_bind)
+
+	/*
+	 * %o0:	devhandle
+	 * %o1:	iotsb_num/iotsb_handle
+	 * %o2:	index_count
+	 * %o3:	iotte_attributes
+	 * %o4:	io_page_list_p
+	 * %o5: &mapped
+	 *
+	 * returns %o0:	status
+	 *         %o1:	#mapped
+	 */
+ENTRY(pci_sun4v_iotsb_map)
+	mov	%o5, %g1
+	mov	HV_FAST_PCI_IOTSB_MAP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 stx	%o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_map)
+
+	/*
+	 * %o0:	devhandle
+	 * %o1:	iotsb_num/iotsb_handle
+	 * %o2:	iotsb_index
+	 * %o3:	#iottes
+	 * %o4: &demapped
+	 *
+	 * returns %o0:	status
+	 *         %o1:	#demapped
+	 */
+ENTRY(pci_sun4v_iotsb_demap)
+	mov	HV_FAST_PCI_IOTSB_DEMAP, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 stx	%o1, [%o4]
+ENDPROC(pci_sun4v_iotsb_demap)
diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c
new file mode 100644
index 0000000..ee4c9a9
--- /dev/null
+++ b/arch/sparc/kernel/pcic.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pcic.c: MicroSPARC-IIep PCI controller support
+ *
+ * Copyright (C) 1998 V. Roganov and G. Raiko
+ *
+ * Code is derived from Ultra/PCI PSYCHO controller support, see that
+ * for author info.
+ *
+ * Support for diverse IIep based platforms by Pete Zaitcev.
+ * CP-1200 by Eric Brower.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+
+#include <asm/swift.h> /* for cache flushing. */
+#include <asm/io.h>
+
+#include <linux/ctype.h>
+#include <linux/pci.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/interrupt.h>
+#include <linux/export.h>
+
+#include <asm/irq.h>
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/pcic.h>
+#include <asm/timex.h>
+#include <asm/timer.h>
+#include <linux/uaccess.h>
+#include <asm/irq_regs.h>
+
+#include "kernel.h"
+#include "irq.h"
+
+/*
+ * I studied different documents and many live PROMs both from 2.30
+ * family and 3.xx versions. I came to the amazing conclusion: there is
+ * absolutely no way to route interrupts in IIep systems relying on
+ * information which PROM presents. We must hardcode interrupt routing
+ * schematics. And this actually sucks.   -- zaitcev 1999/05/12
+ *
+ * To find the irq for a device we determine which routing map
+ * is in effect or, in other words, on which machine we are running.
+ * We use the PROM name for this, although other techniques may be used
+ * in special cases (Gleb reports a PROMless IIep based system).
+ * Once we know the map we take the device configuration address and
+ * find the PCIC pin number where the INT line goes.  Then we may either
+ * program the preferred irq into the PCIC or supply the preexisting irq
+ * to the device.
+ */
+struct pcic_ca2irq {
+	unsigned char busno;		/* PCI bus number */
+	unsigned char devfn;		/* Configuration address */
+	unsigned char pin;		/* PCIC external interrupt pin */
+	unsigned char irq;		/* Preferred IRQ (mappable in PCIC) */
+	unsigned int force;		/* Enforce preferred IRQ */
+};
+
+struct pcic_sn2list {
+	char *sysname;
+	struct pcic_ca2irq *intmap;
+	int mapdim;
+};
+
+/*
+ * JavaEngine-1 apparently has different versions.
+ *
+ * According to communications with Sun folks, for P2 build 501-4628-03:
+ * pin 0 - parallel, audio;
+ * pin 1 - Ethernet;
+ * pin 2 - su;
+ * pin 3 - PS/2 kbd and mouse.
+ *
+ * OEM manual (805-1486):
+ * pin 0: Ethernet
+ * pin 1: All EBus
+ * pin 2: IGA (unused)
+ * pin 3: Not connected
+ * OEM manual says that 501-4628 & 501-4811 are the same thing,
+ * only the latter has NAND flash in place.
+ *
+ * So far unofficial Sun wins over the OEM manual. Poor OEMs...
+ */
+static struct pcic_ca2irq pcic_i_je1a[] = {	/* 501-4811-03 */
+	{ 0, 0x00, 2, 12, 0 },		/* EBus: hogs all */
+	{ 0, 0x01, 1,  6, 1 },		/* Happy Meal */
+	{ 0, 0x80, 0,  7, 0 },		/* IGA (unused) */
+};
+
+/* XXX JS-E entry is incomplete - PCI Slot 2 address (pin 7)? */
+static struct pcic_ca2irq pcic_i_jse[] = {
+	{ 0, 0x00, 0, 13, 0 },		/* Ebus - serial and keyboard */
+	{ 0, 0x01, 1,  6, 0 },		/* hme */
+	{ 0, 0x08, 2,  9, 0 },		/* VGA - we hope not used :) */
+	{ 0, 0x10, 6,  8, 0 },		/* PCI INTA# in Slot 1 */
+	{ 0, 0x18, 7, 12, 0 },		/* PCI INTA# in Slot 2, shared w. RTC */
+	{ 0, 0x38, 4,  9, 0 },		/* All ISA devices. Read 8259. */
+	{ 0, 0x80, 5, 11, 0 },		/* EIDE */
+	/* {0,0x88, 0,0,0} - unknown device... PMU? Probably no interrupt. */
+	{ 0, 0xA0, 4,  9, 0 },		/* USB */
+	/*
+	 * Some pins belong to non-PCI devices, we hardcode them in drivers.
+	 * sun4m timers - irq 10, 14
+	 * PC style RTC - pin 7, irq 4 ?
+	 * Smart card, Parallel - pin 4 shared with USB, ISA
+	 * audio - pin 3, irq 5 ?
+	 */
+};
+
+/* SPARCengine-6 was the original release name of the CP-1200.
+ * The documentation differs between the two versions.
+ */
+static struct pcic_ca2irq pcic_i_se6[] = {
+	{ 0, 0x08, 0,  2, 0 },		/* SCSI	*/
+	{ 0, 0x01, 1,  6, 0 },		/* HME	*/
+	{ 0, 0x00, 3, 13, 0 },		/* EBus	*/
+};
+
+/*
+ * Krups (courtesy of Varol Kaptan)
+ * No documentation available, but it was easy to guess
+ * because it was very similar to Espresso.
+ *
+ * pin 0 - kbd, mouse, serial;
+ * pin 1 - Ethernet;
+ * pin 2 - igs (we do not use it);
+ * pin 3 - audio;
+ * pin 4,5,6 - unused;
+ * pin 7 - RTC (from P2 onwards as David B. says).
+ */
+static struct pcic_ca2irq pcic_i_jk[] = {
+	{ 0, 0x00, 0, 13, 0 },		/* Ebus - serial and keyboard */
+	{ 0, 0x01, 1,  6, 0 },		/* hme */
+};
+
+/*
+ * Several entries in this list may point to the same routing map
+ * as several PROMs may be installed on the same physical board.
+ */
+#define SN2L_INIT(name, map)	\
+  { name, map, ARRAY_SIZE(map) }
+
+static struct pcic_sn2list pcic_known_sysnames[] = {
+	SN2L_INIT("SUNW,JavaEngine1", pcic_i_je1a),	/* JE1, PROM 2.32 */
+	SN2L_INIT("SUNW,JS-E", pcic_i_jse),	/* PROLL JavaStation-E */
+	SN2L_INIT("SUNW,SPARCengine-6", pcic_i_se6), /* SPARCengine-6/CP-1200 */
+	SN2L_INIT("SUNW,JS-NC", pcic_i_jk),	/* PROLL JavaStation-NC */
+	SN2L_INIT("SUNW,JSIIep", pcic_i_jk),	/* OBP JavaStation-NC */
+	{ NULL, NULL, 0 }
+};
+
+/*
+ * Only one PCIC per IIep,
+ * and since we have no SMP IIep, only one per system.
+ */
+static int pcic0_up;
+static struct linux_pcic pcic0;
+
+void __iomem *pcic_regs;
+static volatile int pcic_speculative;
+static volatile int pcic_trapped;
+
+/* forward */
+unsigned int pcic_build_device_irq(struct platform_device *op,
+                                   unsigned int real_irq);
+
+#define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
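+
+/* Worked example of CONFIG_CMD(): bus 0, devfn 0x08, where 0x10 yields
+ * 0x80000000 | (0 << 16) | (0x08 << 8) | 0x10 == 0x80000810, i.e. the
+ * enable bit plus bus/devfn/register fields, mirroring the classic PCI
+ * CONFIG_ADDRESS layout.
+ */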
+
+static int pcic_read_config_dword(unsigned int busno, unsigned int devfn,
+    int where, u32 *value)
+{
+	struct linux_pcic *pcic;
+	unsigned long flags;
+
+	pcic = &pcic0;
+
+	local_irq_save(flags);
+#if 0 /* does not fail here */
+	pcic_speculative = 1;
+	pcic_trapped = 0;
+#endif
+	writel(CONFIG_CMD(busno, devfn, where), pcic->pcic_config_space_addr);
+#if 0 /* does not fail here */
+	nop();
+	if (pcic_trapped) {
+		local_irq_restore(flags);
+		*value = ~0;
+		return 0;
+	}
+#endif
+	pcic_speculative = 2;
+	pcic_trapped = 0;
+	*value = readl(pcic->pcic_config_space_data + (where&4));
+	nop();
+	if (pcic_trapped) {
+		pcic_speculative = 0;
+		local_irq_restore(flags);
+		*value = ~0;
+		return 0;
+	}
+	pcic_speculative = 0;
+	local_irq_restore(flags);
+	return 0;
+}
+
+static int pcic_read_config(struct pci_bus *bus, unsigned int devfn,
+   int where, int size, u32 *val)
+{
+	unsigned int v;
+
+	if (bus->number != 0) return -EINVAL;
+	switch (size) {
+	case 1:
+		pcic_read_config_dword(bus->number, devfn, where&~3, &v);
+		*val = 0xff & (v >> (8*(where & 3)));
+		return 0;
+	case 2:
+		if (where&1) return -EINVAL;
+		pcic_read_config_dword(bus->number, devfn, where&~3, &v);
+		*val = 0xffff & (v >> (8*(where & 3)));
+		return 0;
+	case 4:
+		if (where&3) return -EINVAL;
+		pcic_read_config_dword(bus->number, devfn, where&~3, val);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int pcic_write_config_dword(unsigned int busno, unsigned int devfn,
+    int where, u32 value)
+{
+	struct linux_pcic *pcic;
+	unsigned long flags;
+
+	pcic = &pcic0;
+
+	local_irq_save(flags);
+	writel(CONFIG_CMD(busno, devfn, where), pcic->pcic_config_space_addr);
+	writel(value, pcic->pcic_config_space_data + (where&4));
+	local_irq_restore(flags);
+	return 0;
+}
+
+static int pcic_write_config(struct pci_bus *bus, unsigned int devfn,
+   int where, int size, u32 val)
+{
+	unsigned int v;
+
+	if (bus->number != 0) return -EINVAL;
+	switch (size) {
+	case 1:
+		pcic_read_config_dword(bus->number, devfn, where&~3, &v);
+		v = (v & ~(0xff << (8*(where&3)))) |
+		    ((0xff&val) << (8*(where&3)));
+		return pcic_write_config_dword(bus->number, devfn, where&~3, v);
+	case 2:
+		if (where&1) return -EINVAL;
+		pcic_read_config_dword(bus->number, devfn, where&~3, &v);
+		v = (v & ~(0xffff << (8*(where&3)))) |
+		    ((0xffff&val) << (8*(where&3)));
+		return pcic_write_config_dword(bus->number, devfn, where&~3, v);
+	case 4:
+		if (where&3) return -EINVAL;
+		return pcic_write_config_dword(bus->number, devfn, where, val);
+	}
+	return -EINVAL;
+}
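+
+/* Note on the sub-dword cases above: the PCIC config cycle is always a
+ * dword access, so byte and halfword writes are emulated with a
+ * read-modify-write of the containing dword, replacing only the byte
+ * lanes selected by (where & 3).
+ */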
+
+static struct pci_ops pcic_ops = {
+	.read =		pcic_read_config,
+	.write =	pcic_write_config,
+};
+
+/*
+ * On sparc64 pcibios_init() calls pci_controller_probe().
+ * We want the PCIC probed a little ahead of that so that the
+ * interrupt controller is operational.
+ */
+int __init pcic_probe(void)
+{
+	struct linux_pcic *pcic;
+	struct linux_prom_registers regs[PROMREG_MAX];
+	struct linux_pbm_info* pbm;
+	char namebuf[64];
+	phandle node;
+	int err;
+
+	if (pcic0_up) {
+		prom_printf("PCIC: called twice!\n");
+		prom_halt();
+	}
+	pcic = &pcic0;
+
+	node = prom_getchild (prom_root_node);
+	node = prom_searchsiblings (node, "pci");
+	if (node == 0)
+		return -ENODEV;
+	/*
+	 * Map in PCIC register set, config space, and IO base
+	 */
+	err = prom_getproperty(node, "reg", (char*)regs, sizeof(regs));
+	if (err == 0 || err == -1) {
+		prom_printf("PCIC: Error, cannot get PCIC registers "
+			    "from PROM.\n");
+		prom_halt();
+	}
+
+	pcic0_up = 1;
+
+	pcic->pcic_res_regs.name = "pcic_registers";
+	pcic->pcic_regs = ioremap(regs[0].phys_addr, regs[0].reg_size);
+	if (!pcic->pcic_regs) {
+		prom_printf("PCIC: Error, cannot map PCIC registers.\n");
+		prom_halt();
+	}
+
+	pcic->pcic_res_io.name = "pcic_io";
+	if ((pcic->pcic_io = (unsigned long)
+	    ioremap(regs[1].phys_addr, 0x10000)) == 0) {
+		prom_printf("PCIC: Error, cannot map PCIC IO Base.\n");
+		prom_halt();
+	}
+
+	pcic->pcic_res_cfg_addr.name = "pcic_cfg_addr";
+	if ((pcic->pcic_config_space_addr =
+	    ioremap(regs[2].phys_addr, regs[2].reg_size * 2)) == NULL) {
+		prom_printf("PCIC: Error, cannot map "
+			    "PCI Configuration Space Address.\n");
+		prom_halt();
+	}
+
+	/*
+	 * Docs say the three least significant bits in the address and data
+	 * ports must be the same.  Thus, we need to adjust the size of data.
+	 */
+	pcic->pcic_res_cfg_data.name = "pcic_cfg_data";
+	if ((pcic->pcic_config_space_data =
+	    ioremap(regs[3].phys_addr, regs[3].reg_size * 2)) == NULL) {
+		prom_printf("PCIC: Error, cannot map "
+			    "PCI Configuration Space Data.\n");
+		prom_halt();
+	}
+
+	pbm = &pcic->pbm;
+	pbm->prom_node = node;
+	prom_getstring(node, "name", namebuf, 63);  namebuf[63] = 0;
+	strcpy(pbm->prom_name, namebuf);
+
+	{
+		extern int pcic_nmi_trap_patch[4];
+
+		t_nmi[0] = pcic_nmi_trap_patch[0];
+		t_nmi[1] = pcic_nmi_trap_patch[1];
+		t_nmi[2] = pcic_nmi_trap_patch[2];
+		t_nmi[3] = pcic_nmi_trap_patch[3];
+		swift_flush_dcache();
+		pcic_regs = pcic->pcic_regs;
+	}
+
+	prom_getstring(prom_root_node, "name", namebuf, 63);  namebuf[63] = 0;
+	{
+		struct pcic_sn2list *p;
+
+		for (p = pcic_known_sysnames; p->sysname != NULL; p++) {
+			if (strcmp(namebuf, p->sysname) == 0)
+				break;
+		}
+		pcic->pcic_imap = p->intmap;
+		pcic->pcic_imdim = p->mapdim;
+	}
+	if (pcic->pcic_imap == NULL) {
+		/*
+		 * We do not panic here for the sake of embedded systems.
+		 */
+		printk("PCIC: System %s is unknown, cannot route interrupts\n",
+		    namebuf);
+	}
+
+	return 0;
+}
+
+static void __init pcic_pbm_scan_bus(struct linux_pcic *pcic)
+{
+	struct linux_pbm_info *pbm = &pcic->pbm;
+
+	pbm->pci_bus = pci_scan_bus(pbm->pci_first_busno, &pcic_ops, pbm);
+	if (!pbm->pci_bus)
+		return;
+
+#if 0 /* deadwood transplanted from sparc64 */
+	pci_fill_in_pbm_cookies(pbm->pci_bus, pbm, pbm->prom_node);
+	pci_record_assignments(pbm, pbm->pci_bus);
+	pci_assign_unassigned(pbm, pbm->pci_bus);
+	pci_fixup_irq(pbm, pbm->pci_bus);
+#endif
+	pci_bus_add_devices(pbm->pci_bus);
+}
+
+/*
+ * Main entry point from the PCI subsystem.
+ */
+static int __init pcic_init(void)
+{
+	struct linux_pcic *pcic;
+
+	/*
+	 * The PCIC must be initialized before the timer is started.
+	 * So, here we report the presence of the PCIC and do some magic passes.
+	 */
+	if(!pcic0_up)
+		return 0;
+	pcic = &pcic0;
+
+	/*
+	 *      Switch off IOTLB translation.
+	 */
+	writeb(PCI_DVMA_CONTROL_IOTLB_DISABLE, 
+	       pcic->pcic_regs+PCI_DVMA_CONTROL);
+
+	/*
+	 *      Increase mapped size for PCI memory space (DMA access).
+	 *      Should be done in that order (size first, address second).
+	 *      Why couldn't we set up 4GB and forget about it? XXX
+	 */
+	writel(0xF0000000UL, pcic->pcic_regs+PCI_SIZE_0);
+	writel(0+PCI_BASE_ADDRESS_SPACE_MEMORY, 
+	       pcic->pcic_regs+PCI_BASE_ADDRESS_0);
+
+	pcic_pbm_scan_bus(pcic);
+
+	return 0;
+}
+
+int pcic_present(void)
+{
+	return pcic0_up;
+}
+
+static int pdev_to_pnode(struct linux_pbm_info *pbm, struct pci_dev *pdev)
+{
+	struct linux_prom_pci_registers regs[PROMREG_MAX];
+	int err;
+	phandle node = prom_getchild(pbm->prom_node);
+
+	while (node) {
+		err = prom_getproperty(node, "reg",
+				       (char *)&regs[0], sizeof(regs));
+		if (err != 0 && err != -1) {
+			unsigned long devfn = (regs[0].which_io >> 8) & 0xff;
+			if (devfn == pdev->devfn)
+				return node;
+		}
+		node = prom_getsibling(node);
+	}
+	return 0;
+}
+
+static inline struct pcidev_cookie *pci_devcookie_alloc(void)
+{
+	return kmalloc(sizeof(struct pcidev_cookie), GFP_ATOMIC);
+}
+
+static void pcic_map_pci_device(struct linux_pcic *pcic,
+    struct pci_dev *dev, int node)
+{
+	char namebuf[64];
+	unsigned long address;
+	unsigned long flags;
+	int j;
+
+	if (node == 0 || node == -1) {
+		strcpy(namebuf, "???");
+	} else {
+		prom_getstring(node, "name", namebuf, 63); namebuf[63] = 0;
+	}
+
+	for (j = 0; j < 6; j++) {
+		address = dev->resource[j].start;
+		if (address == 0)
+			break;		/* BAR addresses are assigned sequentially */
+		flags = dev->resource[j].flags;
+		if ((flags & IORESOURCE_IO) != 0) {
+			if (address < 0x10000) {
+				/*
+				 * A device responds to I/O cycles on PCI.
+				 * We generate these cycles with memory
+				 * access into the fixed map (phys 0x30000000).
+				 *
+				 * Since a device driver does not want to
+				 * do ioremap() before accessing PC-style I/O,
+				 * we supply a virtual, ready-to-access address.
+				 *
+				 * Note that request_region()
+				 * works for these devices.
+				 *
+				 * XXX Neat trick, but it's a *bad* idea
+				 * to stomp on regions like that.
+				 * What if we want to allocate one more
+				 * PCI base address...
+				 */
+				dev->resource[j].start =
+				    pcic->pcic_io + address;
+				dev->resource[j].end = 1;  /* XXX */
+				dev->resource[j].flags =
+				    (flags & ~IORESOURCE_IO) | IORESOURCE_MEM;
+			} else {
+				/*
+				 * OOPS... The PCI Spec allows this. Sun does
+				 * not ship any devices mapped above 64K,
+				 * so it must be a user with a weird I/O
+				 * board in a PCI slot. We should remap it
+				 * under 64K, but that is not done yet. XXX
+				 */
+				pci_info(dev, "PCIC: Skipping I/O space at "
+					 "0x%lx, this will Oops if a driver "
+					 "attaches device '%s'\n", address,
+					 namebuf);
+			}
+		}
+	}
+}
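+
+/*
+ * Example for pcic_map_pci_device() above: an I/O BAR assigned at
+ * 0x0400 is rewritten to the virtual address pcic->pcic_io + 0x0400
+ * and retagged from IORESOURCE_IO to IORESOURCE_MEM, so a driver can
+ * poke its PC-style I/O registers without calling ioremap() first.
+ */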
+
+static void
+pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
+{
+	struct pcic_ca2irq *p;
+	unsigned int real_irq;
+	int i, ivec;
+	char namebuf[64];
+
+	if (node == 0 || node == -1) {
+		strcpy(namebuf, "???");
+	} else {
+		prom_getstring(node, "name", namebuf, sizeof(namebuf));
+	}
+
+	if ((p = pcic->pcic_imap) == NULL) {
+		dev->irq = 0;
+		return;
+	}
+	for (i = 0; i < pcic->pcic_imdim; i++) {
+		if (p->busno == dev->bus->number && p->devfn == dev->devfn)
+			break;
+		p++;
+	}
+	if (i >= pcic->pcic_imdim) {
+		pci_info(dev, "PCIC: device %s not found in %d\n", namebuf,
+			 pcic->pcic_imdim);
+		dev->irq = 0;
+		return;
+	}
+
+	i = p->pin;
+	if (i >= 0 && i < 4) {
+		ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
+		real_irq = ivec >> (i << 2) & 0xF;
+	} else if (i >= 4 && i < 8) {
+		ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
+		real_irq = ivec >> ((i-4) << 2) & 0xF;
+	} else {					/* Corrupted map */
+		pci_info(dev, "PCIC: BAD PIN %d\n", i); for (;;) {}
+	}
+/* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */
+
+	/* real_irq == 0 means the PROM did not bother to program the upper
+	 * half of the PCIC. This happens on JS-E with PROM 3.11, for instance.
+	 */
+	if (real_irq == 0 || p->force) {
+		if (p->irq == 0 || p->irq >= 15) {	/* Corrupted map */
+			pci_info(dev, "PCIC: BAD IRQ %d\n", p->irq); for (;;) {}
+		}
+		pci_info(dev, "PCIC: setting irq %d at pin %d\n", p->irq,
+			 p->pin);
+		real_irq = p->irq;
+
+		i = p->pin;
+		if (i >= 4) {
+			ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
+			ivec &= ~(0xF << ((i - 4) << 2));
+			ivec |= p->irq << ((i - 4) << 2);
+			writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_HI);
+		} else {
+			ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
+			ivec &= ~(0xF << (i << 2));
+			ivec |= p->irq << (i << 2);
+			writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO);
+		}
+	}
+	dev->irq = pcic_build_device_irq(NULL, real_irq);
+}
+
+/*
+ * Normally called from {do_}pci_scan_bus...
+ */
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct linux_pcic *pcic;
+	/* struct linux_pbm_info* pbm = &pcic->pbm; */
+	int node;
+	struct pcidev_cookie *pcp;
+
+	if (!pcic0_up) {
+		pci_info(bus, "pcibios_fixup_bus: no PCIC\n");
+		return;
+	}
+	pcic = &pcic0;
+
+	/*
+	 * The following crud is equivalent to pbm = pcic_bus_to_pbm(bus);
+	 */
+	if (bus->number != 0) {
+		pci_info(bus, "pcibios_fixup_bus: nonzero bus 0x%x\n",
+			 bus->number);
+		return;
+	}
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		node = pdev_to_pnode(&pcic->pbm, dev);
+		if (node == 0)
+			node = -1;
+
+		/* cookies */
+		pcp = pci_devcookie_alloc();
+		pcp->pbm = &pcic->pbm;
+		pcp->prom_node = of_find_node_by_phandle(node);
+		dev->sysdata = pcp;
+
+		/* fixing I/O to look like memory */
+		if ((dev->class>>16) != PCI_BASE_CLASS_BRIDGE)
+			pcic_map_pci_device(pcic, dev, node);
+
+		pcic_fill_irq(pcic, dev, node);
+	}
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	u16 cmd, oldcmd;
+	int i;
+
+	pci_read_config_word(dev, PCI_COMMAND, &cmd);
+	oldcmd = cmd;
+
+	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
+		struct resource *res = &dev->resource[i];
+
+		/* Only set up the requested stuff */
+		if (!(mask & (1<<i)))
+			continue;
+
+		if (res->flags & IORESOURCE_IO)
+			cmd |= PCI_COMMAND_IO;
+		if (res->flags & IORESOURCE_MEM)
+			cmd |= PCI_COMMAND_MEMORY;
+	}
+
+	if (cmd != oldcmd) {
+		pci_info(dev, "enabling device (%04x -> %04x)\n", oldcmd, cmd);
+		pci_write_config_word(dev, PCI_COMMAND, cmd);
+	}
+	return 0;
+}
+
+/* Makes compiler happy */
+static volatile int pcic_timer_dummy;
+
+static void pcic_clear_clock_irq(void)
+{
+	pcic_timer_dummy = readl(pcic0.pcic_regs+PCI_SYS_LIMIT);
+}
+
+/* CPU frequency is 100 MHz, timer increments every 4 CPU clocks */
+#define USECS_PER_JIFFY  (1000000 / HZ)
+#define TICK_TIMER_LIMIT ((100 * 1000000 / 4) / HZ)
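+/*
+ * Worked example, assuming HZ == 100: the timer runs at
+ * 100 MHz / 4 == 25 MHz, so USECS_PER_JIFFY == 10000 and
+ * TICK_TIMER_LIMIT == 250000 timer ticks per jiffy.  The conversion
+ * in pcic_cycles_offset() below then reduces to
+ * (count / 100) * 10000 / 2500 == count / 25, i.e. microseconds at
+ * 25 MHz, before the final doubling for the clock_rate convention.
+ */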
+
+static unsigned int pcic_cycles_offset(void)
+{
+	u32 value, count;
+
+	value = readl(pcic0.pcic_regs + PCI_SYS_COUNTER);
+	count = value & ~PCI_SYS_COUNTER_OVERFLOW;
+
+	if (value & PCI_SYS_COUNTER_OVERFLOW)
+		count += TICK_TIMER_LIMIT;
+	/*
+	 * We divide everything by HZ
+	 * to get microsecond resolution and to avoid overflow
+	 */
+	count = ((count / HZ) * USECS_PER_JIFFY) / (TICK_TIMER_LIMIT / HZ);
+
+	/* Coordinate with the sparc_config.clock_rate setting */
+	return count * 2;
+}
+
+void __init pci_time_init(void)
+{
+	struct linux_pcic *pcic = &pcic0;
+	unsigned long v;
+	int timer_irq, irq;
+	int err;
+
+#ifndef CONFIG_SMP
+	/*
+	 * The clock_rate is expressed in SBUS terms.
+	 * We take this into account in pcic_cycles_offset().
+	 */
+	sparc_config.clock_rate = SBUS_CLOCK_RATE / HZ;
+	sparc_config.features |= FEAT_L10_CLOCKEVENT;
+#endif
+	sparc_config.features |= FEAT_L10_CLOCKSOURCE;
+	sparc_config.get_cycles_offset = pcic_cycles_offset;
+
+	writel(TICK_TIMER_LIMIT, pcic->pcic_regs+PCI_SYS_LIMIT);
+	/* PROM should set appropriate irq */
+	v = readb(pcic->pcic_regs+PCI_COUNTER_IRQ);
+	timer_irq = PCI_COUNTER_IRQ_SYS(v);
+	writel(PCI_COUNTER_IRQ_SET(timer_irq, 0),
+	       pcic->pcic_regs+PCI_COUNTER_IRQ);
+	irq = pcic_build_device_irq(NULL, timer_irq);
+	err = request_irq(irq, timer_interrupt,
+			  IRQF_TIMER, "timer", NULL);
+	if (err) {
+		prom_printf("time_init: unable to attach IRQ%d\n", timer_irq);
+		prom_halt();
+	}
+	local_irq_enable();
+}
+
+
+#if 0
+static void watchdog_reset(void)
+{
+	writeb(0, pcic0.pcic_regs+PCI_SYS_STATUS);
+}
+#endif
+
+/*
+ * NMI
+ */
+void pcic_nmi(unsigned int pend, struct pt_regs *regs)
+{
+	pend = swab32(pend);
+
+	if (!pcic_speculative || (pend & PCI_SYS_INT_PENDING_PIO) == 0) {
+		/*
+		 * XXX On CP-1200 PCI #SERR may happen, we do not know
+		 * what to do about it yet.
+		 */
+		printk("Aiee, NMI pend 0x%x pc 0x%x spec %d, hanging\n",
+		    pend, (int)regs->pc, pcic_speculative);
+		for (;;) { }
+	}
+	pcic_speculative = 0;
+	pcic_trapped = 1;
+	regs->pc = regs->npc;
+	regs->npc += 4;
+}
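+
+/*
+ * Sketch of the speculative config-access protocol above: the caller
+ * (pcic_read_config_dword) sets pcic_speculative and clears
+ * pcic_trapped before touching configuration space.  If the access
+ * raises an NMI, pcic_nmi() records the trap in pcic_trapped and
+ * skips the faulting instruction by advancing %pc/%npc; the caller
+ * then sees pcic_trapped != 0 and returns all-ones, the conventional
+ * "no device here" answer for PCI configuration reads.
+ */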
+
+static inline unsigned long get_irqmask(int irq_nr)
+{
+	return 1 << irq_nr;
+}
+
+static void pcic_mask_irq(struct irq_data *data)
+{
+	unsigned long mask, flags;
+
+	mask = (unsigned long)data->chip_data;
+	local_irq_save(flags);
+	writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
+	local_irq_restore(flags);
+}
+
+static void pcic_unmask_irq(struct irq_data *data)
+{
+	unsigned long mask, flags;
+
+	mask = (unsigned long)data->chip_data;
+	local_irq_save(flags);
+	writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
+	local_irq_restore(flags);
+}
+
+static unsigned int pcic_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	pcic_unmask_irq(data);
+	return 0;
+}
+
+static struct irq_chip pcic_irq = {
+	.name		= "pcic",
+	.irq_startup	= pcic_startup_irq,
+	.irq_mask	= pcic_mask_irq,
+	.irq_unmask	= pcic_unmask_irq,
+};
+
+unsigned int pcic_build_device_irq(struct platform_device *op,
+                                   unsigned int real_irq)
+{
+	unsigned int irq;
+	unsigned long mask;
+
+	irq = 0;
+	mask = get_irqmask(real_irq);
+	if (mask == 0)
+		goto out;
+
+	irq = irq_alloc(real_irq, real_irq);
+	if (irq == 0)
+		goto out;
+
+	irq_set_chip_and_handler_name(irq, &pcic_irq,
+	                              handle_level_irq, "PCIC");
+	irq_set_chip_data(irq, (void *)mask);
+
+out:
+	return irq;
+}
+
+
+static void pcic_load_profile_irq(int cpu, unsigned int limit)
+{
+	printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
+}
+
+void __init sun4m_pci_init_IRQ(void)
+{
+	sparc_config.build_device_irq = pcic_build_device_irq;
+	sparc_config.clear_clock_irq  = pcic_clear_clock_irq;
+	sparc_config.load_profile_irq = pcic_load_profile_irq;
+}
+
+subsys_initcall(pcic_init);
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
new file mode 100644
index 0000000..eb978c7
--- /dev/null
+++ b/arch/sparc/kernel/pcr.c
@@ -0,0 +1,372 @@
+/* pcr.c: Generic sparc64 performance counter infrastructure.
+ *
+ * Copyright (C) 2009 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+
+#include <linux/irq_work.h>
+#include <linux/ftrace.h>
+
+#include <asm/pil.h>
+#include <asm/pcr.h>
+#include <asm/nmi.h>
+#include <asm/asi.h>
+#include <asm/spitfire.h>
+
+/* This code is shared between various users of the performance
+ * counters.  Users will be oprofile, pseudo-NMI watchdog, and the
+ * perf_event support layer.
+ */
+
+/* Performance counter interrupts run unmasked at PIL level 15.
+ * Therefore we can't do things like wakeups and other work
+ * that expects IRQ disabling to be adhered to in locking etc.
+ *
+ * In such situations we instead defer the work by signalling
+ * a lower-level cpu IRQ.
+ */
+void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	clear_softint(1 << PIL_DEFERRED_PCR_WORK);
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+#ifdef CONFIG_IRQ_WORK
+	irq_work_run();
+#endif
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+void arch_irq_work_raise(void)
+{
+	set_softint(1 << PIL_DEFERRED_PCR_WORK);
+}
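+
+/*
+ * Usage sketch (hypothetical caller): code running at PIL 15, such as
+ * the perf counter overflow handler, defers work via the generic
+ * irq_work API, e.g.
+ *
+ *	static struct irq_work wakeup_work;	// initialized elsewhere
+ *	irq_work_queue(&wakeup_work);
+ *
+ * irq_work_queue() eventually calls arch_irq_work_raise() above,
+ * which posts the PIL_DEFERRED_PCR_WORK softint so that
+ * deferred_pcr_work_irq() runs the queued work from a maskable
+ * context.
+ */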
+
+const struct pcr_ops *pcr_ops;
+EXPORT_SYMBOL_GPL(pcr_ops);
+
+static u64 direct_pcr_read(unsigned long reg_num)
+{
+	u64 val;
+
+	WARN_ON_ONCE(reg_num != 0);
+	__asm__ __volatile__("rd %%pcr, %0" : "=r" (val));
+	return val;
+}
+
+static void direct_pcr_write(unsigned long reg_num, u64 val)
+{
+	WARN_ON_ONCE(reg_num != 0);
+	__asm__ __volatile__("wr %0, 0x0, %%pcr" : : "r" (val));
+}
+
+static u64 direct_pic_read(unsigned long reg_num)
+{
+	u64 val;
+
+	WARN_ON_ONCE(reg_num != 0);
+	__asm__ __volatile__("rd %%pic, %0" : "=r" (val));
+	return val;
+}
+
+static void direct_pic_write(unsigned long reg_num, u64 val)
+{
+	WARN_ON_ONCE(reg_num != 0);
+
+	/* Blackbird errata workaround.  See commentary in
+	 * arch/sparc64/kernel/smp.c:smp_percpu_timer_interrupt()
+	 * for more information.
+	 */
+	__asm__ __volatile__("ba,pt	%%xcc, 99f\n\t"
+			     " nop\n\t"
+			     ".align	64\n"
+			  "99:wr	%0, 0x0, %%pic\n\t"
+			     "rd	%%pic, %%g0" : : "r" (val));
+}
+
+static u64 direct_picl_value(unsigned int nmi_hz)
+{
+	u32 delta = local_cpu_data().clock_tick / nmi_hz;
+
+	return ((u64)((0 - delta) & 0xffffffff)) << 32;
+}
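+
+/*
+ * Worked example: the PIC counts up and interrupts on the
+ * 0xffffffff -> 0 transition, so preloading it with -delta makes it
+ * overflow after exactly delta ticks.  With clock_tick == 1 GHz and
+ * nmi_hz == 10, delta == 100000000 and the preload value is
+ * 0xfa0a1f00 (== 2^32 - 100000000), shifted into the upper half of
+ * the 64-bit %pic register.
+ */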
+
+static const struct pcr_ops direct_pcr_ops = {
+	.read_pcr		= direct_pcr_read,
+	.write_pcr		= direct_pcr_write,
+	.read_pic		= direct_pic_read,
+	.write_pic		= direct_pic_write,
+	.nmi_picl_value		= direct_picl_value,
+	.pcr_nmi_enable		= (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE),
+	.pcr_nmi_disable	= PCR_PIC_PRIV,
+};
+
+static void n2_pcr_write(unsigned long reg_num, u64 val)
+{
+	unsigned long ret;
+
+	WARN_ON_ONCE(reg_num != 0);
+	if (val & PCR_N2_HTRACE) {
+		ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val);
+		if (ret != HV_EOK)
+			direct_pcr_write(reg_num, val);
+	} else
+		direct_pcr_write(reg_num, val);
+}
+
+static u64 n2_picl_value(unsigned int nmi_hz)
+{
+	u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
+
+	return ((u64)((0 - delta) & 0xffffffff)) << 32;
+}
+
+static const struct pcr_ops n2_pcr_ops = {
+	.read_pcr		= direct_pcr_read,
+	.write_pcr		= n2_pcr_write,
+	.read_pic		= direct_pic_read,
+	.write_pic		= direct_pic_write,
+	.nmi_picl_value		= n2_picl_value,
+	.pcr_nmi_enable		= (PCR_PIC_PRIV | PCR_STRACE | PCR_UTRACE |
+				   PCR_N2_TOE_OV1 |
+				   (2 << PCR_N2_SL1_SHIFT) |
+				   (0xff << PCR_N2_MASK1_SHIFT)),
+	.pcr_nmi_disable	= PCR_PIC_PRIV,
+};
+
+static u64 n4_pcr_read(unsigned long reg_num)
+{
+	unsigned long val;
+
+	(void) sun4v_vt_get_perfreg(reg_num, &val);
+
+	return val;
+}
+
+static void n4_pcr_write(unsigned long reg_num, u64 val)
+{
+	(void) sun4v_vt_set_perfreg(reg_num, val);
+}
+
+static u64 n4_pic_read(unsigned long reg_num)
+{
+	unsigned long val;
+
+	__asm__ __volatile__("ldxa [%1] %2, %0"
+			     : "=r" (val)
+			     : "r" (reg_num * 0x8UL), "i" (ASI_PIC));
+
+	return val;
+}
+
+static void n4_pic_write(unsigned long reg_num, u64 val)
+{
+	__asm__ __volatile__("stxa %0, [%1] %2"
+			     : /* no outputs */
+			     : "r" (val), "r" (reg_num * 0x8UL), "i" (ASI_PIC));
+}
+
+static u64 n4_picl_value(unsigned int nmi_hz)
+{
+	u32 delta = local_cpu_data().clock_tick / (nmi_hz << 2);
+
+	return ((u64)((0 - delta) & 0xffffffff));
+}
+
+static const struct pcr_ops n4_pcr_ops = {
+	.read_pcr		= n4_pcr_read,
+	.write_pcr		= n4_pcr_write,
+	.read_pic		= n4_pic_read,
+	.write_pic		= n4_pic_write,
+	.nmi_picl_value		= n4_picl_value,
+	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE |
+				   PCR_N4_UTRACE | PCR_N4_TOE |
+				   (26 << PCR_N4_SL_SHIFT)),
+	.pcr_nmi_disable	= PCR_N4_PICNPT,
+};
+
+static u64 n5_pcr_read(unsigned long reg_num)
+{
+	unsigned long val;
+
+	(void) sun4v_t5_get_perfreg(reg_num, &val);
+
+	return val;
+}
+
+static void n5_pcr_write(unsigned long reg_num, u64 val)
+{
+	(void) sun4v_t5_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops n5_pcr_ops = {
+	.read_pcr		= n5_pcr_read,
+	.write_pcr		= n5_pcr_write,
+	.read_pic		= n4_pic_read,
+	.write_pic		= n4_pic_write,
+	.nmi_picl_value		= n4_picl_value,
+	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE |
+				   PCR_N4_UTRACE | PCR_N4_TOE |
+				   (26 << PCR_N4_SL_SHIFT)),
+	.pcr_nmi_disable	= PCR_N4_PICNPT,
+};
+
+static u64 m7_pcr_read(unsigned long reg_num)
+{
+	unsigned long val;
+
+	(void) sun4v_m7_get_perfreg(reg_num, &val);
+
+	return val;
+}
+
+static void m7_pcr_write(unsigned long reg_num, u64 val)
+{
+	(void) sun4v_m7_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops m7_pcr_ops = {
+	.read_pcr		= m7_pcr_read,
+	.write_pcr		= m7_pcr_write,
+	.read_pic		= n4_pic_read,
+	.write_pic		= n4_pic_write,
+	.nmi_picl_value		= n4_picl_value,
+	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE |
+				   PCR_N4_UTRACE | PCR_N4_TOE |
+				   (26 << PCR_N4_SL_SHIFT)),
+	.pcr_nmi_disable	= PCR_N4_PICNPT,
+};
+
+static unsigned long perf_hsvc_group;
+static unsigned long perf_hsvc_major;
+static unsigned long perf_hsvc_minor;
+
+static int __init register_perf_hsvc(void)
+{
+	unsigned long hverror;
+
+	if (tlb_type == hypervisor) {
+		switch (sun4v_chip_type) {
+		case SUN4V_CHIP_NIAGARA1:
+			perf_hsvc_group = HV_GRP_NIAG_PERF;
+			break;
+
+		case SUN4V_CHIP_NIAGARA2:
+			perf_hsvc_group = HV_GRP_N2_CPU;
+			break;
+
+		case SUN4V_CHIP_NIAGARA3:
+			perf_hsvc_group = HV_GRP_KT_CPU;
+			break;
+
+		case SUN4V_CHIP_NIAGARA4:
+			perf_hsvc_group = HV_GRP_VT_CPU;
+			break;
+
+		case SUN4V_CHIP_NIAGARA5:
+			perf_hsvc_group = HV_GRP_T5_CPU;
+			break;
+
+		case SUN4V_CHIP_SPARC_M7:
+			perf_hsvc_group = HV_GRP_M7_PERF;
+			break;
+
+		default:
+			return -ENODEV;
+		}
+
+		perf_hsvc_major = 1;
+		perf_hsvc_minor = 0;
+		hverror = sun4v_hvapi_register(perf_hsvc_group,
+					       perf_hsvc_major,
+					       &perf_hsvc_minor);
+		if (hverror) {
+			pr_err("perfmon: Could not register hvapi(0x%lx).\n",
+			       hverror);
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+
+static void __init unregister_perf_hsvc(void)
+{
+	if (tlb_type != hypervisor)
+		return;
+	sun4v_hvapi_unregister(perf_hsvc_group);
+}
+
+static int __init setup_sun4v_pcr_ops(void)
+{
+	int ret = 0;
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_NIAGARA1:
+	case SUN4V_CHIP_NIAGARA2:
+	case SUN4V_CHIP_NIAGARA3:
+		pcr_ops = &n2_pcr_ops;
+		break;
+
+	case SUN4V_CHIP_NIAGARA4:
+		pcr_ops = &n4_pcr_ops;
+		break;
+
+	case SUN4V_CHIP_NIAGARA5:
+		pcr_ops = &n5_pcr_ops;
+		break;
+
+	case SUN4V_CHIP_SPARC_M7:
+		pcr_ops = &m7_pcr_ops;
+		break;
+
+	default:
+		ret = -ENODEV;
+		break;
+	}
+
+	return ret;
+}
+
+int __init pcr_arch_init(void)
+{
+	int err = register_perf_hsvc();
+
+	if (err)
+		return err;
+
+	switch (tlb_type) {
+	case hypervisor:
+		err = setup_sun4v_pcr_ops();
+		if (err)
+			goto out_unregister;
+		break;
+
+	case cheetah:
+	case cheetah_plus:
+		pcr_ops = &direct_pcr_ops;
+		break;
+
+	case spitfire:
+		/* UltraSPARC-I/II and derivatives lack a profile
+		 * counter overflow interrupt so we can't make use of
+		 * their hardware currently.
+		 */
+		/* fallthrough */
+	default:
+		err = -ENODEV;
+		goto out_unregister;
+	}
+
+	return nmi_init();
+
+out_unregister:
+	unregister_perf_hsvc();
+	return err;
+}
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
new file mode 100644
index 0000000..67b3e6b
--- /dev/null
+++ b/arch/sparc/kernel/perf_event.c
@@ -0,0 +1,1876 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Performance event support for sparc64.
+ *
+ * Copyright (C) 2009, 2010 David S. Miller <davem@davemloft.net>
+ *
+ * This code is based almost entirely upon the x86 perf event
+ * code, which is:
+ *
+ *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
+ *  Copyright (C) 2009 Jaswinder Singh Rajput
+ *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
+ *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kprobes.h>
+#include <linux/ftrace.h>
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+
+#include <asm/stacktrace.h>
+#include <asm/cpudata.h>
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+#include <linux/sched/clock.h>
+#include <asm/nmi.h>
+#include <asm/pcr.h>
+#include <asm/cacheflush.h>
+
+#include "kernel.h"
+#include "kstack.h"
+
+/* Two classes of sparc64 chips currently exist.  All of which have
+ * 32-bit counters which can generate overflow interrupts on the
+ * transition from 0xffffffff to 0.
+ *
+ * All chips up to and including SPARC-T3 have two performance
+ * counters.  The two 32-bit counters are accessed in one go using a
+ * single 64-bit register.
+ *
+ * On these older chips both counters are controlled using a single
+ * control register.  The only way to stop all sampling is to clear
+ * all of the context (user, supervisor, hypervisor) sampling enable
+ * bits.  But these bits apply to both counters, thus the two counters
+ * can't be enabled/disabled individually.
+ *
+ * Furthermore, the control register on these older chips has two
+ * event fields, one for each of the two counters.  It's thus nearly
+ * impossible to have one counter going while keeping the other one
+ * stopped.  Therefore it is possible to get overflow interrupts for
+ * counters not currently "in use" and that condition must be checked
+ * in the overflow interrupt handler.
+ *
+ * So we use a hack, in that we program inactive counters with the
+ * "sw_count0" and "sw_count1" events.  These count how many times
+ * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
+ * unusual way to encode a NOP and therefore will not trigger in
+ * normal code.
+ *
+ * Starting with SPARC-T4 we have one control register per counter.
+ * And the counters are stored in individual registers.  The registers
+ * for the counters are 64-bit but only a 32-bit counter is
+ * implemented.  The event selections on SPARC-T4 lack any
+ * restrictions, therefore we can elide all of the complicated
+ * conflict resolution code we have for SPARC-T3 and earlier chips.
+ */
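+
+/*
+ * For reference, "sethi %hi(0xfc000), %g0" assembles (per the SPARC
+ * format-2 encoding: op2 == 4, imm22 == 0xfc000 >> 10 == 0x3f0) to
+ * the word 0x010003f0, while the canonical nop is sethi 0, %g0,
+ * i.e. 0x01000000; both write only %g0 and have no architectural
+ * effect.
+ */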
+
+#define MAX_HWEVENTS			4
+#define MAX_PCRS			4
+#define MAX_PERIOD			((1UL << 32) - 1)
+
+#define PIC_UPPER_INDEX			0
+#define PIC_LOWER_INDEX			1
+#define PIC_NO_INDEX			-1
+
+struct cpu_hw_events {
+	/* Number of events currently scheduled onto this cpu.
+	 * This tells how many entries in the arrays below
+	 * are valid.
+	 */
+	int			n_events;
+
+	/* Number of new events added since the last hw_perf_disable().
+	 * This works because the perf event layer always adds new
+	 * events inside of a perf_{disable,enable}() sequence.
+	 */
+	int			n_added;
+
+	/* Array of events currently scheduled on this cpu.  */
+	struct perf_event	*event[MAX_HWEVENTS];
+
+	/* Array of encoded longs, specifying the %pcr register
+	 * encoding and the mask of PIC counters this event can
+	 * be scheduled on.  See perf_event_encode() et al.
+	 */
+	unsigned long		events[MAX_HWEVENTS];
+
+	/* The current counter index assigned to an event.  When the
+	 * event hasn't been programmed into the cpu yet, this will
+	 * hold PIC_NO_INDEX.  The event->hw.idx value tells us where
+	 * we ought to schedule the event.
+	 */
+	int			current_idx[MAX_HWEVENTS];
+
+	/* Software copy of %pcr register(s) on this cpu.  */
+	u64			pcr[MAX_HWEVENTS];
+
+	/* Enable/disable state.  */
+	int			enabled;
+
+	unsigned int		txn_flags;
+};
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };
+
+/* An event map describes the characteristics of a performance
+ * counter event.  In particular it gives the encoding as well as
+ * a mask telling which counters the event can be measured on.
+ *
+ * The mask is unused on SPARC-T4 and later.
+ */
+struct perf_event_map {
+	u16	encoding;
+	u8	pic_mask;
+#define PIC_NONE	0x00
+#define PIC_UPPER	0x01
+#define PIC_LOWER	0x02
+};
+
+/* Encode a perf_event_map entry into a long.  */
+static unsigned long perf_event_encode(const struct perf_event_map *pmap)
+{
+	return ((unsigned long) pmap->encoding << 16) | pmap->pic_mask;
+}
+
+static u8 perf_event_get_msk(unsigned long val)
+{
+	return val & 0xff;
+}
+
+static u64 perf_event_get_enc(unsigned long val)
+{
+	return val >> 16;
+}
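+
+/*
+ * Round-trip example: an entry such as { 0x09, PIC_UPPER } encodes to
+ * (0x0009 << 16) | 0x01 == 0x00090001; perf_event_get_msk() then
+ * recovers 0x01 and perf_event_get_enc() recovers 0x0009.
+ */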
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+#define CACHE_OP_UNSUPPORTED	0xfffe
+#define CACHE_OP_NONSENSE	0xffff
+
+typedef struct perf_event_map cache_map_t
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX];
+
+struct sparc_pmu {
+	const struct perf_event_map	*(*event_map)(int);
+	const cache_map_t		*cache_map;
+	int				max_events;
+	u32				(*read_pmc)(int);
+	void				(*write_pmc)(int, u64);
+	int				upper_shift;
+	int				lower_shift;
+	int				event_mask;
+	int				user_bit;
+	int				priv_bit;
+	int				hv_bit;
+	int				irq_bit;
+	int				upper_nop;
+	int				lower_nop;
+	unsigned int			flags;
+#define SPARC_PMU_ALL_EXCLUDES_SAME	0x00000001
+#define SPARC_PMU_HAS_CONFLICTS		0x00000002
+	int				max_hw_events;
+	int				num_pcrs;
+	int				num_pic_regs;
+};
+
+static u32 sparc_default_read_pmc(int idx)
+{
+	u64 val;
+
+	val = pcr_ops->read_pic(0);
+	if (idx == PIC_UPPER_INDEX)
+		val >>= 32;
+
+	return val & 0xffffffff;
+}
+
+static void sparc_default_write_pmc(int idx, u64 val)
+{
+	u64 shift, mask, pic;
+
+	shift = 0;
+	if (idx == PIC_UPPER_INDEX)
+		shift = 32;
+
+	mask = ((u64) 0xffffffff) << shift;
+	val <<= shift;
+
+	pic = pcr_ops->read_pic(0);
+	pic &= ~mask;
+	pic |= val;
+	pcr_ops->write_pic(0, pic);
+}
+
+static const struct perf_event_map ultra3_perfmon_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
+};
+
+static const struct perf_event_map *ultra3_event_map(int event_id)
+{
+	return &ultra3_perfmon_event_map[event_id];
+}
+
+static const cache_map_t ultra3_cache_map = {
+[C(L1D)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
+		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(L1I)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(LL)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
+		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(DTLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(ITLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(BPU)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+};
+
+static const struct sparc_pmu ultra3_pmu = {
+	.event_map	= ultra3_event_map,
+	.cache_map	= &ultra3_cache_map,
+	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
+	.read_pmc	= sparc_default_read_pmc,
+	.write_pmc	= sparc_default_write_pmc,
+	.upper_shift	= 11,
+	.lower_shift	= 4,
+	.event_mask	= 0x3f,
+	.user_bit	= PCR_UTRACE,
+	.priv_bit	= PCR_STRACE,
+	.upper_nop	= 0x1c,
+	.lower_nop	= 0x14,
+	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME |
+			   SPARC_PMU_HAS_CONFLICTS),
+	.max_hw_events	= 2,
+	.num_pcrs	= 1,
+	.num_pic_regs	= 1,
+};
+
+/* Niagara1 is very limited.  The upper PIC is hard-locked to count
+ * only instructions, so it is free-running, which creates all kinds of
+ * problems.  Some hardware designs make one wonder if the creator
+ * even looked at how this stuff gets used by software.
+ */
+static const struct perf_event_map niagara1_perfmon_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
+	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
+};
+
+static const struct perf_event_map *niagara1_event_map(int event_id)
+{
+	return &niagara1_perfmon_event_map[event_id];
+}
+
+static const cache_map_t niagara1_cache_map = {
+[C(L1D)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(L1I)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
+		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(LL)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(DTLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(ITLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(BPU)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+};
+
+static const struct sparc_pmu niagara1_pmu = {
+	.event_map	= niagara1_event_map,
+	.cache_map	= &niagara1_cache_map,
+	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
+	.read_pmc	= sparc_default_read_pmc,
+	.write_pmc	= sparc_default_write_pmc,
+	.upper_shift	= 0,
+	.lower_shift	= 4,
+	.event_mask	= 0x7,
+	.user_bit	= PCR_UTRACE,
+	.priv_bit	= PCR_STRACE,
+	.upper_nop	= 0x0,
+	.lower_nop	= 0x0,
+	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME |
+			   SPARC_PMU_HAS_CONFLICTS),
+	.max_hw_events	= 2,
+	.num_pcrs	= 1,
+	.num_pic_regs	= 1,
+};
+
+static const struct perf_event_map niagara2_perfmon_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
+	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
+};
+
+static const struct perf_event_map *niagara2_event_map(int event_id)
+{
+	return &niagara2_perfmon_event_map[event_id];
+}
+
+static const cache_map_t niagara2_cache_map = {
+[C(L1D)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(L1I)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(LL)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
+		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(DTLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(ITLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { 0x0b04, PIC_UPPER | PIC_LOWER, },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(BPU)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+};
+
+static const struct sparc_pmu niagara2_pmu = {
+	.event_map	= niagara2_event_map,
+	.cache_map	= &niagara2_cache_map,
+	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
+	.read_pmc	= sparc_default_read_pmc,
+	.write_pmc	= sparc_default_write_pmc,
+	.upper_shift	= 19,
+	.lower_shift	= 6,
+	.event_mask	= 0xfff,
+	.user_bit	= PCR_UTRACE,
+	.priv_bit	= PCR_STRACE,
+	.hv_bit		= PCR_N2_HTRACE,
+	.irq_bit	= 0x30,
+	.upper_nop	= 0x220,
+	.lower_nop	= 0x220,
+	.flags		= (SPARC_PMU_ALL_EXCLUDES_SAME |
+			   SPARC_PMU_HAS_CONFLICTS),
+	.max_hw_events	= 2,
+	.num_pcrs	= 1,
+	.num_pic_regs	= 1,
+};
+
+static const struct perf_event_map niagara4_perfmon_event_map[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = { (26 << 6) },
+	[PERF_COUNT_HW_INSTRUCTIONS] = { (3 << 6) | 0x3f },
+	[PERF_COUNT_HW_CACHE_REFERENCES] = { (3 << 6) | 0x04 },
+	[PERF_COUNT_HW_CACHE_MISSES] = { (16 << 6) | 0x07 },
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { (4 << 6) | 0x01 },
+	[PERF_COUNT_HW_BRANCH_MISSES] = { (25 << 6) | 0x0f },
+};
+
+static const struct perf_event_map *niagara4_event_map(int event_id)
+{
+	return &niagara4_perfmon_event_map[event_id];
+}
+
+static const cache_map_t niagara4_cache_map = {
+[C(L1D)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
+		[C(RESULT_MISS)] = { (16 << 6) | 0x07 },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
+		[C(RESULT_MISS)] = { (16 << 6) | 0x07 },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(L1I)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { (3 << 6) | 0x3f },
+		[C(RESULT_MISS)] = { (11 << 6) | 0x03 },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_NONSENSE },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_NONSENSE },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(LL)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { (3 << 6) | 0x04 },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+	[C(OP_WRITE)] = {
+		[C(RESULT_ACCESS)] = { (3 << 6) | 0x08 },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+	[C(OP_PREFETCH)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(DTLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { (17 << 6) | 0x3f },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(ITLB)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { (6 << 6) | 0x3f },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(BPU)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+[C(NODE)] = {
+	[C(OP_READ)] = {
+		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
+		[C(RESULT_MISS)  ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = { CACHE_OP_UNSUPPORTED },
+		[ C(RESULT_MISS)   ] = { CACHE_OP_UNSUPPORTED },
+	},
+},
+};
+
+static u32 sparc_vt_read_pmc(int idx)
+{
+	u64 val = pcr_ops->read_pic(idx);
+
+	return val & 0xffffffff;
+}
+
+static void sparc_vt_write_pmc(int idx, u64 val)
+{
+	u64 pcr;
+
+	pcr = pcr_ops->read_pcr(idx);
+	/* ensure ov and ntc are reset */
+	pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
+
+	pcr_ops->write_pic(idx, val & 0xffffffff);
+
+	pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu niagara4_pmu = {
+	.event_map	= niagara4_event_map,
+	.cache_map	= &niagara4_cache_map,
+	.max_events	= ARRAY_SIZE(niagara4_perfmon_event_map),
+	.read_pmc	= sparc_vt_read_pmc,
+	.write_pmc	= sparc_vt_write_pmc,
+	.upper_shift	= 5,
+	.lower_shift	= 5,
+	.event_mask	= 0x7ff,
+	.user_bit	= PCR_N4_UTRACE,
+	.priv_bit	= PCR_N4_STRACE,
+
+	/* We explicitly don't support hypervisor tracing.  The T4
+	 * generates the overflow event for precise events via a trap
+	 * which will not be generated (ie. it's completely lost) if
+	 * we happen to be in the hypervisor when the event triggers.
+	 * Essentially, the overflow event reporting is completely
+	 * unusable when you have hypervisor mode tracing enabled.
+	 */
+	.hv_bit		= 0,
+
+	.irq_bit	= PCR_N4_TOE,
+	.upper_nop	= 0,
+	.lower_nop	= 0,
+	.flags		= 0,
+	.max_hw_events	= 4,
+	.num_pcrs	= 4,
+	.num_pic_regs	= 4,
+};
+
+static const struct sparc_pmu sparc_m7_pmu = {
+	.event_map	= niagara4_event_map,
+	.cache_map	= &niagara4_cache_map,
+	.max_events	= ARRAY_SIZE(niagara4_perfmon_event_map),
+	.read_pmc	= sparc_vt_read_pmc,
+	.write_pmc	= sparc_vt_write_pmc,
+	.upper_shift	= 5,
+	.lower_shift	= 5,
+	.event_mask	= 0x7ff,
+	.user_bit	= PCR_N4_UTRACE,
+	.priv_bit	= PCR_N4_STRACE,
+
+	/* We explicitly don't support hypervisor tracing. */
+	.hv_bit		= 0,
+
+	.irq_bit	= PCR_N4_TOE,
+	.upper_nop	= 0,
+	.lower_nop	= 0,
+	.flags		= 0,
+	.max_hw_events	= 4,
+	.num_pcrs	= 4,
+	.num_pic_regs	= 4,
+};
+
+static const struct sparc_pmu *sparc_pmu __read_mostly;
+
+static u64 event_encoding(u64 event_id, int idx)
+{
+	if (idx == PIC_UPPER_INDEX)
+		event_id <<= sparc_pmu->upper_shift;
+	else
+		event_id <<= sparc_pmu->lower_shift;
+	return event_id;
+}
+
+static u64 mask_for_index(int idx)
+{
+	return event_encoding(sparc_pmu->event_mask, idx);
+}
+
+static u64 nop_for_index(int idx)
+{
+	return event_encoding(idx == PIC_UPPER_INDEX ?
+			      sparc_pmu->upper_nop :
+			      sparc_pmu->lower_nop, idx);
+}
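+
+/*
+ * Example with the ultra3 PMU above: event_mask == 0x3f,
+ * upper_shift == 11 and upper_nop == 0x1c, so for PIC_UPPER_INDEX
+ * mask_for_index() == 0x3f << 11 == 0x1f800 and
+ * nop_for_index() == 0x1c << 11 == 0xe000.
+ */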
+
+static inline void sparc_pmu_enable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
+{
+	u64 enc, val, mask = mask_for_index(idx);
+	int pcr_index = 0;
+
+	if (sparc_pmu->num_pcrs > 1)
+		pcr_index = idx;
+
+	enc = perf_event_get_enc(cpuc->events[idx]);
+
+	val = cpuc->pcr[pcr_index];
+	val &= ~mask;
+	val |= event_encoding(enc, idx);
+	cpuc->pcr[pcr_index] = val;
+
+	pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
+}
+
+static inline void sparc_pmu_disable_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc, int idx)
+{
+	u64 mask = mask_for_index(idx);
+	u64 nop = nop_for_index(idx);
+	int pcr_index = 0;
+	u64 val;
+
+	if (sparc_pmu->num_pcrs > 1)
+		pcr_index = idx;
+
+	val = cpuc->pcr[pcr_index];
+	val &= ~mask;
+	val |= nop;
+	cpuc->pcr[pcr_index] = val;
+
+	pcr_ops->write_pcr(pcr_index, cpuc->pcr[pcr_index]);
+}
+
+static u64 sparc_perf_event_update(struct perf_event *event,
+				   struct hw_perf_event *hwc, int idx)
+{
+	int shift = 64 - 32;
+	u64 prev_raw_count, new_raw_count;
+	s64 delta;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = sparc_pmu->read_pmc(idx);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			     new_raw_count) != prev_raw_count)
+		goto again;
+
+	delta = (new_raw_count << shift) - (prev_raw_count << shift);
+	delta >>= shift;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
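+
+/*
+ * The shift dance sign-extends the 32-bit counter delta.  Example:
+ * with prev_raw_count == 0xfffffff0 and new_raw_count == 0x00000010
+ * (the counter wrapped), (new << 32) - (prev << 32) is
+ * 0x0000002000000000 and the arithmetic shift right by 32 yields
+ * delta == 0x20, as desired.
+ */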
+
+static int sparc_perf_event_set_period(struct perf_event *event,
+				       struct hw_perf_event *hwc, int idx)
+{
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+	if (left > MAX_PERIOD)
+		left = MAX_PERIOD;
+
+	local64_set(&hwc->prev_count, (u64)-left);
+
+	sparc_pmu->write_pmc(idx, (u64)(-left) & 0xffffffff);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
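+
+/*
+ * Example: with sample_period == 1000 the PIC is programmed with
+ * (u32)-1000 == 0xfffffc18, so the counter overflows, and raises the
+ * overflow interrupt, after exactly 1000 events.
+ */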
+
+static void read_in_all_counters(struct cpu_hw_events *cpuc)
+{
+	int i;
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *cp = cpuc->event[i];
+
+		if (cpuc->current_idx[i] != PIC_NO_INDEX &&
+		    cpuc->current_idx[i] != cp->hw.idx) {
+			sparc_perf_event_update(cp, &cp->hw,
+						cpuc->current_idx[i]);
+			cpuc->current_idx[i] = PIC_NO_INDEX;
+			if (cp->hw.state & PERF_HES_STOPPED)
+				cp->hw.state |= PERF_HES_ARCH;
+		}
+	}
+}
+
+/* On this PMU all PICs are programmed using a single PCR.  Calculate
+ * the combined control register value.
+ *
+ * For such chips we require that all of the events have the same
+ * configuration, so just fetch the settings from the first entry.
+ */
+static void calculate_single_pcr(struct cpu_hw_events *cpuc)
+{
+	int i;
+
+	if (!cpuc->n_added)
+		goto out;
+
+	/* Assign to counters all unassigned events.  */
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *cp = cpuc->event[i];
+		struct hw_perf_event *hwc = &cp->hw;
+		int idx = hwc->idx;
+		u64 enc;
+
+		if (cpuc->current_idx[i] != PIC_NO_INDEX)
+			continue;
+
+		sparc_perf_event_set_period(cp, hwc, idx);
+		cpuc->current_idx[i] = idx;
+
+		enc = perf_event_get_enc(cpuc->events[i]);
+		cpuc->pcr[0] &= ~mask_for_index(idx);
+		if (hwc->state & PERF_HES_ARCH) {
+			cpuc->pcr[0] |= nop_for_index(idx);
+		} else {
+			cpuc->pcr[0] |= event_encoding(enc, idx);
+			hwc->state = 0;
+		}
+	}
+out:
+	cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
+}
+
+static void sparc_pmu_start(struct perf_event *event, int flags);
+
+/* On this PMU each PIC has its own PCR control register.  */
+static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
+{
+	int i;
+
+	if (!cpuc->n_added)
+		goto out;
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *cp = cpuc->event[i];
+		struct hw_perf_event *hwc = &cp->hw;
+		int idx = hwc->idx;
+
+		if (cpuc->current_idx[i] != PIC_NO_INDEX)
+			continue;
+
+		cpuc->current_idx[i] = idx;
+
+		if (cp->hw.state & PERF_HES_ARCH)
+			continue;
+
+		sparc_pmu_start(cp, PERF_EF_RELOAD);
+	}
+out:
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *cp = cpuc->event[i];
+		int idx = cp->hw.idx;
+
+		cpuc->pcr[idx] |= cp->hw.config_base;
+	}
+}
+
+/* If performance event entries have been added, move existing events
+ * around (if necessary) and then assign new entries to counters.
+ */
+static void update_pcrs_for_enable(struct cpu_hw_events *cpuc)
+{
+	if (cpuc->n_added)
+		read_in_all_counters(cpuc);
+
+	if (sparc_pmu->num_pcrs == 1) {
+		calculate_single_pcr(cpuc);
+	} else {
+		calculate_multiple_pcrs(cpuc);
+	}
+}
+
+static void sparc_pmu_enable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	if (cpuc->n_events)
+		update_pcrs_for_enable(cpuc);
+
+	for (i = 0; i < sparc_pmu->num_pcrs; i++)
+		pcr_ops->write_pcr(i, cpuc->pcr[i]);
+}
+
+static void sparc_pmu_disable(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	cpuc->n_added = 0;
+
+	for (i = 0; i < sparc_pmu->num_pcrs; i++) {
+		u64 val = cpuc->pcr[i];
+
+		val &= ~(sparc_pmu->user_bit | sparc_pmu->priv_bit |
+			 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
+		cpuc->pcr[i] = val;
+		pcr_ops->write_pcr(i, cpuc->pcr[i]);
+	}
+}
+
+static int active_event_index(struct cpu_hw_events *cpuc,
+			      struct perf_event *event)
+{
+	int i;
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (cpuc->event[i] == event)
+			break;
+	}
+	BUG_ON(i == cpuc->n_events);
+	return cpuc->current_idx[i];
+}
+
+static void sparc_pmu_start(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		sparc_perf_event_set_period(event, &event->hw, idx);
+	}
+
+	event->hw.state = 0;
+
+	sparc_pmu_enable_event(cpuc, &event->hw, idx);
+
+	perf_event_update_userpage(event);
+}
+
+static void sparc_pmu_stop(struct perf_event *event, int flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
+
+	if (!(event->hw.state & PERF_HES_STOPPED)) {
+		sparc_pmu_disable_event(cpuc, &event->hw, idx);
+		event->hw.state |= PERF_HES_STOPPED;
+	}
+
+	if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) {
+		sparc_perf_event_update(event, &event->hw, idx);
+		event->hw.state |= PERF_HES_UPTODATE;
+	}
+}
+
+static void sparc_pmu_del(struct perf_event *event, int _flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	unsigned long flags;
+	int i;
+
+	local_irq_save(flags);
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		if (event == cpuc->event[i]) {
+			/* Absorb the final count and turn off the
+			 * event.
+			 */
+			sparc_pmu_stop(event, PERF_EF_UPDATE);
+
+			/* Shift remaining entries down into
+			 * the existing slot.
+			 */
+			while (++i < cpuc->n_events) {
+				cpuc->event[i - 1] = cpuc->event[i];
+				cpuc->events[i - 1] = cpuc->events[i];
+				cpuc->current_idx[i - 1] =
+					cpuc->current_idx[i];
+			}
+
+			perf_event_update_userpage(event);
+
+			cpuc->n_events--;
+			break;
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+static void sparc_pmu_read(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int idx = active_event_index(cpuc, event);
+	struct hw_perf_event *hwc = &event->hw;
+
+	sparc_perf_event_update(event, hwc, idx);
+}
+
+static atomic_t active_events = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pmc_grab_mutex);
+
+static void perf_stop_nmi_watchdog(void *unused)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	stop_nmi_watchdog(NULL);
+	for (i = 0; i < sparc_pmu->num_pcrs; i++)
+		cpuc->pcr[i] = pcr_ops->read_pcr(i);
+}
+
+static void perf_event_grab_pmc(void)
+{
+	if (atomic_inc_not_zero(&active_events))
+		return;
+
+	mutex_lock(&pmc_grab_mutex);
+	if (atomic_read(&active_events) == 0) {
+		if (atomic_read(&nmi_active) > 0) {
+			on_each_cpu(perf_stop_nmi_watchdog, NULL, 1);
+			BUG_ON(atomic_read(&nmi_active) != 0);
+		}
+		atomic_inc(&active_events);
+	}
+	mutex_unlock(&pmc_grab_mutex);
+}
+
+static void perf_event_release_pmc(void)
+{
+	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
+		if (atomic_read(&nmi_active) == 0)
+			on_each_cpu(start_nmi_watchdog, NULL, 1);
+		mutex_unlock(&pmc_grab_mutex);
+	}
+}
+
+static const struct perf_event_map *sparc_map_cache_event(u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result;
+	const struct perf_event_map *pmap;
+
+	if (!sparc_pmu->cache_map)
+		return ERR_PTR(-ENOENT);
+
+	cache_type = (config >>  0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return ERR_PTR(-EINVAL);
+
+	cache_op = (config >>  8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return ERR_PTR(-EINVAL);
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);
+
+	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
+		return ERR_PTR(-ENOENT);
+
+	if (pmap->encoding == CACHE_OP_NONSENSE)
+		return ERR_PTR(-EINVAL);
+
+	return pmap;
+}
+
+static void hw_perf_event_destroy(struct perf_event *event)
+{
+	perf_event_release_pmc();
+}
+
+/* Make sure all events can be scheduled into the hardware at
+ * the same time.  This is simplified by the fact that we only
+ * need to support 2 simultaneous HW events.
+ *
+ * As a side effect, the evts[]->hw.idx values will be assigned
+ * on success.  These are pending indexes.  When the events are
+ * actually programmed into the chip, these values will propagate
+ * to the per-cpu cpuc->current_idx[] slots, see the code in
+ * maybe_change_configuration() for details.
+ */
+static int sparc_check_constraints(struct perf_event **evts,
+				   unsigned long *events, int n_ev)
+{
+	u8 msk0 = 0, msk1 = 0;
+	int idx0 = 0;
+
+	/* This case is possible when we are invoked from
+	 * hw_perf_group_sched_in().
+	 */
+	if (!n_ev)
+		return 0;
+
+	if (n_ev > sparc_pmu->max_hw_events)
+		return -1;
+
+	if (!(sparc_pmu->flags & SPARC_PMU_HAS_CONFLICTS)) {
+		int i;
+
+		for (i = 0; i < n_ev; i++)
+			evts[i]->hw.idx = i;
+		return 0;
+	}
+
+	msk0 = perf_event_get_msk(events[0]);
+	if (n_ev == 1) {
+		if (msk0 & PIC_LOWER)
+			idx0 = 1;
+		goto success;
+	}
+	BUG_ON(n_ev != 2);
+	msk1 = perf_event_get_msk(events[1]);
+
+	/* If both events can go on any counter, OK.  */
+	if (msk0 == (PIC_UPPER | PIC_LOWER) &&
+	    msk1 == (PIC_UPPER | PIC_LOWER))
+		goto success;
+
+	/* If one event is limited to a specific counter,
+	 * and the other can go on both, OK.
+	 */
+	if ((msk0 == PIC_UPPER || msk0 == PIC_LOWER) &&
+	    msk1 == (PIC_UPPER | PIC_LOWER)) {
+		if (msk0 & PIC_LOWER)
+			idx0 = 1;
+		goto success;
+	}
+
+	if ((msk1 == PIC_UPPER || msk1 == PIC_LOWER) &&
+	    msk0 == (PIC_UPPER | PIC_LOWER)) {
+		if (msk1 & PIC_UPPER)
+			idx0 = 1;
+		goto success;
+	}
+
+	/* If the events are fixed to different counters, OK.  */
+	if ((msk0 == PIC_UPPER && msk1 == PIC_LOWER) ||
+	    (msk0 == PIC_LOWER && msk1 == PIC_UPPER)) {
+		if (msk0 & PIC_LOWER)
+			idx0 = 1;
+		goto success;
+	}
+
+	/* Otherwise, there is a conflict.  */
+	return -1;
+
+success:
+	evts[0]->hw.idx = idx0;
+	if (n_ev == 2)
+		evts[1]->hw.idx = idx0 ^ 1;
+	return 0;
+}
+
+static int check_excludes(struct perf_event **evts, int n_prev, int n_new)
+{
+	int eu = 0, ek = 0, eh = 0;
+	struct perf_event *event;
+	int i, n, first;
+
+	if (!(sparc_pmu->flags & SPARC_PMU_ALL_EXCLUDES_SAME))
+		return 0;
+
+	n = n_prev + n_new;
+	if (n <= 1)
+		return 0;
+
+	first = 1;
+	for (i = 0; i < n; i++) {
+		event = evts[i];
+		if (first) {
+			eu = event->attr.exclude_user;
+			ek = event->attr.exclude_kernel;
+			eh = event->attr.exclude_hv;
+			first = 0;
+		} else if (event->attr.exclude_user != eu ||
+			   event->attr.exclude_kernel != ek ||
+			   event->attr.exclude_hv != eh) {
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
+
+static int collect_events(struct perf_event *group, int max_count,
+			  struct perf_event *evts[], unsigned long *events,
+			  int *current_idx)
+{
+	struct perf_event *event;
+	int n = 0;
+
+	if (!is_software_event(group)) {
+		if (n >= max_count)
+			return -1;
+		evts[n] = group;
+		events[n] = group->hw.event_base;
+		current_idx[n++] = PIC_NO_INDEX;
+	}
+	for_each_sibling_event(event, group) {
+		if (!is_software_event(event) &&
+		    event->state != PERF_EVENT_STATE_OFF) {
+			if (n >= max_count)
+				return -1;
+			evts[n] = event;
+			events[n] = event->hw.event_base;
+			current_idx[n++] = PIC_NO_INDEX;
+		}
+	}
+	return n;
+}
+
+static int sparc_pmu_add(struct perf_event *event, int ef_flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int n0, ret = -EAGAIN;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	n0 = cpuc->n_events;
+	if (n0 >= sparc_pmu->max_hw_events)
+		goto out;
+
+	cpuc->event[n0] = event;
+	cpuc->events[n0] = event->hw.event_base;
+	cpuc->current_idx[n0] = PIC_NO_INDEX;
+
+	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (!(ef_flags & PERF_EF_START))
+		event->hw.state |= PERF_HES_ARCH;
+
+	/*
+	 * If group events scheduling transaction was started,
+	 * skip the schedulability test here, it will be performed
+	 * at commit time(->commit_txn) as a whole
+	 */
+	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+		goto nocheck;
+
+	if (check_excludes(cpuc->event, n0, 1))
+		goto out;
+	if (sparc_check_constraints(cpuc->event, cpuc->events, n0 + 1))
+		goto out;
+
+nocheck:
+	cpuc->n_events++;
+	cpuc->n_added++;
+
+	ret = 0;
+out:
+	local_irq_restore(flags);
+	return ret;
+}
+
+static int sparc_pmu_event_init(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct perf_event *evts[MAX_HWEVENTS];
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned long events[MAX_HWEVENTS];
+	int current_idx_dmy[MAX_HWEVENTS];
+	const struct perf_event_map *pmap;
+	int n;
+
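+	/* A negative nmi_active means the NMI watchdog (and with it the
+	 * underlying PCR support) is unavailable, so no events can be
+	 * created at all.
+	 */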
+	if (atomic_read(&nmi_active) < 0)
+		return -ENODEV;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	switch (attr->type) {
+	case PERF_TYPE_HARDWARE:
+		if (attr->config >= sparc_pmu->max_events)
+			return -EINVAL;
+		pmap = sparc_pmu->event_map(attr->config);
+		break;
+
+	case PERF_TYPE_HW_CACHE:
+		pmap = sparc_map_cache_event(attr->config);
+		if (IS_ERR(pmap))
+			return PTR_ERR(pmap);
+		break;
+
+	case PERF_TYPE_RAW:
+		pmap = NULL;
+		break;
+
+	default:
+		return -ENOENT;
+	}
+
+	if (pmap) {
+		hwc->event_base = perf_event_encode(pmap);
+	} else {
+		/*
+		 * User gives us "(encoding << 16) | pic_mask" for
+		 * PERF_TYPE_RAW events.
+		 */
+		hwc->event_base = attr->config;
+	}
+
+	/* We save the enable bits in the config_base.  */
+	hwc->config_base = sparc_pmu->irq_bit;
+	if (!attr->exclude_user)
+		hwc->config_base |= sparc_pmu->user_bit;
+	if (!attr->exclude_kernel)
+		hwc->config_base |= sparc_pmu->priv_bit;
+	if (!attr->exclude_hv)
+		hwc->config_base |= sparc_pmu->hv_bit;
+
+	n = 0;
+	if (event->group_leader != event) {
+		n = collect_events(event->group_leader,
+				   sparc_pmu->max_hw_events - 1,
+				   evts, events, current_idx_dmy);
+		if (n < 0)
+			return -EINVAL;
+	}
+	events[n] = hwc->event_base;
+	evts[n] = event;
+
+	if (check_excludes(evts, n, 1))
+		return -EINVAL;
+
+	if (sparc_check_constraints(evts, events, n + 1))
+		return -EINVAL;
+
+	hwc->idx = PIC_NO_INDEX;
+
+	/* Try to do all error checking before this point, as unwinding
+	 * state after grabbing the PMC is difficult.
+	 */
+	perf_event_grab_pmc();
+	event->destroy = hw_perf_event_destroy;
+
+	if (!hwc->sample_period) {
+		hwc->sample_period = MAX_PERIOD;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	return 0;
+}
+
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ */
+static void sparc_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+	WARN_ON_ONCE(cpuhw->txn_flags);		/* txn already in flight */
+
+	cpuhw->txn_flags = txn_flags;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
+	perf_pmu_disable(pmu);
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void sparc_pmu_cancel_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+	unsigned int txn_flags;
+
+	WARN_ON_ONCE(!cpuhw->txn_flags);	/* no txn in flight */
+
+	txn_flags = cpuhw->txn_flags;
+	cpuhw->txn_flags = 0;
+	if (txn_flags & ~PERF_PMU_TXN_ADD)
+		return;
+
+	perf_pmu_enable(pmu);
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ */
+static int sparc_pmu_commit_txn(struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int n;
+
+	if (!sparc_pmu)
+		return -EINVAL;
+
+	WARN_ON_ONCE(!cpuc->txn_flags);	/* no txn in flight */
+
+	if (cpuc->txn_flags & ~PERF_PMU_TXN_ADD) {
+		cpuc->txn_flags = 0;
+		return 0;
+	}
+
+	n = cpuc->n_events;
+	if (check_excludes(cpuc->event, 0, n))
+		return -EINVAL;
+	if (sparc_check_constraints(cpuc->event, cpuc->events, n))
+		return -EAGAIN;
+
+	cpuc->txn_flags = 0;
+	perf_pmu_enable(pmu);
+	return 0;
+}
+
+static struct pmu pmu = {
+	.pmu_enable	= sparc_pmu_enable,
+	.pmu_disable	= sparc_pmu_disable,
+	.event_init	= sparc_pmu_event_init,
+	.add		= sparc_pmu_add,
+	.del		= sparc_pmu_del,
+	.start		= sparc_pmu_start,
+	.stop		= sparc_pmu_stop,
+	.read		= sparc_pmu_read,
+	.start_txn	= sparc_pmu_start_txn,
+	.cancel_txn	= sparc_pmu_cancel_txn,
+	.commit_txn	= sparc_pmu_commit_txn,
+};
+
+void perf_event_print_debug(void)
+{
+	unsigned long flags;
+	int cpu, i;
+
+	if (!sparc_pmu)
+		return;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	pr_info("\n");
+	for (i = 0; i < sparc_pmu->num_pcrs; i++)
+		pr_info("CPU#%d: PCR%d[%016llx]\n",
+			cpu, i, pcr_ops->read_pcr(i));
+	for (i = 0; i < sparc_pmu->num_pic_regs; i++)
+		pr_info("CPU#%d: PIC%d[%016llx]\n",
+			cpu, i, pcr_ops->read_pic(i));
+
+	local_irq_restore(flags);
+}
+
+static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
+					    unsigned long cmd, void *__args)
+{
+	struct die_args *args = __args;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	u64 finish_clock;
+	u64 start_clock;
+	int i;
+
+	if (!atomic_read(&active_events))
+		return NOTIFY_DONE;
+
+	switch (cmd) {
+	case DIE_NMI:
+		break;
+
+	default:
+		return NOTIFY_DONE;
+	}
+
+	start_clock = sched_clock();
+
+	regs = args->regs;
+
+	cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/* If the PMU has the TOE IRQ enable bits, we need to do a
+	 * dummy write to the %pcr to clear the overflow bits and thus
+	 * the interrupt.
+	 *
+	 * Do this before we peek at the counters to determine
+	 * overflow so we don't lose any events.
+	 */
+	if (sparc_pmu->irq_bit &&
+	    sparc_pmu->num_pcrs == 1)
+		pcr_ops->write_pcr(0, cpuc->pcr[0]);
+
+	for (i = 0; i < cpuc->n_events; i++) {
+		struct perf_event *event = cpuc->event[i];
+		int idx = cpuc->current_idx[i];
+		struct hw_perf_event *hwc;
+		u64 val;
+
+		if (sparc_pmu->irq_bit &&
+		    sparc_pmu->num_pcrs > 1)
+			pcr_ops->write_pcr(idx, cpuc->pcr[idx]);
+
+		hwc = &event->hw;
+		val = sparc_perf_event_update(event, hwc, idx);
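+		/* Counters are programmed with a negative start value
+		 * (see sparc_perf_event_set_period), so while bit 31
+		 * is still set the counter has not wrapped and this
+		 * event did not overflow.
+		 */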
+		if (val & (1ULL << 31))
+			continue;
+
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!sparc_perf_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			sparc_pmu_stop(event, 0);
+	}
+
+	finish_clock = sched_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+
+	return NOTIFY_STOP;
+}
+
+static __read_mostly struct notifier_block perf_event_nmi_notifier = {
+	.notifier_call		= perf_event_nmi_handler,
+};
+
+static bool __init supported_pmu(void)
+{
+	if (!strcmp(sparc_pmu_type, "ultra3") ||
+	    !strcmp(sparc_pmu_type, "ultra3+") ||
+	    !strcmp(sparc_pmu_type, "ultra3i") ||
+	    !strcmp(sparc_pmu_type, "ultra4+")) {
+		sparc_pmu = &ultra3_pmu;
+		return true;
+	}
+	if (!strcmp(sparc_pmu_type, "niagara")) {
+		sparc_pmu = &niagara1_pmu;
+		return true;
+	}
+	if (!strcmp(sparc_pmu_type, "niagara2") ||
+	    !strcmp(sparc_pmu_type, "niagara3")) {
+		sparc_pmu = &niagara2_pmu;
+		return true;
+	}
+	if (!strcmp(sparc_pmu_type, "niagara4") ||
+	    !strcmp(sparc_pmu_type, "niagara5")) {
+		sparc_pmu = &niagara4_pmu;
+		return true;
+	}
+	if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+		sparc_pmu = &sparc_m7_pmu;
+		return true;
+	}
+	return false;
+}
+
+static int __init init_hw_perf_events(void)
+{
+	int err;
+
+	pr_info("Performance events: ");
+
+	err = pcr_arch_init();
+	if (err || !supported_pmu()) {
+		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
+		return 0;
+	}
+
+	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);
+
+	perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
+	register_die_notifier(&perf_event_nmi_notifier);
+
+	return 0;
+}
+pure_initcall(init_hw_perf_events);
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+			   struct pt_regs *regs)
+{
+	unsigned long ksp, fp;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	int graph = 0;
+#endif
+
+	stack_trace_flush();
+
+	perf_callchain_store(entry, regs->tpc);
+
+	ksp = regs->u_regs[UREG_I6];
+	fp = ksp + STACK_BIAS;
+	do {
+		struct sparc_stackf *sf;
+		struct pt_regs *regs;
+		unsigned long pc;
+
+		if (!kstack_valid(current_thread_info(), fp))
+			break;
+
+		sf = (struct sparc_stackf *) fp;
+		regs = (struct pt_regs *) (sf + 1);
+
+		if (kstack_is_trap_frame(current_thread_info(), regs)) {
+			if (user_mode(regs))
+				break;
+			pc = regs->tpc;
+			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+		} else {
+			pc = sf->callers_pc;
+			fp = (unsigned long)sf->fp + STACK_BIAS;
+		}
+		perf_callchain_store(entry, pc);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
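+		/* The function graph tracer patches return addresses
+		 * to point at return_to_handler; recover the original
+		 * caller from the task's ret_stack so the callchain
+		 * stays meaningful.
+		 */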
+		if ((pc + 8UL) == (unsigned long) &return_to_handler) {
+			int index = current->curr_ret_stack;
+			if (current->ret_stack && index >= graph) {
+				pc = current->ret_stack[index - graph].ret;
+				perf_callchain_store(entry, pc);
+				graph++;
+			}
+		}
+#endif
+	} while (entry->nr < entry->max_stack);
+}
+
+static inline int
+valid_user_frame(const void __user *fp, unsigned long size)
+{
+	/* addresses should be at least 4-byte aligned */
+	if (((unsigned long) fp) & 3)
+		return 0;
+
+	return (__range_not_ok(fp, size, TASK_SIZE) == 0);
+}
+
+static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+				   struct pt_regs *regs)
+{
+	unsigned long ufp;
+
+	ufp = regs->u_regs[UREG_FP] + STACK_BIAS;
+	do {
+		struct sparc_stackf __user *usf;
+		struct sparc_stackf sf;
+		unsigned long pc;
+
+		usf = (struct sparc_stackf __user *)ufp;
+		if (!valid_user_frame(usf, sizeof(sf)))
+			break;
+
+		if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+			break;
+
+		pc = sf.callers_pc;
+		ufp = (unsigned long)sf.fp + STACK_BIAS;
+		perf_callchain_store(entry, pc);
+	} while (entry->nr < entry->max_stack);
+}
+
+static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+				   struct pt_regs *regs)
+{
+	unsigned long ufp;
+
+	ufp = regs->u_regs[UREG_FP] & 0xffffffffUL;
+	do {
+		unsigned long pc;
+
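+		/* A compat task may still own 64-bit stack frames, so
+		 * decode whichever frame layout this part of the
+		 * stack uses.
+		 */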
+		if (thread32_stack_is_64bit(ufp)) {
+			struct sparc_stackf __user *usf;
+			struct sparc_stackf sf;
+
+			ufp += STACK_BIAS;
+			usf = (struct sparc_stackf __user *)ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc & 0xffffffff;
+			ufp = ((unsigned long) sf.fp) & 0xffffffff;
+		} else {
+			struct sparc_stackf32 __user *usf;
+			struct sparc_stackf32 sf;
+			usf = (struct sparc_stackf32 __user *)ufp;
+			if (__copy_from_user_inatomic(&sf, usf, sizeof(sf)))
+				break;
+			pc = sf.callers_pc;
+			ufp = (unsigned long)sf.fp;
+		}
+		perf_callchain_store(entry, pc);
+	} while (entry->nr < entry->max_stack);
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
+{
+	u64 saved_fault_address = current_thread_info()->fault_address;
+	u8 saved_fault_code = get_thread_fault_code();
+	mm_segment_t old_fs;
+
+	perf_callchain_store(entry, regs->tpc);
+
+	if (!current->mm)
+		return;
+
+	old_fs = get_fs();
+	set_fs(USER_DS);
+
+	flushw_user();
+
+	pagefault_disable();
+
+	if (test_thread_flag(TIF_32BIT))
+		perf_callchain_user_32(entry, regs);
+	else
+		perf_callchain_user_64(entry, regs);
+
+	pagefault_enable();
+
+	set_fs(old_fs);
+	set_thread_fault_code(saved_fault_code);
+	current_thread_info()->fault_address = saved_fault_address;
+}
diff --git a/arch/sparc/kernel/pmc.c b/arch/sparc/kernel/pmc.c
new file mode 100644
index 0000000..b5c1eb3
--- /dev/null
+++ b/arch/sparc/kernel/pmc.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* pmc - Driver implementation for power management functions
+ * of Power Management Controller (PMC) on SPARCstation-Voyager.
+ *
+ * Copyright (c) 2002 Eric Brower (ebrower@usa.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/oplib.h>
+#include <linux/uaccess.h>
+#include <asm/auxio.h>
+#include <asm/processor.h>
+
+/* Debug
+ *
+ * #define PMC_DEBUG_LED
+ * #define PMC_NO_IDLE
+ */
+
+#define PMC_OBPNAME	"SUNW,pmc"
+#define PMC_DEVNAME	"pmc"
+
+#define PMC_IDLE_REG	0x00
+#define PMC_IDLE_ON	0x01
+
+static u8 __iomem *regs;
+
+#define pmc_readb(offs)		(sbus_readb(regs + (offs)))
+#define pmc_writeb(val, offs)	(sbus_writeb(val, regs + (offs)))
+
+/*
+ * CPU idle callback function
+ * See .../arch/sparc/kernel/process.c
+ */
+static void pmc_swift_idle(void)
+{
+#ifdef PMC_DEBUG_LED
+	set_auxio(0x00, AUXIO_LED);
+#endif
+
+	pmc_writeb(pmc_readb(PMC_IDLE_REG) | PMC_IDLE_ON, PMC_IDLE_REG);
+
+#ifdef PMC_DEBUG_LED
+	set_auxio(AUXIO_LED, 0x00);
+#endif
+}
+
+static int pmc_probe(struct platform_device *op)
+{
+	regs = of_ioremap(&op->resource[0], 0,
+			  resource_size(&op->resource[0]), PMC_OBPNAME);
+	if (!regs) {
+		printk(KERN_ERR "%s: unable to map registers\n", PMC_DEVNAME);
+		return -ENODEV;
+	}
+
+#ifndef PMC_NO_IDLE
+	/* Assign power management IDLE handler */
+	sparc_idle = pmc_swift_idle;
+#endif
+
+	printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME);
+	return 0;
+}
+
+static const struct of_device_id pmc_match[] = {
+	{
+		.name = PMC_OBPNAME,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, pmc_match);
+
+static struct platform_driver pmc_driver = {
+	.driver = {
+		.name = "pmc",
+		.of_match_table = pmc_match,
+	},
+	.probe		= pmc_probe,
+};
+
+static int __init pmc_init(void)
+{
+	return platform_driver_register(&pmc_driver);
+}
+
+/* This driver is not critical to the boot process
+ * and is easiest to ioremap when SBus is already
+ * initialized, so we install ourselves thusly:
+ */
+__initcall(pmc_init);
diff --git a/arch/sparc/kernel/power.c b/arch/sparc/kernel/power.c
new file mode 100644
index 0000000..92627ab
--- /dev/null
+++ b/arch/sparc/kernel/power.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* power.c: Power management driver.
+ *
+ * Copyright (C) 1999, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/of_device.h>
+
+#include <asm/prom.h>
+#include <asm/io.h>
+
+static void __iomem *power_reg;
+
+static irqreturn_t power_handler(int irq, void *dev_id)
+{
+	orderly_poweroff(true);
+
+	/* FIXME: Check registers for status... */
+	return IRQ_HANDLED;
+}
+
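+/* The PROM reports 0xffffffff when no interrupt is assigned, and only
+ * nodes carrying a "button" property can deliver power-button
+ * interrupts.
+ */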
+static int has_button_interrupt(unsigned int irq, struct device_node *dp)
+{
+	if (irq == 0xffffffff)
+		return 0;
+	if (!of_find_property(dp, "button", NULL))
+		return 0;
+
+	return 1;
+}
+
+static int power_probe(struct platform_device *op)
+{
+	struct resource *res = &op->resource[0];
+	unsigned int irq = op->archdata.irqs[0];
+
+	power_reg = of_ioremap(res, 0, 0x4, "power");
+
+	printk(KERN_INFO "%s: Control reg at %llx\n",
+	       op->dev.of_node->name, res->start);
+
+	if (has_button_interrupt(irq, op->dev.of_node)) {
+		if (request_irq(irq,
+				power_handler, 0, "power", NULL) < 0)
+			printk(KERN_ERR "power: Cannot setup IRQ handler.\n");
+	}
+
+	return 0;
+}
+
+static const struct of_device_id power_match[] = {
+	{
+		.name = "power",
+	},
+	{},
+};
+
+static struct platform_driver power_driver = {
+	.probe		= power_probe,
+	.driver = {
+		.name = "power",
+		.of_match_table = power_match,
+	},
+};
+
+builtin_platform_driver(power_driver);
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
new file mode 100644
index 0000000..d9662cf
--- /dev/null
+++ b/arch/sparc/kernel/process_32.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  linux/arch/sparc/kernel/process.c
+ *
+ *  Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net)
+ *  Copyright (C) 1996 Eddie C. Dost   (ecd@skynet.be)
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling.
+ */
+
+#include <stdarg.h>
+
+#include <linux/elfcore.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/delay.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+
+#include <asm/auxio.h>
+#include <asm/oplib.h>
+#include <linux/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/processor.h>
+#include <asm/psr.h>
+#include <asm/elf.h>
+#include <asm/prom.h>
+#include <asm/unistd.h>
+#include <asm/setup.h>
+
+#include "kernel.h"
+
+/*
+ * Power management idle function.
+ * Set in pm platform drivers (apc.c and pmc.c).
+ */
+void (*sparc_idle)(void);
+
+/*
+ * Power-off handler instantiation for pm.h compliance.
+ * This is done via auxio, but could be used as a fallback
+ * handler when auxio is not present; unused for now.
+ */
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
+
+/*
+ * sysctl - toggle power-off restriction for serial console 
+ * systems in machine_power_off()
+ */
+int scons_pwroff = 1;
+
+extern void fpsave(unsigned long *, unsigned long *, void *, unsigned long *);
+
+struct task_struct *last_task_used_math = NULL;
+struct thread_info *current_set[NR_CPUS];
+
+/* Idle loop support. */
+void arch_cpu_idle(void)
+{
+	if (sparc_idle)
+		(*sparc_idle)();
+	local_irq_enable();
+}
+
+/* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
+void machine_halt(void)
+{
+	local_irq_enable();
+	mdelay(8);
+	local_irq_disable();
+	prom_halt();
+	panic("Halt failed!");
+}
+
+void machine_restart(char *cmd)
+{
+	char *p;
+
+	local_irq_enable();
+	mdelay(8);
+	local_irq_disable();
+
+	p = strchr(reboot_command, '\n');
+	if (p)
+		*p = 0;
+	if (cmd)
+		prom_reboot(cmd);
+	if (*reboot_command)
+		prom_reboot(reboot_command);
+	prom_feval("reset");
+	panic("Reboot failed!");
+}
+
+void machine_power_off(void)
+{
+	if (auxio_power_register &&
+	    (strcmp(of_console_device->type, "serial") || scons_pwroff)) {
+		u8 power_register = sbus_readb(auxio_power_register);
+		power_register |= AUXIO_POWER_OFF;
+		sbus_writeb(power_register, auxio_power_register);
+	}
+
+	machine_halt();
+}
+
+void show_regs(struct pt_regs *r)
+{
+	struct reg_window32 *rw = (struct reg_window32 *) r->u_regs[14];
+
+	show_regs_print_info(KERN_DEFAULT);
+
+        printk("PSR: %08lx PC: %08lx NPC: %08lx Y: %08lx    %s\n",
+	       r->psr, r->pc, r->npc, r->y, print_tainted());
+	printk("PC: <%pS>\n", (void *) r->pc);
+	printk("%%G: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
+	       r->u_regs[0], r->u_regs[1], r->u_regs[2], r->u_regs[3],
+	       r->u_regs[4], r->u_regs[5], r->u_regs[6], r->u_regs[7]);
+	printk("%%O: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
+	       r->u_regs[8], r->u_regs[9], r->u_regs[10], r->u_regs[11],
+	       r->u_regs[12], r->u_regs[13], r->u_regs[14], r->u_regs[15]);
+	printk("RPC: <%pS>\n", (void *) r->u_regs[15]);
+
+	printk("%%L: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
+	       rw->locals[0], rw->locals[1], rw->locals[2], rw->locals[3],
+	       rw->locals[4], rw->locals[5], rw->locals[6], rw->locals[7]);
+	printk("%%I: %08lx %08lx  %08lx %08lx  %08lx %08lx  %08lx %08lx\n",
+	       rw->ins[0], rw->ins[1], rw->ins[2], rw->ins[3],
+	       rw->ins[4], rw->ins[5], rw->ins[6], rw->ins[7]);
+}
+
+/*
+ * The show_stack is an external API which we do not use ourselves.
+ * The oops is printed in die_if_kernel.
+ */
+void show_stack(struct task_struct *tsk, unsigned long *_ksp)
+{
+	unsigned long pc, fp;
+	unsigned long task_base;
+	struct reg_window32 *rw;
+	int count = 0;
+
+	if (!tsk)
+		tsk = current;
+
+	if (tsk == current && !_ksp)
+		__asm__ __volatile__("mov	%%fp, %0" : "=r" (_ksp));
+
+	task_base = (unsigned long) task_stack_page(tsk);
+	fp = (unsigned long) _ksp;
+	do {
+		/* Bogus frame pointer? */
+		if (fp < (task_base + sizeof(struct thread_info)) ||
+		    fp >= (task_base + (PAGE_SIZE << 1)))
+			break;
+		rw = (struct reg_window32 *) fp;
+		pc = rw->ins[7];
+		printk("[%08lx : ", pc);
+		printk("%pS ] ", (void *) pc);
+		fp = rw->ins[6];
+	} while (++count < 16);
+	printk("\n");
+}
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(struct task_struct *tsk)
+{
+#ifndef CONFIG_SMP
+	if (last_task_used_math == tsk) {
+#else
+	if (test_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU)) {
+#endif
+		/* Keep process from leaving FPU in a bogon state. */
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&tsk->thread.float_regs[0], &tsk->thread.fsr,
+		       &tsk->thread.fpqueue[0], &tsk->thread.fpqdepth);
+#ifndef CONFIG_SMP
+		last_task_used_math = NULL;
+#else
+		clear_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU);
+#endif
+	}
+}
+
+void flush_thread(void)
+{
+	current_thread_info()->w_saved = 0;
+
+#ifndef CONFIG_SMP
+	if (last_task_used_math == current) {
+#else
+	if (test_thread_flag(TIF_USEDFPU)) {
+#endif
+		/* Clean the fpu. */
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+#ifndef CONFIG_SMP
+		last_task_used_math = NULL;
+#else
+		clear_thread_flag(TIF_USEDFPU);
+#endif
+	}
+
+	/* This task is no longer a kernel thread. */
+	if (current->thread.flags & SPARC_FLAG_KTHREAD) {
+		current->thread.flags &= ~SPARC_FLAG_KTHREAD;
+
+		/* We must fixup kregs as well. */
+		/* XXX This was not fixed for ti for a while, worked. Unused? */
+		current->thread.kregs = (struct pt_regs *)
+		    (task_stack_page(current) + (THREAD_SIZE - TRACEREG_SZ));
+	}
+}
+
+static inline struct sparc_stackf __user *
+clone_stackframe(struct sparc_stackf __user *dst,
+		 struct sparc_stackf __user *src)
+{
+	unsigned long size, fp;
+	struct sparc_stackf *tmp;
+	struct sparc_stackf __user *sp;
+
+	if (get_user(tmp, &src->fp))
+		return NULL;
+
+	fp = (unsigned long) tmp;
+	size = (fp - ((unsigned long) src));
+	fp = (unsigned long) dst;
+	sp = (struct sparc_stackf __user *)(fp - size);
+
+	/* do_fork() grabs the parent semaphore, we must release it
+	 * temporarily so we can build the child clone stack frame
+	 * without deadlocking.
+	 */
+	if (__copy_user(sp, src, size))
+		sp = NULL;
+	else if (put_user(fp, &sp->fp))
+		sp = NULL;
+
+	return sp;
+}
+
+asmlinkage int sparc_do_fork(unsigned long clone_flags,
+                             unsigned long stack_start,
+                             struct pt_regs *regs,
+                             unsigned long stack_size)
+{
+	unsigned long parent_tid_ptr, child_tid_ptr;
+	unsigned long orig_i1 = regs->u_regs[UREG_I1];
+	long ret;
+
+	parent_tid_ptr = regs->u_regs[UREG_I2];
+	child_tid_ptr = regs->u_regs[UREG_I4];
+
+	ret = do_fork(clone_flags, stack_start, stack_size,
+		      (int __user *) parent_tid_ptr,
+		      (int __user *) child_tid_ptr);
+
+	/* If we get an error and potentially restart the system
+	 * call, we're screwed because copy_thread() clobbered
+	 * the parent's %o1.  So detect that case and restore it
+	 * here.
+	 */
+	if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
+		regs->u_regs[UREG_I1] = orig_i1;
+
+	return ret;
+}
+
+/* Copy a Sparc thread.  The fork() return value conventions
+ * under SunOS are nothing short of bletcherous:
+ * Parent -->  %o0 == child's  pid, %o1 == 0
+ * Child  -->  %o0 == parent's pid, %o1 == 1
+ *
+ * NOTE: We have a separate fork kpsr/kwim because
+ *       the parent could change these values between
+ *       sys_fork invocation and when we reach here
+ *       if the parent should sleep while trying to
+ *       allocate the task_struct and kernel stack in
+ *       do_fork().
+ * XXX See comment above sys_vfork in sparc64. todo.
+ */
+extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
+
+int copy_thread(unsigned long clone_flags, unsigned long sp,
+		unsigned long arg, struct task_struct *p)
+{
+	struct thread_info *ti = task_thread_info(p);
+	struct pt_regs *childregs, *regs = current_pt_regs();
+	char *new_stack;
+
+#ifndef CONFIG_SMP
+	if (last_task_used_math == current) {
+#else
+	if (test_thread_flag(TIF_USEDFPU)) {
+#endif
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&p->thread.float_regs[0], &p->thread.fsr,
+		       &p->thread.fpqueue[0], &p->thread.fpqdepth);
+	}
+
+	/*
+	 *  p->thread_info         new_stack   childregs stack bottom
+	 *  !                      !           !             !
+	 *  V                      V (stk.fr.) V  (pt_regs)  V
+	 *  +----- - - - - - ------+===========+=============+
+	 */
+	new_stack = task_stack_page(p) + THREAD_SIZE;
+	new_stack -= STACKFRAME_SZ + TRACEREG_SZ;
+	childregs = (struct pt_regs *) (new_stack + STACKFRAME_SZ);
+
+	/*
+	 * A new process must start with interrupts disabled, see schedule_tail()
+	 * and finish_task_switch(). (If we do not do it and if a timer interrupt
+	 * hits before we unlock and attempts to take the rq->lock, we deadlock.)
+	 *
+	 * Thus, kpsr |= PSR_PIL.
+	 */
+	ti->ksp = (unsigned long) new_stack;
+	p->thread.kregs = childregs;
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		extern int nwindows;
+		unsigned long psr;
+		memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
+		p->thread.flags |= SPARC_FLAG_KTHREAD;
+		p->thread.current_ds = KERNEL_DS;
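+		/* SPARC returns to the saved pc + 8, so bias kpc by -8
+		 * to make the switch code land exactly on
+		 * ret_from_kernel_thread.
+		 */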
+		ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
+		childregs->u_regs[UREG_G1] = sp; /* function */
+		childregs->u_regs[UREG_G2] = arg;
+		psr = childregs->psr = get_psr();
+		ti->kpsr = psr | PSR_PIL;
+		ti->kwim = 1 << (((psr & PSR_CWP) + 1) % nwindows);
+		return 0;
+	}
+	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
+	childregs->u_regs[UREG_FP] = sp;
+	p->thread.flags &= ~SPARC_FLAG_KTHREAD;
+	p->thread.current_ds = USER_DS;
+	ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
+	ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
+	ti->kwim = current->thread.fork_kwim;
+
+	if (sp != regs->u_regs[UREG_FP]) {
+		struct sparc_stackf __user *childstack;
+		struct sparc_stackf __user *parentstack;
+
+		/*
+		 * This is a clone() call with supplied user stack.
+		 * Set some valid stack frames to give to the child.
+		 */
+		childstack = (struct sparc_stackf __user *)
+			(sp & ~0xfUL);
+		parentstack = (struct sparc_stackf __user *)
+			regs->u_regs[UREG_FP];
+
+#if 0
+		printk("clone: parent stack:\n");
+		show_stackframe(parentstack);
+#endif
+
+		childstack = clone_stackframe(childstack, parentstack);
+		if (!childstack)
+			return -EFAULT;
+
+#if 0
+		printk("clone: child stack:\n");
+		show_stackframe(childstack);
+#endif
+
+		childregs->u_regs[UREG_FP] = (unsigned long)childstack;
+	}
+
+#ifdef CONFIG_SMP
+	/* FPU must be disabled on SMP. */
+	childregs->psr &= ~PSR_EF;
+	clear_tsk_thread_flag(p, TIF_USEDFPU);
+#endif
+
+	/* Set the return value for the child. */
+	childregs->u_regs[UREG_I0] = current->pid;
+	childregs->u_regs[UREG_I1] = 1;
+
+	/* Set the return value for the parent. */
+	regs->u_regs[UREG_I1] = 0;
+
+	if (clone_flags & CLONE_SETTLS)
+		childregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
+
+	return 0;
+}
+
+/*
+ * fill in the fpu structure for a core dump.
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
+{
+	if (used_math()) {
+		memset(fpregs, 0, sizeof(*fpregs));
+		fpregs->pr_q_entrysize = 8;
+		return 1;
+	}
+#ifdef CONFIG_SMP
+	if (test_thread_flag(TIF_USEDFPU)) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		if (regs != NULL) {
+			regs->psr &= ~(PSR_EF);
+			clear_thread_flag(TIF_USEDFPU);
+		}
+	}
+#else
+	if (current == last_task_used_math) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		if (regs != NULL) {
+			regs->psr &= ~(PSR_EF);
+			last_task_used_math = NULL;
+		}
+	}
+#endif
+	memcpy(&fpregs->pr_fr.pr_regs[0],
+	       &current->thread.float_regs[0],
+	       (sizeof(unsigned long) * 32));
+	fpregs->pr_fsr = current->thread.fsr;
+	fpregs->pr_qcnt = current->thread.fpqdepth;
+	fpregs->pr_q_entrysize = 8;
+	fpregs->pr_en = 1;
+	if (fpregs->pr_qcnt != 0) {
+		memcpy(&fpregs->pr_q[0],
+		       &current->thread.fpqueue[0],
+		       sizeof(struct fpq) * fpregs->pr_qcnt);
+	}
+	/* Zero out the rest. */
+	memset(&fpregs->pr_q[fpregs->pr_qcnt], 0,
+	       sizeof(struct fpq) * (32 - fpregs->pr_qcnt));
+	return 1;
+}
+
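+/* Walk the task's saved register windows and report the first return
+ * address outside the scheduler; that is where a sleeping task waits.
+ */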
+unsigned long get_wchan(struct task_struct *task)
+{
+	unsigned long pc, fp, bias = 0;
+	unsigned long task_base = (unsigned long) task;
+	unsigned long ret = 0;
+	struct reg_window32 *rw;
+	int count = 0;
+
+	if (!task || task == current ||
+	    task->state == TASK_RUNNING)
+		goto out;
+
+	fp = task_thread_info(task)->ksp + bias;
+	do {
+		/* Bogus frame pointer? */
+		if (fp < (task_base + sizeof(struct thread_info)) ||
+		    fp >= (task_base + (2 * PAGE_SIZE)))
+			break;
+		rw = (struct reg_window32 *) fp;
+		pc = rw->ins[7];
+		if (!in_sched_functions(pc)) {
+			ret = pc;
+			goto out;
+		}
+		fp = rw->ins[6] + bias;
+	} while (++count < 16);
+
+out:
+	return ret;
+}
+
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
new file mode 100644
index 0000000..59eaf62
--- /dev/null
+++ b/arch/sparc/kernel/process_64.c
@@ -0,0 +1,800 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  arch/sparc64/kernel/process.c
+ *
+ *  Copyright (C) 1995, 1996, 2008 David S. Miller (davem@davemloft.net)
+ *  Copyright (C) 1996       Eddie C. Dost   (ecd@skynet.be)
+ *  Copyright (C) 1997, 1998 Jakub Jelinek   (jj@sunsite.mff.cuni.cz)
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling.
+ */
+
+#include <stdarg.h>
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/stddef.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/delay.h>
+#include <linux/compat.h>
+#include <linux/tick.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/perf_event.h>
+#include <linux/elfcore.h>
+#include <linux/sysrq.h>
+#include <linux/nmi.h>
+#include <linux/context_tracking.h>
+#include <linux/signal.h>
+
+#include <linux/uaccess.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/pstate.h>
+#include <asm/elf.h>
+#include <asm/fpumacro.h>
+#include <asm/head.h>
+#include <asm/cpudata.h>
+#include <asm/mmu_context.h>
+#include <asm/unistd.h>
+#include <asm/hypervisor.h>
+#include <asm/syscalls.h>
+#include <asm/irq_regs.h>
+#include <asm/smp.h>
+#include <asm/pcr.h>
+
+#include "kstack.h"
+
+/* Idle loop support on sparc64. */
+void arch_cpu_idle(void)
+{
+	if (tlb_type != hypervisor) {
+		touch_nmi_watchdog();
+		local_irq_enable();
+	} else {
+		unsigned long pstate;
+
+		local_irq_enable();
+
+		/* The sun4v sleeping code requires that we have
+		 * PSTATE.IE cleared over the cpu sleep hypervisor
+		 * call.
+		 */
+		__asm__ __volatile__(
+			"rdpr %%pstate, %0\n\t"
+			"andn %0, %1, %0\n\t"
+			"wrpr %0, %%g0, %%pstate"
+			: "=&r" (pstate)
+			: "i" (PSTATE_IE));
+
+		if (!need_resched() && !cpu_is_offline(smp_processor_id())) {
+			sun4v_cpu_yield();
+			/* If resumed by cpu_poke then we need to explicitly
+			 * call scheduler_ipi().
+			 */
+			scheduler_poke();
+		}
+
+		/* Re-enable interrupts. */
+		__asm__ __volatile__(
+			"rdpr %%pstate, %0\n\t"
+			"or %0, %1, %0\n\t"
+			"wrpr %0, %%g0, %%pstate"
+			: "=&r" (pstate)
+			: "i" (PSTATE_IE));
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void arch_cpu_idle_dead(void)
+{
+	sched_preempt_enable_no_resched();
+	cpu_play_dead();
+}
+#endif
+
+#ifdef CONFIG_COMPAT
+static void show_regwindow32(struct pt_regs *regs)
+{
+	struct reg_window32 __user *rw;
+	struct reg_window32 r_w;
+	mm_segment_t old_fs;
+
+	__asm__ __volatile__ ("flushw");
+	rw = compat_ptr((unsigned int)regs->u_regs[14]);
+	old_fs = get_fs();
+	set_fs(USER_DS);
+	if (copy_from_user(&r_w, rw, sizeof(r_w))) {
+		set_fs(old_fs);
+		return;
+	}
+
+	set_fs(old_fs);
+	printk("l0: %08x l1: %08x l2: %08x l3: %08x "
+	       "l4: %08x l5: %08x l6: %08x l7: %08x\n",
+	       r_w.locals[0], r_w.locals[1], r_w.locals[2], r_w.locals[3],
+	       r_w.locals[4], r_w.locals[5], r_w.locals[6], r_w.locals[7]);
+	printk("i0: %08x i1: %08x i2: %08x i3: %08x "
+	       "i4: %08x i5: %08x i6: %08x i7: %08x\n",
+	       r_w.ins[0], r_w.ins[1], r_w.ins[2], r_w.ins[3],
+	       r_w.ins[4], r_w.ins[5], r_w.ins[6], r_w.ins[7]);
+}
+#else
+#define show_regwindow32(regs)	do { } while (0)
+#endif
+
+static void show_regwindow(struct pt_regs *regs)
+{
+	struct reg_window __user *rw;
+	struct reg_window *rwk;
+	struct reg_window r_w;
+	mm_segment_t old_fs;
+
+	if ((regs->tstate & TSTATE_PRIV) || !(test_thread_flag(TIF_32BIT))) {
+		__asm__ __volatile__ ("flushw");
+		rw = (struct reg_window __user *)
+			(regs->u_regs[14] + STACK_BIAS);
+		rwk = (struct reg_window *)
+			(regs->u_regs[14] + STACK_BIAS);
+		if (!(regs->tstate & TSTATE_PRIV)) {
+			old_fs = get_fs();
+			set_fs(USER_DS);
+			if (copy_from_user(&r_w, rw, sizeof(r_w))) {
+				set_fs(old_fs);
+				return;
+			}
+			rwk = &r_w;
+			set_fs(old_fs);
+		}
+	} else {
+		show_regwindow32(regs);
+		return;
+	}
+	printk("l0: %016lx l1: %016lx l2: %016lx l3: %016lx\n",
+	       rwk->locals[0], rwk->locals[1], rwk->locals[2], rwk->locals[3]);
+	printk("l4: %016lx l5: %016lx l6: %016lx l7: %016lx\n",
+	       rwk->locals[4], rwk->locals[5], rwk->locals[6], rwk->locals[7]);
+	printk("i0: %016lx i1: %016lx i2: %016lx i3: %016lx\n",
+	       rwk->ins[0], rwk->ins[1], rwk->ins[2], rwk->ins[3]);
+	printk("i4: %016lx i5: %016lx i6: %016lx i7: %016lx\n",
+	       rwk->ins[4], rwk->ins[5], rwk->ins[6], rwk->ins[7]);
+	if (regs->tstate & TSTATE_PRIV)
+		printk("I7: <%pS>\n", (void *) rwk->ins[7]);
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	show_regs_print_info(KERN_DEFAULT);
+
+	printk("TSTATE: %016lx TPC: %016lx TNPC: %016lx Y: %08x    %s\n", regs->tstate,
+	       regs->tpc, regs->tnpc, regs->y, print_tainted());
+	printk("TPC: <%pS>\n", (void *) regs->tpc);
+	printk("g0: %016lx g1: %016lx g2: %016lx g3: %016lx\n",
+	       regs->u_regs[0], regs->u_regs[1], regs->u_regs[2],
+	       regs->u_regs[3]);
+	printk("g4: %016lx g5: %016lx g6: %016lx g7: %016lx\n",
+	       regs->u_regs[4], regs->u_regs[5], regs->u_regs[6],
+	       regs->u_regs[7]);
+	printk("o0: %016lx o1: %016lx o2: %016lx o3: %016lx\n",
+	       regs->u_regs[8], regs->u_regs[9], regs->u_regs[10],
+	       regs->u_regs[11]);
+	printk("o4: %016lx o5: %016lx sp: %016lx ret_pc: %016lx\n",
+	       regs->u_regs[12], regs->u_regs[13], regs->u_regs[14],
+	       regs->u_regs[15]);
+	printk("RPC: <%pS>\n", (void *) regs->u_regs[15]);
+	show_regwindow(regs);
+	show_stack(current, (unsigned long *) regs->u_regs[UREG_FP]);
+}
+
+union global_cpu_snapshot global_cpu_snapshot[NR_CPUS];
+static DEFINE_SPINLOCK(global_cpu_snapshot_lock);
+
+static void __global_reg_self(struct thread_info *tp, struct pt_regs *regs,
+			      int this_cpu)
+{
+	struct global_reg_snapshot *rp;
+
+	flushw_all();
+
+	rp = &global_cpu_snapshot[this_cpu].reg;
+
+	rp->tstate = regs->tstate;
+	rp->tpc = regs->tpc;
+	rp->tnpc = regs->tnpc;
+	rp->o7 = regs->u_regs[UREG_I7];
+
+	if (regs->tstate & TSTATE_PRIV) {
+		struct reg_window *rw;
+
+		rw = (struct reg_window *)
+			(regs->u_regs[UREG_FP] + STACK_BIAS);
+		if (kstack_valid(tp, (unsigned long) rw)) {
+			rp->i7 = rw->ins[7];
+			rw = (struct reg_window *)
+				(rw->ins[6] + STACK_BIAS);
+			if (kstack_valid(tp, (unsigned long) rw))
+				rp->rpc = rw->ins[7];
+		}
+	} else {
+		rp->i7 = 0;
+		rp->rpc = 0;
+	}
+	rp->thread = tp;
+}
+
+/* In order to avoid hangs we do not try to synchronize with the
+ * global register dump client cpus.  The last store they make is to
+ * the thread pointer, so do a short poll waiting for that to become
+ * non-NULL.
+ */
+static void __global_reg_poll(struct global_reg_snapshot *gp)
+{
+	int limit = 0;
+
+	while (!gp->thread && ++limit < 100) {
+		barrier();
+		udelay(1);
+	}
+}
+
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
+{
+	struct thread_info *tp = current_thread_info();
+	struct pt_regs *regs = get_irq_regs();
+	unsigned long flags;
+	int this_cpu, cpu;
+
+	if (!regs)
+		regs = tp->kregs;
+
+	spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
+
+	this_cpu = raw_smp_processor_id();
+
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	if (cpumask_test_cpu(this_cpu, mask) && !exclude_self)
+		__global_reg_self(tp, regs, this_cpu);
+
+	smp_fetch_global_regs();
+
+	for_each_cpu(cpu, mask) {
+		struct global_reg_snapshot *gp;
+
+		if (exclude_self && cpu == this_cpu)
+			continue;
+
+		gp = &global_cpu_snapshot[cpu].reg;
+
+		__global_reg_poll(gp);
+
+		tp = gp->thread;
+		printk("%c CPU[%3d]: TSTATE[%016lx] TPC[%016lx] TNPC[%016lx] TASK[%s:%d]\n",
+		       (cpu == this_cpu ? '*' : ' '), cpu,
+		       gp->tstate, gp->tpc, gp->tnpc,
+		       ((tp && tp->task) ? tp->task->comm : "NULL"),
+		       ((tp && tp->task) ? tp->task->pid : -1));
+
+		if (gp->tstate & TSTATE_PRIV) {
+			printk("             TPC[%pS] O7[%pS] I7[%pS] RPC[%pS]\n",
+			       (void *) gp->tpc,
+			       (void *) gp->o7,
+			       (void *) gp->i7,
+			       (void *) gp->rpc);
+		} else {
+			printk("             TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
+			       gp->tpc, gp->o7, gp->i7, gp->rpc);
+		}
+
+		touch_nmi_watchdog();
+	}
+
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	spin_unlock_irqrestore(&global_cpu_snapshot_lock, flags);
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ
+
+static void sysrq_handle_globreg(int key)
+{
+	trigger_all_cpu_backtrace();
+}
+
+static struct sysrq_key_op sparc_globalreg_op = {
+	.handler	= sysrq_handle_globreg,
+	.help_msg	= "global-regs(y)",
+	.action_msg	= "Show Global CPU Regs",
+};
+
+static void __global_pmu_self(int this_cpu)
+{
+	struct global_pmu_snapshot *pp;
+	int i, num;
+
+	if (!pcr_ops)
+		return;
+
+	pp = &global_cpu_snapshot[this_cpu].pmu;
+
+	num = 1;
+	if (tlb_type == hypervisor &&
+	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
+		num = 4;
+
+	for (i = 0; i < num; i++) {
+		pp->pcr[i] = pcr_ops->read_pcr(i);
+		pp->pic[i] = pcr_ops->read_pic(i);
+	}
+}
+
+static void __global_pmu_poll(struct global_pmu_snapshot *pp)
+{
+	int limit = 0;
+
+	while (!pp->pcr[0] && ++limit < 100) {
+		barrier();
+		udelay(1);
+	}
+}
+
+static void pmu_snapshot_all_cpus(void)
+{
+	unsigned long flags;
+	int this_cpu, cpu;
+
+	spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
+
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	this_cpu = raw_smp_processor_id();
+
+	__global_pmu_self(this_cpu);
+
+	smp_fetch_global_pmu();
+
+	for_each_online_cpu(cpu) {
+		struct global_pmu_snapshot *pp = &global_cpu_snapshot[cpu].pmu;
+
+		__global_pmu_poll(pp);
+
+		printk("%c CPU[%3d]: PCR[%08lx:%08lx:%08lx:%08lx] PIC[%08lx:%08lx:%08lx:%08lx]\n",
+		       (cpu == this_cpu ? '*' : ' '), cpu,
+		       pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
+		       pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
+
+		touch_nmi_watchdog();
+	}
+
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	spin_unlock_irqrestore(&global_cpu_snapshot_lock, flags);
+}
+
+static void sysrq_handle_globpmu(int key)
+{
+	pmu_snapshot_all_cpus();
+}
+
+static struct sysrq_key_op sparc_globalpmu_op = {
+	.handler	= sysrq_handle_globpmu,
+	.help_msg	= "global-pmu(x)",
+	.action_msg	= "Show Global PMU Regs",
+};
+
+static int __init sparc_sysrq_init(void)
+{
+	int ret = register_sysrq_key('y', &sparc_globalreg_op);
+
+	if (!ret)
+		ret = register_sysrq_key('x', &sparc_globalpmu_op);
+	return ret;
+}
+
+core_initcall(sparc_sysrq_init);
+
+#endif
+
+/* Free current thread data structures etc.. */
+void exit_thread(struct task_struct *tsk)
+{
+	struct thread_info *t = task_thread_info(tsk);
+
+	if (t->utraps) {
+		if (t->utraps[0] < 2)
+			kfree(t->utraps);
+		else
+			t->utraps[0]--;
+	}
+}
+
+void flush_thread(void)
+{
+	struct thread_info *t = current_thread_info();
+	struct mm_struct *mm;
+
+	mm = t->task->mm;
+	if (mm)
+		tsb_context_switch(mm);
+
+	set_thread_wsaved(0);
+
+	/* Clear FPU register state. */
+	t->fpsaved[0] = 0;
+}
+
+/* It's a bit more tricky when 64-bit tasks are involved... */
+static unsigned long clone_stackframe(unsigned long csp, unsigned long psp)
+{
+	bool stack_64bit = test_thread_64bit_stack(psp);
+	unsigned long fp, distance, rval;
+
+	if (stack_64bit) {
+		csp += STACK_BIAS;
+		psp += STACK_BIAS;
+		__get_user(fp, &(((struct reg_window __user *)psp)->ins[6]));
+		fp += STACK_BIAS;
+		if (test_thread_flag(TIF_32BIT))
+			fp &= 0xffffffff;
+	} else
+		__get_user(fp, &(((struct reg_window32 __user *)psp)->ins[6]));
+
+	/* Now align the stack as this is mandatory in the Sparc ABI
+	 * due to how register windows work.  This hides the
+	 * restriction from thread libraries etc.
+	 */
+	csp &= ~15UL;
+
+	distance = fp - psp;
+	rval = (csp - distance);
+	if (copy_in_user((void __user *) rval, (void __user *) psp, distance))
+		rval = 0;
+	else if (!stack_64bit) {
+		if (put_user(((u32)csp),
+			     &(((struct reg_window32 __user *)rval)->ins[6])))
+			rval = 0;
+	} else {
+		if (put_user(((u64)csp - STACK_BIAS),
+			     &(((struct reg_window __user *)rval)->ins[6])))
+			rval = 0;
+		else
+			rval = rval - STACK_BIAS;
+	}
+
+	return rval;
+}
+
+/* Slide the saved register windows after first_win down one slot,
+ * dropping the window at first_win from the buffer.
+ */
+static inline void shift_window_buffer(int first_win, int last_win,
+				       struct thread_info *t)
+{
+	int i;
+
+	for (i = first_win; i < last_win; i++) {
+		t->rwbuf_stkptrs[i] = t->rwbuf_stkptrs[i+1];
+		memcpy(&t->reg_window[i], &t->reg_window[i+1],
+		       sizeof(struct reg_window));
+	}
+}
+
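+/* Push any user register windows still buffered in thread_info out to
+ * the user stack.  Windows that cannot be written back (stack page not
+ * resident or not writable) simply stay buffered.
+ */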
+void synchronize_user_stack(void)
+{
+	struct thread_info *t = current_thread_info();
+	unsigned long window;
+
+	flush_user_windows();
+	if ((window = get_thread_wsaved()) != 0) {
+		window -= 1;
+		do {
+			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp;
+
+			sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
+
+			if (!copy_to_user((char __user *)sp, rwin, winsize)) {
+				shift_window_buffer(window, get_thread_wsaved() - 1, t);
+				set_thread_wsaved(get_thread_wsaved() - 1);
+			}
+		} while (window--);
+	}
+}
+
+static void stack_unaligned(unsigned long sp)
+{
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
+}
+
+static const char uwfault32[] = KERN_INFO \
+	"%s[%d]: bad register window fault: SP %08lx (orig_sp %08lx) TPC %08lx O7 %08lx\n";
+static const char uwfault64[] = KERN_INFO \
+	"%s[%d]: bad register window fault: SP %016lx (orig_sp %016lx) TPC %08lx O7 %016lx\n";
+
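+/* Like synchronize_user_stack(), but called on the way back to user
+ * space: every buffered window must make it out, so a failed write
+ * back is fatal (SIGSEGV) for the task.
+ */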
+void fault_in_user_windows(struct pt_regs *regs)
+{
+	struct thread_info *t = current_thread_info();
+	unsigned long window;
+
+	flush_user_windows();
+	window = get_thread_wsaved();
+
+	if (likely(window != 0)) {
+		window -= 1;
+		do {
+			struct reg_window *rwin = &t->reg_window[window];
+			int winsize = sizeof(struct reg_window);
+			unsigned long sp, orig_sp;
+
+			orig_sp = sp = t->rwbuf_stkptrs[window];
+
+			if (test_thread_64bit_stack(sp))
+				sp += STACK_BIAS;
+			else
+				winsize = sizeof(struct reg_window32);
+
+			if (unlikely(sp & 0x7UL))
+				stack_unaligned(sp);
+
+			if (unlikely(copy_to_user((char __user *)sp,
+						  rwin, winsize))) {
+				if (show_unhandled_signals)
+					printk_ratelimited(is_compat_task() ?
+							   uwfault32 : uwfault64,
+							   current->comm, current->pid,
+							   sp, orig_sp,
+							   regs->tpc,
+							   regs->u_regs[UREG_I7]);
+				goto barf;
+			}
+		} while (window--);
+	}
+	set_thread_wsaved(0);
+	return;
+
+barf:
+	set_thread_wsaved(window + 1);
+	force_sig(SIGSEGV, current);
+}
+
+asmlinkage long sparc_do_fork(unsigned long clone_flags,
+			      unsigned long stack_start,
+			      struct pt_regs *regs,
+			      unsigned long stack_size)
+{
+	int __user *parent_tid_ptr, *child_tid_ptr;
+	unsigned long orig_i1 = regs->u_regs[UREG_I1];
+	long ret;
+
+#ifdef CONFIG_COMPAT
+	if (test_thread_flag(TIF_32BIT)) {
+		parent_tid_ptr = compat_ptr(regs->u_regs[UREG_I2]);
+		child_tid_ptr = compat_ptr(regs->u_regs[UREG_I4]);
+	} else
+#endif
+	{
+		parent_tid_ptr = (int __user *) regs->u_regs[UREG_I2];
+		child_tid_ptr = (int __user *) regs->u_regs[UREG_I4];
+	}
+
+	ret = do_fork(clone_flags, stack_start, stack_size,
+		      parent_tid_ptr, child_tid_ptr);
+
+	/* If we get an error and potentially restart the system
+	 * call, we're screwed because copy_thread() clobbered
+	 * the parent's %o1.  So detect that case and restore it
+	 * here.
+	 */
+	if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
+		regs->u_regs[UREG_I1] = orig_i1;
+
+	return ret;
+}
+
+/* Copy a Sparc thread.  The fork() return value conventions
+ * under SunOS are nothing short of bletcherous:
+ * Parent -->  %o0 == child's  pid, %o1 == 0
+ * Child  -->  %o0 == parent's pid, %o1 == 1
+ */
+int copy_thread(unsigned long clone_flags, unsigned long sp,
+		unsigned long arg, struct task_struct *p)
+{
+	struct thread_info *t = task_thread_info(p);
+	struct pt_regs *regs = current_pt_regs();
+	struct sparc_stackf *parent_sf;
+	unsigned long child_stack_sz;
+	char *child_trap_frame;
+
+	/* Calculate offset to stack_frame & pt_regs */
+	child_stack_sz = (STACKFRAME_SZ + TRACEREG_SZ);
+	child_trap_frame = (task_stack_page(p) +
+			    (THREAD_SIZE - child_stack_sz));
+
+	t->new_child = 1;
+	t->ksp = ((unsigned long) child_trap_frame) - STACK_BIAS;
+	t->kregs = (struct pt_regs *) (child_trap_frame +
+				       sizeof(struct sparc_stackf));
+	t->fpsaved[0] = 0;
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(child_trap_frame, 0, child_stack_sz);
+		__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
+			(current_pt_regs()->tstate + 1) & TSTATE_CWP;
+		t->current_ds = ASI_P;
+		t->kregs->u_regs[UREG_G1] = sp; /* function */
+		t->kregs->u_regs[UREG_G2] = arg;
+		return 0;
+	}
+
+	parent_sf = ((struct sparc_stackf *) regs) - 1;
+	memcpy(child_trap_frame, parent_sf, child_stack_sz);
+	if (t->flags & _TIF_32BIT) {
+		sp &= 0x00000000ffffffffUL;
+		regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+	}
+	t->kregs->u_regs[UREG_FP] = sp;
+	__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
+		(regs->tstate + 1) & TSTATE_CWP;
+	t->current_ds = ASI_AIUS;
+	if (sp != regs->u_regs[UREG_FP]) {
+		unsigned long csp;
+
+		csp = clone_stackframe(sp, regs->u_regs[UREG_FP]);
+		if (!csp)
+			return -EFAULT;
+		t->kregs->u_regs[UREG_FP] = csp;
+	}
+	if (t->utraps)
+		t->utraps[0]++;
+
+	/* Set the return value for the child. */
+	t->kregs->u_regs[UREG_I0] = current->pid;
+	t->kregs->u_regs[UREG_I1] = 1;
+
+	/* Set the second return value for the parent. */
+	regs->u_regs[UREG_I1] = 0;
+
+	if (clone_flags & CLONE_SETTLS)
+		t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
+
+	return 0;
+}
+
+/* TIF_MCDPER in thread info flags for current task is updated lazily upon
+ * a context switch. Update this flag in current task's thread flags
+ * before dup so the dup'd task will inherit the current TIF_MCDPER flag.
+ */
+int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
+{
+	if (adi_capable()) {
+		register unsigned long tmp_mcdper;
+
+		__asm__ __volatile__(
+			".word 0x83438000\n\t"	/* rd  %mcdper, %g1 */
+			"mov %%g1, %0\n\t"
+			: "=r" (tmp_mcdper)
+			:
+			: "g1");
+		if (tmp_mcdper)
+			set_thread_flag(TIF_MCDPER);
+		else
+			clear_thread_flag(TIF_MCDPER);
+	}
+
+	*dst = *src;
+	return 0;
+}
+
+typedef struct {
+	union {
+		unsigned int	pr_regs[32];
+		unsigned long	pr_dregs[16];
+	} pr_fr;
+	unsigned int __unused;
+	unsigned int	pr_fsr;
+	unsigned char	pr_qcnt;
+	unsigned char	pr_q_entrysize;
+	unsigned char	pr_en;
+	unsigned int	pr_q[64];
+} elf_fpregset_t32;
+
+/*
+ * fill in the fpu structure for a core dump.
+ */
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
+{
+	unsigned long *kfpregs = current_thread_info()->fpregs;
+	unsigned long fprs = current_thread_info()->fpsaved[0];
+
+	if (test_thread_flag(TIF_32BIT)) {
+		elf_fpregset_t32 *fpregs32 = (elf_fpregset_t32 *)fpregs;
+
+		if (fprs & FPRS_DL)
+			memcpy(&fpregs32->pr_fr.pr_regs[0], kfpregs,
+			       sizeof(unsigned int) * 32);
+		else
+			memset(&fpregs32->pr_fr.pr_regs[0], 0,
+			       sizeof(unsigned int) * 32);
+		fpregs32->pr_qcnt = 0;
+		fpregs32->pr_q_entrysize = 8;
+		memset(&fpregs32->pr_q[0], 0,
+		       (sizeof(unsigned int) * 64));
+		if (fprs & FPRS_FEF) {
+			fpregs32->pr_fsr = (unsigned int) current_thread_info()->xfsr[0];
+			fpregs32->pr_en = 1;
+		} else {
+			fpregs32->pr_fsr = 0;
+			fpregs32->pr_en = 0;
+		}
+	} else {
+		if (fprs & FPRS_DL)
+			memcpy(&fpregs->pr_regs[0], kfpregs,
+			       sizeof(unsigned int) * 32);
+		else
+			memset(&fpregs->pr_regs[0], 0,
+			       sizeof(unsigned int) * 32);
+		if (fprs & FPRS_DU)
+			memcpy(&fpregs->pr_regs[16], kfpregs+16,
+			       sizeof(unsigned int) * 32);
+		else
+			memset(&fpregs->pr_regs[16], 0,
+			       sizeof(unsigned int) * 32);
+		if (fprs & FPRS_FEF) {
+			fpregs->pr_fsr = current_thread_info()->xfsr[0];
+			fpregs->pr_gsr = current_thread_info()->gsr[0];
+		} else {
+			fpregs->pr_fsr = fpregs->pr_gsr = 0;
+		}
+		fpregs->pr_fprs = fprs;
+	}
+	return 1;
+}
+EXPORT_SYMBOL(dump_fpu);
+
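+/* Same saved-window walk as the 32-bit get_wchan(), adjusted for the
+ * 64-bit stack bias.
+ */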
+unsigned long get_wchan(struct task_struct *task)
+{
+	unsigned long pc, fp, bias = 0;
+	struct thread_info *tp;
+	struct reg_window *rw;
+	unsigned long ret = 0;
+	int count = 0;
+
+	if (!task || task == current ||
+	    task->state == TASK_RUNNING)
+		goto out;
+
+	tp = task_thread_info(task);
+	bias = STACK_BIAS;
+	fp = task_thread_info(task)->ksp + bias;
+
+	do {
+		if (!kstack_valid(tp, fp))
+			break;
+		rw = (struct reg_window *) fp;
+		pc = rw->ins[7];
+		if (!in_sched_functions(pc)) {
+			ret = pc;
+			goto out;
+		}
+		fp = rw->ins[6] + bias;
+	} while (++count < 16);
+
+out:
+	return ret;
+}
diff --git a/arch/sparc/kernel/prom.h b/arch/sparc/kernel/prom.h
new file mode 100644
index 0000000..26a1cca
--- /dev/null
+++ b/arch/sparc/kernel/prom.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PROM_H
+#define __PROM_H
+
+#include <linux/spinlock.h>
+#include <asm/prom.h>
+
+void of_console_init(void);
+
+extern unsigned int prom_early_allocated;
+
+#endif /* __PROM_H */
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
new file mode 100644
index 0000000..b51cbb9
--- /dev/null
+++ b/arch/sparc/kernel/prom_32.c
@@ -0,0 +1,337 @@
+/*
+ * Procedures for creating, accessing and interpreting the device tree.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ * 
+ *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ *    {engebret|bergner}@us.ibm.com 
+ *
+ *  Adapted for sparc32 by David S. Miller davem@davemloft.net
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+
+#include <asm/prom.h>
+#include <asm/oplib.h>
+#include <asm/leon.h>
+#include <asm/leon_amba.h>
+
+#include "prom.h"
+
+void * __init prom_early_alloc(unsigned long size)
+{
+	void *ret;
+
+	ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
+	if (ret != NULL)
+		memset(ret, 0, size);
+
+	prom_early_allocated += size;
+
+	return ret;
+}
+
+/* The following routines deal with the black magic of fully naming a
+ * node.
+ *
+ * Certain well known named nodes are just the simple name string.
+ *
+ * Actual devices have an address specifier appended to the base name
+ * string, like this "foo@addr".  The "addr" can be in any number of
+ * formats, and the platform plus the type of the node determine the
+ * format and how it is constructed.
+ *
+ * For children of the ROOT node, the naming convention is fixed and
+ * determined by whether this is a sun4u or sun4v system.
+ *
+ * For children of other nodes, it is bus type specific.  So
+ * we walk up the tree until we discover a "device_type" property
+ * we recognize and we go from there.
+ */
+static void __init sparc32_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom_registers *regs;
+	struct property *rprop;
+
+	rprop = of_find_property(dp, "reg", NULL);
+	if (!rprop)
+		return;
+
+	regs = rprop->value;
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
+		regs->which_io, regs->phys_addr);
+}
+
+/* "name@slot,offset"  */
+static void __init sbus_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom_registers *regs;
+	struct property *prop;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
+		regs->which_io,
+		regs->phys_addr);
+}
+
+/* "name@devnum[,func]" */
+static void __init pci_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom_pci_registers *regs;
+	struct property *prop;
+	unsigned int devfn;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+	devfn = (regs->phys_hi >> 8) & 0xff;
+	if (devfn & 0x07) {
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
+			devfn >> 3,
+			devfn & 0x07);
+	} else {
+		sprintf(tmp_buf, "%s@%x",
+			dp->name,
+			devfn >> 3);
+	}
+}
+
+/* "name@addrhi,addrlo" */
+static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom_registers *regs;
+	struct property *prop;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
+		regs->which_io, regs->phys_addr);
+}
+
+/* "name:vendor:device@irq,addrlo" */
+static void __init ambapp_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct amba_prom_registers *regs;
+	unsigned int *intr, *device, *vendor, reg0;
+	struct property *prop;
+	int interrupt = 0;
+
+	/* In order to get a unique ID in the device tree (multiple AMBA
+	 * devices may have the same name), a node-unique number is printed:
+	 * the 'reg' base address, or the phandle when no 'reg' exists.
+	 */
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop) {
+		reg0 = (unsigned int)dp->phandle;
+	} else {
+		regs = prop->value;
+		reg0 = regs->phys_addr;
+	}
+
+	/* Not all cores have an interrupt */
+	prop = of_find_property(dp, "interrupts", NULL);
+	if (!prop)
+		intr = &interrupt; /* IRQ0 does not exist */
+	else
+		intr = prop->value;
+
+	prop = of_find_property(dp, "vendor", NULL);
+	if (!prop)
+		return;
+	vendor = prop->value;
+	prop = of_find_property(dp, "device", NULL);
+	if (!prop)
+		return;
+	device = prop->value;
+
+	sprintf(tmp_buf, "%s:%d:%d@%x,%x",
+		dp->name, *vendor, *device,
+		*intr, reg0);
+}
+
+static void __init __build_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct device_node *parent = dp->parent;
+
+	if (parent != NULL) {
+		if (!strcmp(parent->type, "pci") ||
+		    !strcmp(parent->type, "pciex"))
+			return pci_path_component(dp, tmp_buf);
+		if (!strcmp(parent->type, "sbus"))
+			return sbus_path_component(dp, tmp_buf);
+		if (!strcmp(parent->type, "ebus"))
+			return ebus_path_component(dp, tmp_buf);
+		if (!strcmp(parent->type, "ambapp"))
+			return ambapp_path_component(dp, tmp_buf);
+
+		/* "isa" is handled with platform naming */
+	}
+
+	/* Use platform naming convention.  */
+	return sparc32_path_component(dp, tmp_buf);
+}
+
+char * __init build_path_component(struct device_node *dp)
+{
+	char tmp_buf[64], *n;
+
+	tmp_buf[0] = '\0';
+	__build_path_component(dp, tmp_buf);
+	if (tmp_buf[0] == '\0')
+		strcpy(tmp_buf, dp->name);
+
+	n = prom_early_alloc(strlen(tmp_buf) + 1);
+	strcpy(n, tmp_buf);
+
+	return n;
+}
+
+extern void restore_current(void);
+
+void __init of_console_init(void)
+{
+	char *msg = "OF stdout device is: %s\n";
+	struct device_node *dp;
+	unsigned long flags;
+	const char *type;
+	phandle node;
+	int skip, tmp, fd;
+
+	of_console_path = prom_early_alloc(256);
+
+	switch (prom_vers) {
+	case PROM_V0:
+		skip = 0;
+		switch (*romvec->pv_stdout) {
+		case PROMDEV_SCREEN:
+			type = "display";
+			break;
+
+		case PROMDEV_TTYB:
+			skip = 1;
+			/* FALLTHRU */
+
+		case PROMDEV_TTYA:
+			type = "serial";
+			break;
+
+		default:
+			prom_printf("Invalid PROM_V0 stdout value %u\n",
+				    *romvec->pv_stdout);
+			prom_halt();
+		}
+
+		tmp = skip;
+		for_each_node_by_type(dp, type) {
+			if (!tmp--)
+				break;
+		}
+		if (!dp) {
+			prom_printf("Cannot find PROM_V0 console node.\n");
+			prom_halt();
+		}
+		of_console_device = dp;
+
+		strcpy(of_console_path, dp->full_name);
+		if (!strcmp(type, "serial")) {
+			strcat(of_console_path,
+			       (skip ? ":b" : ":a"));
+		}
+		break;
+
+	default:
+	case PROM_V2:
+	case PROM_V3:
+		fd = *romvec->pv_v2bootargs.fd_stdout;
+
+		spin_lock_irqsave(&prom_lock, flags);
+		node = (*romvec->pv_v2devops.v2_inst2pkg)(fd);
+		restore_current();
+		spin_unlock_irqrestore(&prom_lock, flags);
+
+		if (!node) {
+			prom_printf("Cannot resolve stdout node from "
+				    "instance %08x.\n", fd);
+			prom_halt();
+		}
+		dp = of_find_node_by_phandle(node);
+		type = of_get_property(dp, "device_type", NULL);
+
+		if (!type) {
+			prom_printf("Console stdout lacks "
+				    "device_type property.\n");
+			prom_halt();
+		}
+
+		if (strcmp(type, "display") && strcmp(type, "serial")) {
+			prom_printf("Console device_type is neither display "
+				    "nor serial.\n");
+			prom_halt();
+		}
+
+		of_console_device = dp;
+
+		if (prom_vers == PROM_V2) {
+			strcpy(of_console_path, dp->full_name);
+			switch (*romvec->pv_stdout) {
+			case PROMDEV_TTYA:
+				strcat(of_console_path, ":a");
+				break;
+			case PROMDEV_TTYB:
+				strcat(of_console_path, ":b");
+				break;
+			}
+		} else {
+			const char *path;
+
+			dp = of_find_node_by_path("/");
+			path = of_get_property(dp, "stdout-path", NULL);
+			if (!path) {
+				prom_printf("No stdout-path in root node.\n");
+				prom_halt();
+			}
+			strcpy(of_console_path, path);
+		}
+		break;
+	}
+
+	of_console_options = strrchr(of_console_path, ':');
+	if (of_console_options) {
+		of_console_options++;
+		if (*of_console_options == '\0')
+			of_console_options = NULL;
+	}
+
+	printk(msg, of_console_path);
+}
+
+void __init of_fill_in_cpu_data(void)
+{
+}
+
+void __init irq_trans_init(struct device_node *dp)
+{
+}
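The sparc32 naming helpers above all reduce to the same sprintf pattern: "name@which_io,phys_addr", both fields in hex. A minimal userspace sketch, assuming a hypothetical fake_prom_registers struct and made-up register values:

#include <stdio.h>

struct fake_prom_registers {
	unsigned int which_io;		/* address-space selector */
	unsigned int phys_addr;	/* offset within that space */
};

int main(void)
{
	struct fake_prom_registers regs = {
		.which_io = 0x1, .phys_addr = 0xf0000000,
	};
	char buf[64];

	snprintf(buf, sizeof(buf), "%s@%x,%x",
		 "zs", regs.which_io, regs.phys_addr);
	printf("%s\n", buf);	/* prints "zs@1,f0000000" */
	return 0;
}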
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
new file mode 100644
index 0000000..baeaeed
--- /dev/null
+++ b/arch/sparc/kernel/prom_64.c
@@ -0,0 +1,635 @@
+/*
+ * Procedures for creating, accessing and interpreting the device tree.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ * 
+ *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ *    {engebret|bergner}@us.ibm.com 
+ *
+ *  Adapted for sparc64 by David S. Miller davem@davemloft.net
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+
+#include <asm/prom.h>
+#include <asm/oplib.h>
+#include <asm/irq.h>
+#include <asm/asi.h>
+#include <asm/upa.h>
+#include <asm/smp.h>
+
+#include "prom.h"
+
+void * __init prom_early_alloc(unsigned long size)
+{
+	unsigned long paddr = memblock_alloc(size, SMP_CACHE_BYTES);
+	void *ret;
+
+	if (!paddr) {
+		prom_printf("prom_early_alloc(%lu) failed\n", size);
+		prom_halt();
+	}
+
+	ret = __va(paddr);
+	memset(ret, 0, size);
+	prom_early_allocated += size;
+
+	return ret;
+}
+
+/* The following routines deal with the black magic of fully naming a
+ * node.
+ *
+ * Certain well known named nodes are just the simple name string.
+ *
+ * Actual devices have an address specifier appended to the base name
+ * string, like this "foo@addr".  The "addr" can be in any number of
+ * formats, and the platform plus the type of the node determine the
+ * format and how it is constructed.
+ *
+ * For children of the ROOT node, the naming convention is fixed and
+ * determined by whether this is a sun4u or sun4v system.
+ *
+ * For children of other nodes, it is bus type specific.  So
+ * we walk up the tree until we discover a "device_type" property
+ * we recognize and we go from there.
+ *
+ * As an example, the boot device on my workstation has a full path:
+ *
+ *	/pci@1e,600000/ide@d/disk@0,0:c
+ */
+static void __init sun4v_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom64_registers *regs;
+	struct property *rprop;
+	u32 high_bits, low_bits, type;
+
+	rprop = of_find_property(dp, "reg", NULL);
+	if (!rprop)
+		return;
+
+	regs = rprop->value;
+	if (!of_node_is_root(dp->parent)) {
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
+			(unsigned int) (regs->phys_addr >> 32UL),
+			(unsigned int) (regs->phys_addr & 0xffffffffUL));
+		return;
+	}
+
+	type = regs->phys_addr >> 60UL;
+	high_bits = (regs->phys_addr >> 32UL) & 0x0fffffffUL;
+	low_bits = (regs->phys_addr & 0xffffffffUL);
+
+	if (type == 0 || type == 8) {
+		const char *prefix = (type == 0) ? "m" : "i";
+
+		if (low_bits)
+			sprintf(tmp_buf, "%s@%s%x,%x",
+				dp->name, prefix,
+				high_bits, low_bits);
+		else
+			sprintf(tmp_buf, "%s@%s%x",
+				dp->name,
+				prefix,
+				high_bits);
+	} else if (type == 12) {
+		sprintf(tmp_buf, "%s@%x",
+			dp->name, high_bits);
+	}
+}
+
+static void __init sun4u_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom64_registers *regs;
+	struct property *prop;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+	if (!of_node_is_root(dp->parent)) {
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
+			(unsigned int) (regs->phys_addr >> 32UL),
+			(unsigned int) (regs->phys_addr & 0xffffffffUL));
+		return;
+	}
+
+	prop = of_find_property(dp, "upa-portid", NULL);
+	if (!prop)
+		prop = of_find_property(dp, "portid", NULL);
+	if (prop) {
+		unsigned long mask = 0xffffffffUL;
+
+		if (tlb_type >= cheetah)
+			mask = 0x7fffff;
+
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
+			*(u32 *)prop->value,
+			(unsigned int) (regs->phys_addr & mask));
+	}
+}
+
+/* "name@slot,offset"  */
+static void __init sbus_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom_registers *regs;
+	struct property *prop;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
+		regs->which_io,
+		regs->phys_addr);
+}
+
+/* "name@devnum[,func]" */
+static void __init pci_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom_pci_registers *regs;
+	struct property *prop;
+	unsigned int devfn;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+	devfn = (regs->phys_hi >> 8) & 0xff;
+	if (devfn & 0x07) {
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name,
+			devfn >> 3,
+			devfn & 0x07);
+	} else {
+		sprintf(tmp_buf, "%s@%x",
+			dp->name,
+			devfn >> 3);
+	}
+}
+
+/* "name@UPA_PORTID,offset" */
+static void __init upa_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom64_registers *regs;
+	struct property *prop;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	prop = of_find_property(dp, "upa-portid", NULL);
+	if (!prop)
+		return;
+
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
+		*(u32 *) prop->value,
+		(unsigned int) (regs->phys_addr & 0xffffffffUL));
+}
+
+/* "name@reg" */
+static void __init vdev_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct property *prop;
+	u32 *regs;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	sprintf(tmp_buf, "%s@%x", dp->name, *regs);
+}
+
+/* "name@addrhi,addrlo" */
+static void __init ebus_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct linux_prom64_registers *regs;
+	struct property *prop;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name,
+		(unsigned int) (regs->phys_addr >> 32UL),
+		(unsigned int) (regs->phys_addr & 0xffffffffUL));
+}
+
+/* "name@bus,addr" */
+static void __init i2c_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct property *prop;
+	u32 *regs;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	/* This actually isn't right... should look at the #address-cells
+	 * property of the i2c bus node etc. etc.
+	 */
+	sprintf(tmp_buf, "%s@%x,%x",
+		dp->name, regs[0], regs[1]);
+}
+
+/* "name@reg0[,reg1]" */
+static void __init usb_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct property *prop;
+	u32 *regs;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	if (prop->length == sizeof(u32) || regs[1] == 1) {
+		sprintf(tmp_buf, "%s@%x",
+			dp->name, regs[0]);
+	} else {
+		sprintf(tmp_buf, "%s@%x,%x",
+			dp->name, regs[0], regs[1]);
+	}
+}
+
+/* "name@reg0reg1[,reg2reg3]" */
+static void __init ieee1394_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct property *prop;
+	u32 *regs;
+
+	prop = of_find_property(dp, "reg", NULL);
+	if (!prop)
+		return;
+
+	regs = prop->value;
+
+	if (regs[2] || regs[3]) {
+		sprintf(tmp_buf, "%s@%08x%08x,%04x%08x",
+			dp->name, regs[0], regs[1], regs[2], regs[3]);
+	} else {
+		sprintf(tmp_buf, "%s@%08x%08x",
+			dp->name, regs[0], regs[1]);
+	}
+}
+
+static void __init __build_path_component(struct device_node *dp, char *tmp_buf)
+{
+	struct device_node *parent = dp->parent;
+
+	if (parent != NULL) {
+		if (!strcmp(parent->type, "pci") ||
+		    !strcmp(parent->type, "pciex")) {
+			pci_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->type, "sbus")) {
+			sbus_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->type, "upa")) {
+			upa_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->type, "ebus")) {
+			ebus_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->name, "usb") ||
+		    !strcmp(parent->name, "hub")) {
+			usb_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->type, "i2c")) {
+			i2c_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->type, "firewire")) {
+			ieee1394_path_component(dp, tmp_buf);
+			return;
+		}
+		if (!strcmp(parent->type, "virtual-devices")) {
+			vdev_path_component(dp, tmp_buf);
+			return;
+		}
+		/* "isa" is handled with platform naming */
+	}
+
+	/* Use platform naming convention.  */
+	if (tlb_type == hypervisor)
+		sun4v_path_component(dp, tmp_buf);
+	else
+		sun4u_path_component(dp, tmp_buf);
+}
+
+char * __init build_path_component(struct device_node *dp)
+{
+	char tmp_buf[64], *n;
+
+	tmp_buf[0] = '\0';
+	__build_path_component(dp, tmp_buf);
+	if (tmp_buf[0] == '\0')
+		strcpy(tmp_buf, dp->name);
+
+	n = prom_early_alloc(strlen(tmp_buf) + 1);
+	strcpy(n, tmp_buf);
+
+	return n;
+}
+
+static const char *get_mid_prop(void)
+{
+	return (tlb_type == spitfire ? "upa-portid" : "portid");
+}
+
+bool arch_find_n_match_cpu_physical_id(struct device_node *cpun,
+				       int cpu, unsigned int *thread)
+{
+	const char *mid_prop = get_mid_prop();
+	int this_cpu_id;
+
+	/* On hypervisor based platforms we interrogate the 'reg'
+	 * property.  On everything else we look for a 'upa-portid',
+	 * 'portid', or 'cpuid' property.
+	 */
+
+	if (tlb_type == hypervisor) {
+		struct property *prop = of_find_property(cpun, "reg", NULL);
+		u32 *regs;
+
+		if (!prop) {
+			pr_warn("CPU node missing reg property\n");
+			return false;
+		}
+		regs = prop->value;
+		this_cpu_id = regs[0] & 0x0fffffff;
+	} else {
+		this_cpu_id = of_getintprop_default(cpun, mid_prop, -1);
+
+		if (this_cpu_id < 0) {
+			mid_prop = "cpuid";
+			this_cpu_id = of_getintprop_default(cpun, mid_prop, -1);
+		}
+		if (this_cpu_id < 0) {
+			pr_warn("CPU node missing cpu ID property\n");
+			return false;
+		}
+	}
+	if (this_cpu_id == cpu) {
+		if (thread) {
+			int proc_id = cpu_data(cpu).proc_id;
+
+			/* On sparc64, the cpu thread information is obtained
+			 * either from OBP or the machine description.  We've
+			 * actually probed this information already long before
+			 * this interface gets called so instead of interrogating
+			 * both the OF node and the MDESC again, just use what
+			 * we discovered already.
+			 */
+			if (proc_id < 0)
+				proc_id = 0;
+			*thread = proc_id;
+		}
+		return true;
+	}
+	return false;
+}
+
+static void *of_iterate_over_cpus(void *(*func)(struct device_node *, int, int), int arg)
+{
+	struct device_node *dp;
+	const char *mid_prop;
+
+	mid_prop = get_mid_prop();
+	for_each_node_by_type(dp, "cpu") {
+		int cpuid = of_getintprop_default(dp, mid_prop, -1);
+		const char *this_mid_prop = mid_prop;
+		void *ret;
+
+		if (cpuid < 0) {
+			this_mid_prop = "cpuid";
+			cpuid = of_getintprop_default(dp, this_mid_prop, -1);
+		}
+		if (cpuid < 0) {
+			prom_printf("OF: Serious problem, cpu lacks "
+				    "%s property", this_mid_prop);
+			prom_halt();
+		}
+#ifdef CONFIG_SMP
+		if (cpuid >= NR_CPUS) {
+			printk(KERN_WARNING "Ignoring CPU %d which is "
+			       ">= NR_CPUS (%d)\n",
+			       cpuid, NR_CPUS);
+			continue;
+		}
+#endif
+		ret = func(dp, cpuid, arg);
+		if (ret)
+			return ret;
+	}
+	return NULL;
+}
+
+static void *check_cpu_node(struct device_node *dp, int cpuid, int id)
+{
+	if (id == cpuid)
+		return dp;
+	return NULL;
+}
+
+struct device_node *of_find_node_by_cpuid(int cpuid)
+{
+	return of_iterate_over_cpus(check_cpu_node, cpuid);
+}
+
+static void *record_one_cpu(struct device_node *dp, int cpuid, int arg)
+{
+	ncpus_probed++;
+#ifdef CONFIG_SMP
+	set_cpu_present(cpuid, true);
+	set_cpu_possible(cpuid, true);
+#endif
+	return NULL;
+}
+
+void __init of_populate_present_mask(void)
+{
+	if (tlb_type == hypervisor)
+		return;
+
+	ncpus_probed = 0;
+	of_iterate_over_cpus(record_one_cpu, 0);
+}
+
+static void *fill_in_one_cpu(struct device_node *dp, int cpuid, int arg)
+{
+	struct device_node *portid_parent = NULL;
+	int portid = -1;
+
+	if (of_find_property(dp, "cpuid", NULL)) {
+		int limit = 2;
+
+		portid_parent = dp;
+		while (limit--) {
+			portid_parent = portid_parent->parent;
+			if (!portid_parent)
+				break;
+			portid = of_getintprop_default(portid_parent,
+						       "portid", -1);
+			if (portid >= 0)
+				break;
+		}
+	}
+
+#ifndef CONFIG_SMP
+	/* On uniprocessor we only want the values for the
+	 * real physical cpu the kernel booted on; however,
+	 * cpu_data() only has one entry, at index 0.
+	 */
+	if (cpuid != real_hard_smp_processor_id())
+		return NULL;
+	cpuid = 0;
+#endif
+
+	cpu_data(cpuid).clock_tick =
+		of_getintprop_default(dp, "clock-frequency", 0);
+
+	if (portid_parent) {
+		cpu_data(cpuid).dcache_size =
+			of_getintprop_default(dp, "l1-dcache-size",
+					      16 * 1024);
+		cpu_data(cpuid).dcache_line_size =
+			of_getintprop_default(dp, "l1-dcache-line-size",
+					      32);
+		cpu_data(cpuid).icache_size =
+			of_getintprop_default(dp, "l1-icache-size",
+					      8 * 1024);
+		cpu_data(cpuid).icache_line_size =
+			of_getintprop_default(dp, "l1-icache-line-size",
+					      32);
+		cpu_data(cpuid).ecache_size =
+			of_getintprop_default(dp, "l2-cache-size", 0);
+		cpu_data(cpuid).ecache_line_size =
+			of_getintprop_default(dp, "l2-cache-line-size", 0);
+		if (!cpu_data(cpuid).ecache_size ||
+		    !cpu_data(cpuid).ecache_line_size) {
+			cpu_data(cpuid).ecache_size =
+				of_getintprop_default(portid_parent,
+						      "l2-cache-size",
+						      (4 * 1024 * 1024));
+			cpu_data(cpuid).ecache_line_size =
+				of_getintprop_default(portid_parent,
+						      "l2-cache-line-size", 64);
+		}
+
+		cpu_data(cpuid).core_id = portid + 1;
+		cpu_data(cpuid).proc_id = portid;
+	} else {
+		cpu_data(cpuid).dcache_size =
+			of_getintprop_default(dp, "dcache-size", 16 * 1024);
+		cpu_data(cpuid).dcache_line_size =
+			of_getintprop_default(dp, "dcache-line-size", 32);
+
+		cpu_data(cpuid).icache_size =
+			of_getintprop_default(dp, "icache-size", 16 * 1024);
+		cpu_data(cpuid).icache_line_size =
+			of_getintprop_default(dp, "icache-line-size", 32);
+
+		cpu_data(cpuid).ecache_size =
+			of_getintprop_default(dp, "ecache-size",
+					      (4 * 1024 * 1024));
+		cpu_data(cpuid).ecache_line_size =
+			of_getintprop_default(dp, "ecache-line-size", 64);
+
+		cpu_data(cpuid).core_id = 0;
+		cpu_data(cpuid).proc_id = -1;
+	}
+
+	return NULL;
+}
+
+void __init of_fill_in_cpu_data(void)
+{
+	if (tlb_type == hypervisor)
+		return;
+
+	of_iterate_over_cpus(fill_in_one_cpu, 0);
+
+	smp_fill_in_sib_core_maps();
+}
+
+void __init of_console_init(void)
+{
+	char *msg = "OF stdout device is: %s\n";
+	struct device_node *dp;
+	const char *type;
+	phandle node;
+
+	of_console_path = prom_early_alloc(256);
+	if (prom_ihandle2path(prom_stdout, of_console_path, 256) < 0) {
+		prom_printf("Cannot obtain path of stdout.\n");
+		prom_halt();
+	}
+	of_console_options = strrchr(of_console_path, ':');
+	if (of_console_options) {
+		of_console_options++;
+		if (*of_console_options == '\0')
+			of_console_options = NULL;
+	}
+
+	node = prom_inst2pkg(prom_stdout);
+	if (!node) {
+		prom_printf("Cannot resolve stdout node from "
+			    "instance %08x.\n", prom_stdout);
+		prom_halt();
+	}
+
+	dp = of_find_node_by_phandle(node);
+	type = of_get_property(dp, "device_type", NULL);
+	if (!type) {
+		prom_printf("Console stdout lacks device_type property.\n");
+		prom_halt();
+	}
+
+	if (strcmp(type, "display") && strcmp(type, "serial")) {
+		prom_printf("Console device_type is neither display "
+			    "nor serial.\n");
+		prom_halt();
+	}
+
+	of_console_device = dp;
+
+	printk(msg, of_console_path);
+}
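A minimal userspace sketch of the sun4v root-node naming rule in sun4v_path_component() above: bits 63:60 of the 'reg' physical address select the space (0 = memory, "m" prefix; 8 = I/O, "i" prefix; 12 = plain high bits), with the rest split into high and low halves. The node names and addresses below are made up for illustration:

#include <stdio.h>

static void sun4v_name(const char *name, unsigned long long phys_addr)
{
	unsigned int type = phys_addr >> 60;
	unsigned int high = (phys_addr >> 32) & 0x0fffffff;
	unsigned int low = phys_addr & 0xffffffff;

	if (type == 0 || type == 8) {
		const char *prefix = (type == 0) ? "m" : "i";

		if (low)
			printf("%s@%s%x,%x\n", name, prefix, high, low);
		else
			printf("%s@%s%x\n", name, prefix, high);
	} else if (type == 12) {
		printf("%s@%x\n", name, high);
	}
}

int main(void)
{
	sun4v_name("memory", 0x0000000080000000ULL);	/* memory@m0,80000000 */
	sun4v_name("pci", 0x8000000002000000ULL);	/* pci@i0,2000000 */
	return 0;
}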
diff --git a/arch/sparc/kernel/prom_common.c b/arch/sparc/kernel/prom_common.c
new file mode 100644
index 0000000..79cc0d1
--- /dev/null
+++ b/arch/sparc/kernel/prom_common.c
@@ -0,0 +1,162 @@
+/* prom_common.c: OF device tree support common code.
+ *
+ * Paul Mackerras	August 1996.
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ *  Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
+ *    {engebret|bergner}@us.ibm.com
+ *
+ *  Adapted for sparc by David S. Miller davem@davemloft.net
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/errno.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_pdt.h>
+#include <asm/prom.h>
+#include <asm/oplib.h>
+
+#include "prom.h"
+
+struct device_node *of_console_device;
+EXPORT_SYMBOL(of_console_device);
+
+char *of_console_path;
+EXPORT_SYMBOL(of_console_path);
+
+char *of_console_options;
+EXPORT_SYMBOL(of_console_options);
+
+int of_getintprop_default(struct device_node *np, const char *name, int def)
+{
+	struct property *prop;
+	int len;
+
+	prop = of_find_property(np, name, &len);
+	if (!prop || len != 4)
+		return def;
+
+	return *(int *) prop->value;
+}
+EXPORT_SYMBOL(of_getintprop_default);
+
+DEFINE_MUTEX(of_set_property_mutex);
+EXPORT_SYMBOL(of_set_property_mutex);
+
+int of_set_property(struct device_node *dp, const char *name, void *val, int len)
+{
+	struct property **prevp;
+	unsigned long flags;
+	void *new_val;
+	int err;
+
+	new_val = kmemdup(val, len, GFP_KERNEL);
+	if (!new_val)
+		return -ENOMEM;
+
+	err = -ENODEV;
+
+	mutex_lock(&of_set_property_mutex);
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	prevp = &dp->properties;
+	while (*prevp) {
+		struct property *prop = *prevp;
+
+		if (!strcasecmp(prop->name, name)) {
+			void *old_val = prop->value;
+			int ret;
+
+			ret = prom_setprop(dp->phandle, name, val, len);
+
+			err = -EINVAL;
+			if (ret >= 0) {
+				prop->value = new_val;
+				prop->length = len;
+
+				if (OF_IS_DYNAMIC(prop))
+					kfree(old_val);
+
+				OF_MARK_DYNAMIC(prop);
+
+				err = 0;
+			}
+			break;
+		}
+		prevp = &(*prevp)->next;
+	}
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+	mutex_unlock(&of_set_property_mutex);
+
+	/* XXX Update procfs if necessary... */
+
+	return err;
+}
+EXPORT_SYMBOL(of_set_property);
+
+int of_find_in_proplist(const char *list, const char *match, int len)
+{
+	while (len > 0) {
+		int l;
+
+		if (!strcmp(list, match))
+			return 1;
+		l = strlen(list) + 1;
+		list += l;
+		len -= l;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(of_find_in_proplist);
+
+/*
+ * SPARC32 and SPARC64's prom_nextprop() do things differently
+ * here, despite sharing the same interface.  SPARC32 doesn't fill in 'buf',
+ * returning NULL on an error.  SPARC64 fills in 'buf', but sets it to an
+ * empty string upon error.
+ */
+static int __init handle_nextprop_quirks(char *buf, const char *name)
+{
+	if (!name || strlen(name) == 0)
+		return -1;
+
+#ifdef CONFIG_SPARC32
+	strcpy(buf, name);
+#endif
+	return 0;
+}
+
+static int __init prom_common_nextprop(phandle node, char *prev, char *buf)
+{
+	const char *name;
+
+	buf[0] = '\0';
+	name = prom_nextprop(node, prev, buf);
+	return handle_nextprop_quirks(buf, name);
+}
+
+unsigned int prom_early_allocated __initdata;
+
+static struct of_pdt_ops prom_sparc_ops __initdata = {
+	.nextprop = prom_common_nextprop,
+	.getproplen = prom_getproplen,
+	.getproperty = prom_getproperty,
+	.getchild = prom_getchild,
+	.getsibling = prom_getsibling,
+};
+
+void __init prom_build_devicetree(void)
+{
+	of_pdt_build_devicetree(prom_root_node, &prom_sparc_ops);
+	of_console_init();
+
+	pr_info("PROM: Built device tree with %u bytes of memory.\n",
+			prom_early_allocated);
+}
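of_find_in_proplist() above scans an OF string-list property: a blob of NUL-terminated strings packed back to back, with 'len' covering the whole blob. A self-contained userspace sketch of the same scan, using a hypothetical 'compatible' value:

#include <stdio.h>
#include <string.h>

static int find_in_proplist(const char *list, const char *match, int len)
{
	while (len > 0) {
		int l;

		if (!strcmp(list, match))
			return 1;
		l = strlen(list) + 1;	/* skip this entry and its NUL */
		list += l;
		len -= l;
	}
	return 0;
}

int main(void)
{
	/* Two packed entries: "SUNW,schizo" and "pci108e,8001". */
	const char compat[] = "SUNW,schizo\0pci108e,8001";

	printf("%d\n", find_in_proplist(compat, "pci108e,8001",
					sizeof(compat)));	/* prints 1 */
	return 0;
}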
diff --git a/arch/sparc/kernel/prom_irqtrans.c b/arch/sparc/kernel/prom_irqtrans.c
new file mode 100644
index 0000000..f3fecac
--- /dev/null
+++ b/arch/sparc/kernel/prom_irqtrans.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/irq.h>
+#include <asm/upa.h>
+
+#include "prom.h"
+
+#ifdef CONFIG_PCI
+/* PSYCHO interrupt mapping support. */
+#define PSYCHO_IMAP_A_SLOT0	0x0c00UL
+#define PSYCHO_IMAP_B_SLOT0	0x0c20UL
+static unsigned long psycho_pcislot_imap_offset(unsigned long ino)
+{
+	unsigned int bus =  (ino & 0x10) >> 4;
+	unsigned int slot = (ino & 0x0c) >> 2;
+
+	if (bus == 0)
+		return PSYCHO_IMAP_A_SLOT0 + (slot * 8);
+	else
+		return PSYCHO_IMAP_B_SLOT0 + (slot * 8);
+}
+
+#define PSYCHO_OBIO_IMAP_BASE	0x1000UL
+
+#define PSYCHO_ONBOARD_IRQ_BASE		0x20
+#define psycho_onboard_imap_offset(__ino) \
+	(PSYCHO_OBIO_IMAP_BASE + (((__ino) & 0x1f) << 3))
+
+#define PSYCHO_ICLR_A_SLOT0	0x1400UL
+#define PSYCHO_ICLR_SCSI	0x1800UL
+
+#define psycho_iclr_offset(ino)					      \
+	((ino & 0x20) ? (PSYCHO_ICLR_SCSI + (((ino) & 0x1f) << 3)) :  \
+			(PSYCHO_ICLR_A_SLOT0 + (((ino) & 0x1f)<<3)))
+
+static unsigned int psycho_irq_build(struct device_node *dp,
+				     unsigned int ino,
+				     void *_data)
+{
+	unsigned long controller_regs = (unsigned long) _data;
+	unsigned long imap, iclr;
+	unsigned long imap_off, iclr_off;
+	int inofixup = 0;
+
+	ino &= 0x3f;
+	if (ino < PSYCHO_ONBOARD_IRQ_BASE) {
+		/* PCI slot */
+		imap_off = psycho_pcislot_imap_offset(ino);
+	} else {
+		/* Onboard device */
+		imap_off = psycho_onboard_imap_offset(ino);
+	}
+
+	/* Now build the IRQ bucket. */
+	imap = controller_regs + imap_off;
+
+	iclr_off = psycho_iclr_offset(ino);
+	iclr = controller_regs + iclr_off;
+
+	if ((ino & 0x20) == 0)
+		inofixup = ino & 0x03;
+
+	return build_irq(inofixup, iclr, imap);
+}
+
+static void __init psycho_irq_trans_init(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = psycho_irq_build;
+
+	regs = of_get_property(dp, "reg", NULL);
+	dp->irq_trans->data = (void *) regs[2].phys_addr;
+}
+
+#define sabre_read(__reg) \
+({	u64 __ret; \
+	__asm__ __volatile__("ldxa [%1] %2, %0" \
+			     : "=r" (__ret) \
+			     : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
+			     : "memory"); \
+	__ret; \
+})
+
+struct sabre_irq_data {
+	unsigned long controller_regs;
+	unsigned int pci_first_busno;
+};
+#define SABRE_CONFIGSPACE	0x001000000UL
+#define SABRE_WRSYNC		0x1c20UL
+
+#define SABRE_CONFIG_BASE(CONFIG_SPACE)	\
+	(CONFIG_SPACE | (1UL << 24))
+#define SABRE_CONFIG_ENCODE(BUS, DEVFN, REG)	\
+	(((unsigned long)(BUS)   << 16) |	\
+	 ((unsigned long)(DEVFN) << 8)  |	\
+	 ((unsigned long)(REG)))
+
+/* When a device lives behind a bridge deeper in the PCI bus topology
+ * than APB, a special sequence must run to make sure all pending DMA
+ * transfers at the time of IRQ delivery are visible in the coherency
+ * domain by the cpu.  This sequence is to perform a read on the far
+ * side of the non-APB bridge, then perform a read of Sabre's DMA
+ * write-sync register.
+ */
+static void sabre_wsync_handler(unsigned int ino, void *_arg1, void *_arg2)
+{
+	unsigned int phys_hi = (unsigned int) (unsigned long) _arg1;
+	struct sabre_irq_data *irq_data = _arg2;
+	unsigned long controller_regs = irq_data->controller_regs;
+	unsigned long sync_reg = controller_regs + SABRE_WRSYNC;
+	unsigned long config_space = controller_regs + SABRE_CONFIGSPACE;
+	unsigned int bus, devfn;
+	u16 _unused;
+
+	config_space = SABRE_CONFIG_BASE(config_space);
+
+	bus = (phys_hi >> 16) & 0xff;
+	devfn = (phys_hi >> 8) & 0xff;
+
+	config_space |= SABRE_CONFIG_ENCODE(bus, devfn, 0x00);
+
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "lduha [%1] %2, %0\n\t"
+			     "membar #Sync"
+			     : "=r" (_unused)
+			     : "r" ((u16 *) config_space),
+			       "i" (ASI_PHYS_BYPASS_EC_E_L)
+			     : "memory");
+
+	sabre_read(sync_reg);
+}
+
+#define SABRE_IMAP_A_SLOT0	0x0c00UL
+#define SABRE_IMAP_B_SLOT0	0x0c20UL
+#define SABRE_ICLR_A_SLOT0	0x1400UL
+#define SABRE_ICLR_B_SLOT0	0x1480UL
+#define SABRE_ICLR_SCSI		0x1800UL
+#define SABRE_ICLR_ETH		0x1808UL
+#define SABRE_ICLR_BPP		0x1810UL
+#define SABRE_ICLR_AU_REC	0x1818UL
+#define SABRE_ICLR_AU_PLAY	0x1820UL
+#define SABRE_ICLR_PFAIL	0x1828UL
+#define SABRE_ICLR_KMS		0x1830UL
+#define SABRE_ICLR_FLPY		0x1838UL
+#define SABRE_ICLR_SHW		0x1840UL
+#define SABRE_ICLR_KBD		0x1848UL
+#define SABRE_ICLR_MS		0x1850UL
+#define SABRE_ICLR_SER		0x1858UL
+#define SABRE_ICLR_UE		0x1870UL
+#define SABRE_ICLR_CE		0x1878UL
+#define SABRE_ICLR_PCIERR	0x1880UL
+
+static unsigned long sabre_pcislot_imap_offset(unsigned long ino)
+{
+	unsigned int bus =  (ino & 0x10) >> 4;
+	unsigned int slot = (ino & 0x0c) >> 2;
+
+	if (bus == 0)
+		return SABRE_IMAP_A_SLOT0 + (slot * 8);
+	else
+		return SABRE_IMAP_B_SLOT0 + (slot * 8);
+}
+
+#define SABRE_OBIO_IMAP_BASE	0x1000UL
+#define SABRE_ONBOARD_IRQ_BASE	0x20
+#define sabre_onboard_imap_offset(__ino) \
+	(SABRE_OBIO_IMAP_BASE + (((__ino) & 0x1f) << 3))
+
+#define sabre_iclr_offset(ino)					      \
+	((ino & 0x20) ? (SABRE_ICLR_SCSI + (((ino) & 0x1f) << 3)) :  \
+			(SABRE_ICLR_A_SLOT0 + (((ino) & 0x1f)<<3)))
+
+static int sabre_device_needs_wsync(struct device_node *dp)
+{
+	struct device_node *parent = dp->parent;
+	const char *parent_model, *parent_compat;
+
+	/* This traversal up towards the root is meant to
+	 * handle two cases:
+	 *
+	 * 1) non-PCI bus sitting under PCI, such as 'ebus'
+	 * 2) the PCI controller's own interrupts, which
+	 *    use sabre_irq_build() but do not need
+	 *    the DMA synchronization handling
+	 */
+	while (parent) {
+		if (!strcmp(parent->type, "pci"))
+			break;
+		parent = parent->parent;
+	}
+
+	if (!parent)
+		return 0;
+
+	parent_model = of_get_property(parent,
+				       "model", NULL);
+	if (parent_model &&
+	    (!strcmp(parent_model, "SUNW,sabre") ||
+	     !strcmp(parent_model, "SUNW,simba")))
+		return 0;
+
+	parent_compat = of_get_property(parent,
+					"compatible", NULL);
+	if (parent_compat &&
+	    (!strcmp(parent_compat, "pci108e,a000") ||
+	     !strcmp(parent_compat, "pci108e,a001")))
+		return 0;
+
+	return 1;
+}
+
+static unsigned int sabre_irq_build(struct device_node *dp,
+				    unsigned int ino,
+				    void *_data)
+{
+	struct sabre_irq_data *irq_data = _data;
+	unsigned long controller_regs = irq_data->controller_regs;
+	const struct linux_prom_pci_registers *regs;
+	unsigned long imap, iclr;
+	unsigned long imap_off, iclr_off;
+	int inofixup = 0;
+	int irq;
+
+	ino &= 0x3f;
+	if (ino < SABRE_ONBOARD_IRQ_BASE) {
+		/* PCI slot */
+		imap_off = sabre_pcislot_imap_offset(ino);
+	} else {
+		/* onboard device */
+		imap_off = sabre_onboard_imap_offset(ino);
+	}
+
+	/* Now build the IRQ bucket. */
+	imap = controller_regs + imap_off;
+
+	iclr_off = sabre_iclr_offset(ino);
+	iclr = controller_regs + iclr_off;
+
+	if ((ino & 0x20) == 0)
+		inofixup = ino & 0x03;
+
+	irq = build_irq(inofixup, iclr, imap);
+
+	/* If the parent device is a PCI<->PCI bridge other than
+	 * APB, we have to install a pre-handler to ensure that
+	 * all pending DMA is drained before the interrupt handler
+	 * is run.
+	 */
+	regs = of_get_property(dp, "reg", NULL);
+	if (regs && sabre_device_needs_wsync(dp)) {
+		irq_install_pre_handler(irq,
+					sabre_wsync_handler,
+					(void *) (long) regs->phys_hi,
+					(void *) irq_data);
+	}
+
+	return irq;
+}
+
+static void __init sabre_irq_trans_init(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+	struct sabre_irq_data *irq_data;
+	const u32 *busrange;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = sabre_irq_build;
+
+	irq_data = prom_early_alloc(sizeof(struct sabre_irq_data));
+
+	regs = of_get_property(dp, "reg", NULL);
+	irq_data->controller_regs = regs[0].phys_addr;
+
+	busrange = of_get_property(dp, "bus-range", NULL);
+	irq_data->pci_first_busno = busrange[0];
+
+	dp->irq_trans->data = irq_data;
+}
+
+/* SCHIZO interrupt mapping support.  Unlike Psycho, for this controller the
+ * imap/iclr registers are per-PBM.
+ */
+#define SCHIZO_IMAP_BASE	0x1000UL
+#define SCHIZO_ICLR_BASE	0x1400UL
+
+static unsigned long schizo_imap_offset(unsigned long ino)
+{
+	return SCHIZO_IMAP_BASE + (ino * 8UL);
+}
+
+static unsigned long schizo_iclr_offset(unsigned long ino)
+{
+	return SCHIZO_ICLR_BASE + (ino * 8UL);
+}
+
+static unsigned long schizo_ino_to_iclr(unsigned long pbm_regs,
+					unsigned int ino)
+{
+	return pbm_regs + schizo_iclr_offset(ino);
+}
+
+static unsigned long schizo_ino_to_imap(unsigned long pbm_regs,
+					unsigned int ino)
+{
+	return pbm_regs + schizo_imap_offset(ino);
+}
+
+#define schizo_read(__reg) \
+({	u64 __ret; \
+	__asm__ __volatile__("ldxa [%1] %2, %0" \
+			     : "=r" (__ret) \
+			     : "r" (__reg), "i" (ASI_PHYS_BYPASS_EC_E) \
+			     : "memory"); \
+	__ret; \
+})
+#define schizo_write(__reg, __val) \
+	__asm__ __volatile__("stxa %0, [%1] %2" \
+			     : /* no outputs */ \
+			     : "r" (__val), "r" (__reg), \
+			       "i" (ASI_PHYS_BYPASS_EC_E) \
+			     : "memory")
+
+static void tomatillo_wsync_handler(unsigned int ino, void *_arg1, void *_arg2)
+{
+	unsigned long sync_reg = (unsigned long) _arg2;
+	u64 mask = 1UL << (ino & IMAP_INO);
+	u64 val;
+	int limit;
+
+	schizo_write(sync_reg, mask);
+
+	limit = 100000;
+	val = 0;
+	while (--limit) {
+		val = schizo_read(sync_reg);
+		if (!(val & mask))
+			break;
+	}
+	if (limit <= 0) {
+		printk("tomatillo_wsync_handler: DMA won't sync [%llx:%llx]\n",
+		       val, mask);
+	}
+
+	if (_arg1) {
+		static unsigned char cacheline[64]
+			__attribute__ ((aligned (64)));
+
+		__asm__ __volatile__("rd %%fprs, %0\n\t"
+				     "or %0, %4, %1\n\t"
+				     "wr %1, 0x0, %%fprs\n\t"
+				     "stda %%f0, [%5] %6\n\t"
+				     "wr %0, 0x0, %%fprs\n\t"
+				     "membar #Sync"
+				     : "=&r" (mask), "=&r" (val)
+				     : "0" (mask), "1" (val),
+				     "i" (FPRS_FEF), "r" (&cacheline[0]),
+				     "i" (ASI_BLK_COMMIT_P));
+	}
+}
+
+struct schizo_irq_data {
+	unsigned long pbm_regs;
+	unsigned long sync_reg;
+	u32 portid;
+	int chip_version;
+};
+
+static unsigned int schizo_irq_build(struct device_node *dp,
+				     unsigned int ino,
+				     void *_data)
+{
+	struct schizo_irq_data *irq_data = _data;
+	unsigned long pbm_regs = irq_data->pbm_regs;
+	unsigned long imap, iclr;
+	int ign_fixup;
+	int irq;
+	int is_tomatillo;
+
+	ino &= 0x3f;
+
+	/* Now build the IRQ bucket. */
+	imap = schizo_ino_to_imap(pbm_regs, ino);
+	iclr = schizo_ino_to_iclr(pbm_regs, ino);
+
+	/* On Schizo, no inofixup occurs.  This is because each
+	 * INO has its own IMAP register.  On Psycho and Sabre
+	 * there is only one IMAP register for each PCI slot even
+	 * though four different INOs can be generated by each
+	 * PCI slot.
+	 *
+	 * But, for JBUS variants (essentially, Tomatillo), we have
+	 * to fixup the lowest bit of the interrupt group number.
+	 */
+	ign_fixup = 0;
+
+	is_tomatillo = (irq_data->sync_reg != 0UL);
+
+	if (is_tomatillo) {
+		if (irq_data->portid & 1)
+			ign_fixup = (1 << 6);
+	}
+
+	irq = build_irq(ign_fixup, iclr, imap);
+
+	if (is_tomatillo) {
+		irq_install_pre_handler(irq,
+					tomatillo_wsync_handler,
+					((irq_data->chip_version <= 4) ?
+					 (void *) 1 : (void *) 0),
+					(void *) irq_data->sync_reg);
+	}
+
+	return irq;
+}
+
+static void __init __schizo_irq_trans_init(struct device_node *dp,
+					   int is_tomatillo)
+{
+	const struct linux_prom64_registers *regs;
+	struct schizo_irq_data *irq_data;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = schizo_irq_build;
+
+	irq_data = prom_early_alloc(sizeof(struct schizo_irq_data));
+
+	regs = of_get_property(dp, "reg", NULL);
+	dp->irq_trans->data = irq_data;
+
+	irq_data->pbm_regs = regs[0].phys_addr;
+	if (is_tomatillo)
+		irq_data->sync_reg = regs[3].phys_addr + 0x1a18UL;
+	else
+		irq_data->sync_reg = 0UL;
+	irq_data->portid = of_getintprop_default(dp, "portid", 0);
+	irq_data->chip_version = of_getintprop_default(dp, "version#", 0);
+}
+
+static void __init schizo_irq_trans_init(struct device_node *dp)
+{
+	__schizo_irq_trans_init(dp, 0);
+}
+
+static void __init tomatillo_irq_trans_init(struct device_node *dp)
+{
+	__schizo_irq_trans_init(dp, 1);
+}
+
+static unsigned int pci_sun4v_irq_build(struct device_node *dp,
+					unsigned int devino,
+					void *_data)
+{
+	u32 devhandle = (u32) (unsigned long) _data;
+
+	return sun4v_build_irq(devhandle, devino);
+}
+
+static void __init pci_sun4v_irq_trans_init(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = pci_sun4v_irq_build;
+
+	regs = of_get_property(dp, "reg", NULL);
+	dp->irq_trans->data = (void *) (unsigned long)
+		((regs->phys_addr >> 32UL) & 0x0fffffff);
+}
+
+struct fire_irq_data {
+	unsigned long pbm_regs;
+	u32 portid;
+};
+
+#define FIRE_IMAP_BASE	0x001000
+#define FIRE_ICLR_BASE	0x001400
+
+static unsigned long fire_imap_offset(unsigned long ino)
+{
+	return FIRE_IMAP_BASE + (ino * 8UL);
+}
+
+static unsigned long fire_iclr_offset(unsigned long ino)
+{
+	return FIRE_ICLR_BASE + (ino * 8UL);
+}
+
+static unsigned long fire_ino_to_iclr(unsigned long pbm_regs,
+					    unsigned int ino)
+{
+	return pbm_regs + fire_iclr_offset(ino);
+}
+
+static unsigned long fire_ino_to_imap(unsigned long pbm_regs,
+					    unsigned int ino)
+{
+	return pbm_regs + fire_imap_offset(ino);
+}
+
+static unsigned int fire_irq_build(struct device_node *dp,
+					 unsigned int ino,
+					 void *_data)
+{
+	struct fire_irq_data *irq_data = _data;
+	unsigned long pbm_regs = irq_data->pbm_regs;
+	unsigned long imap, iclr;
+	unsigned long int_ctrlr;
+
+	ino &= 0x3f;
+
+	/* Now build the IRQ bucket. */
+	imap = fire_ino_to_imap(pbm_regs, ino);
+	iclr = fire_ino_to_iclr(pbm_regs, ino);
+
+	/* Set the interrupt controller number.  */
+	int_ctrlr = 1 << 6;
+	upa_writeq(int_ctrlr, imap);
+
+	/* The interrupt map registers do not have an INO field
+	 * like other chips do.  They return zero in the INO
+	 * field, and the interrupt controller number is controlled
+	 * in bits 6 to 9.  So in order for build_irq() to get
+	 * the INO right we pass it in as part of the fixup
+	 * which will get added to the map register zero value
+	 * read by build_irq().
+	 */
+	ino |= (irq_data->portid << 6);
+	ino -= int_ctrlr;
+	return build_irq(ino, iclr, imap);
+}
+
+static void __init fire_irq_trans_init(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+	struct fire_irq_data *irq_data;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = fire_irq_build;
+
+	irq_data = prom_early_alloc(sizeof(struct fire_irq_data));
+
+	regs = of_get_property(dp, "reg", NULL);
+	dp->irq_trans->data = irq_data;
+
+	irq_data->pbm_regs = regs[0].phys_addr;
+	irq_data->portid = of_getintprop_default(dp, "portid", 0);
+}
+#endif /* CONFIG_PCI */
+
+#ifdef CONFIG_SBUS
+/* INO number to IMAP register offset for SYSIO external IRQ's.
+ * This should conform to both Sunfire/Wildfire server and Fusion
+ * desktop designs.
+ */
+#define SYSIO_IMAP_SLOT0	0x2c00UL
+#define SYSIO_IMAP_SLOT1	0x2c08UL
+#define SYSIO_IMAP_SLOT2	0x2c10UL
+#define SYSIO_IMAP_SLOT3	0x2c18UL
+#define SYSIO_IMAP_SCSI		0x3000UL
+#define SYSIO_IMAP_ETH		0x3008UL
+#define SYSIO_IMAP_BPP		0x3010UL
+#define SYSIO_IMAP_AUDIO	0x3018UL
+#define SYSIO_IMAP_PFAIL	0x3020UL
+#define SYSIO_IMAP_KMS		0x3028UL
+#define SYSIO_IMAP_FLPY		0x3030UL
+#define SYSIO_IMAP_SHW		0x3038UL
+#define SYSIO_IMAP_KBD		0x3040UL
+#define SYSIO_IMAP_MS		0x3048UL
+#define SYSIO_IMAP_SER		0x3050UL
+#define SYSIO_IMAP_TIM0		0x3060UL
+#define SYSIO_IMAP_TIM1		0x3068UL
+#define SYSIO_IMAP_UE		0x3070UL
+#define SYSIO_IMAP_CE		0x3078UL
+#define SYSIO_IMAP_SBERR	0x3080UL
+#define SYSIO_IMAP_PMGMT	0x3088UL
+#define SYSIO_IMAP_GFX		0x3090UL
+#define SYSIO_IMAP_EUPA		0x3098UL
+
+#define bogon     ((unsigned long) -1)
+static unsigned long sysio_irq_offsets[] = {
+	/* SBUS Slot 0 --> 3, level 1 --> 7 */
+	SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0,
+	SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0,
+	SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1,
+	SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1,
+	SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2,
+	SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2,
+	SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3,
+	SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3,
+
+	/* Onboard devices (not relevant/used on SunFire). */
+	SYSIO_IMAP_SCSI,
+	SYSIO_IMAP_ETH,
+	SYSIO_IMAP_BPP,
+	bogon,
+	SYSIO_IMAP_AUDIO,
+	SYSIO_IMAP_PFAIL,
+	bogon,
+	bogon,
+	SYSIO_IMAP_KMS,
+	SYSIO_IMAP_FLPY,
+	SYSIO_IMAP_SHW,
+	SYSIO_IMAP_KBD,
+	SYSIO_IMAP_MS,
+	SYSIO_IMAP_SER,
+	bogon,
+	bogon,
+	SYSIO_IMAP_TIM0,
+	SYSIO_IMAP_TIM1,
+	bogon,
+	bogon,
+	SYSIO_IMAP_UE,
+	SYSIO_IMAP_CE,
+	SYSIO_IMAP_SBERR,
+	SYSIO_IMAP_PMGMT,
+	SYSIO_IMAP_GFX,
+	SYSIO_IMAP_EUPA,
+};
+
+#undef bogon
+
+#define NUM_SYSIO_OFFSETS ARRAY_SIZE(sysio_irq_offsets)
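+
+/* Worked example: an SBUS device interrupting at level 3 in slot 2
+ * gets ino = 3 + (2 * 8) = 0x13, which indexes SYSIO_IMAP_SLOT2
+ * above; the matching ICLR is then SYSIO_ICLR_SLOT2 + (3 - 1) * 8
+ * (see sbus_of_build_irq() below).
+ */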
+
+/* Convert Interrupt Mapping register pointer to associated
+ * Interrupt Clear register pointer, SYSIO specific version.
+ */
+#define SYSIO_ICLR_UNUSED0	0x3400UL
+#define SYSIO_ICLR_SLOT0	0x3408UL
+#define SYSIO_ICLR_SLOT1	0x3448UL
+#define SYSIO_ICLR_SLOT2	0x3488UL
+#define SYSIO_ICLR_SLOT3	0x34c8UL
+static unsigned long sysio_imap_to_iclr(unsigned long imap)
+{
+	unsigned long diff = SYSIO_ICLR_UNUSED0 - SYSIO_IMAP_SLOT0;
+
+	return imap + diff;
+}
+
+static unsigned int sbus_of_build_irq(struct device_node *dp,
+				      unsigned int ino,
+				      void *_data)
+{
+	unsigned long reg_base = (unsigned long) _data;
+	const struct linux_prom_registers *regs;
+	unsigned long imap, iclr;
+	int sbus_slot = 0;
+	int sbus_level = 0;
+
+	ino &= 0x3f;
+
+	regs = of_get_property(dp, "reg", NULL);
+	if (regs)
+		sbus_slot = regs->which_io;
+
+	if (ino < 0x20)
+		ino += (sbus_slot * 8);
+
+	imap = sysio_irq_offsets[ino];
+	if (imap == ((unsigned long)-1)) {
+		prom_printf("get_irq_translations: Bad SYSIO INO[%x]\n",
+			    ino);
+		prom_halt();
+	}
+	imap += reg_base;
+
+	/* SYSIO inconsistency.  For external SLOTS, we have to select
+	 * the right ICLR register based upon the lower SBUS irq level
+	 * bits.
+	 */
+	if (ino >= 0x20) {
+		iclr = sysio_imap_to_iclr(imap);
+	} else {
+		sbus_level = ino & 0x7;
+
+		switch (sbus_slot) {
+		case 0:
+			iclr = reg_base + SYSIO_ICLR_SLOT0;
+			break;
+		case 1:
+			iclr = reg_base + SYSIO_ICLR_SLOT1;
+			break;
+		case 2:
+			iclr = reg_base + SYSIO_ICLR_SLOT2;
+			break;
+		default:
+		case 3:
+			iclr = reg_base + SYSIO_ICLR_SLOT3;
+			break;
+		}
+
+		iclr += ((unsigned long)sbus_level - 1UL) * 8UL;
+	}
+	return build_irq(sbus_level, iclr, imap);
+}
+
+static void __init sbus_irq_trans_init(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = sbus_of_build_irq;
+
+	regs = of_get_property(dp, "reg", NULL);
+	dp->irq_trans->data = (void *) (unsigned long) regs->phys_addr;
+}
+#endif /* CONFIG_SBUS */
+
+
+static unsigned int central_build_irq(struct device_node *dp,
+				      unsigned int ino,
+				      void *_data)
+{
+	struct device_node *central_dp = _data;
+	struct platform_device *central_op = of_find_device_by_node(central_dp);
+	struct resource *res;
+	unsigned long imap, iclr;
+	u32 tmp;
+
+	if (!strcmp(dp->name, "eeprom")) {
+		res = &central_op->resource[5];
+	} else if (!strcmp(dp->name, "zs")) {
+		res = &central_op->resource[4];
+	} else if (!strcmp(dp->name, "clock-board")) {
+		res = &central_op->resource[3];
+	} else {
+		return ino;
+	}
+
+	imap = res->start + 0x00UL;
+	iclr = res->start + 0x10UL;
+
+	/* Set the INO state to idle, and disable.  */
+	upa_writel(0, iclr);
+	upa_readl(iclr);
+
+	tmp = upa_readl(imap);
+	tmp &= ~0x80000000;
+	upa_writel(tmp, imap);
+
+	return build_irq(0, iclr, imap);
+}
+
+static void __init central_irq_trans_init(struct device_node *dp)
+{
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = central_build_irq;
+
+	dp->irq_trans->data = dp;
+}
+
+struct irq_trans {
+	const char *name;
+	void (*init)(struct device_node *);
+};
+
+#ifdef CONFIG_PCI
+static struct irq_trans __initdata pci_irq_trans_table[] = {
+	{ "SUNW,sabre", sabre_irq_trans_init },
+	{ "pci108e,a000", sabre_irq_trans_init },
+	{ "pci108e,a001", sabre_irq_trans_init },
+	{ "SUNW,psycho", psycho_irq_trans_init },
+	{ "pci108e,8000", psycho_irq_trans_init },
+	{ "SUNW,schizo", schizo_irq_trans_init },
+	{ "pci108e,8001", schizo_irq_trans_init },
+	{ "SUNW,schizo+", schizo_irq_trans_init },
+	{ "pci108e,8002", schizo_irq_trans_init },
+	{ "SUNW,tomatillo", tomatillo_irq_trans_init },
+	{ "pci108e,a801", tomatillo_irq_trans_init },
+	{ "SUNW,sun4v-pci", pci_sun4v_irq_trans_init },
+	{ "pciex108e,80f0", fire_irq_trans_init },
+};
+#endif
+
+static unsigned int sun4v_vdev_irq_build(struct device_node *dp,
+					 unsigned int devino,
+					 void *_data)
+{
+	u32 devhandle = (u32) (unsigned long) _data;
+
+	return sun4v_build_irq(devhandle, devino);
+}
+
+static void __init sun4v_vdev_irq_trans_init(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+
+	dp->irq_trans = prom_early_alloc(sizeof(struct of_irq_controller));
+	dp->irq_trans->irq_build = sun4v_vdev_irq_build;
+
+	regs = of_get_property(dp, "reg", NULL);
+	dp->irq_trans->data = (void *) (unsigned long)
+		((regs->phys_addr >> 32UL) & 0x0fffffff);
+}
+
+void __init irq_trans_init(struct device_node *dp)
+{
+#ifdef CONFIG_PCI
+	const char *model;
+	int i;
+#endif
+
+#ifdef CONFIG_PCI
+	model = of_get_property(dp, "model", NULL);
+	if (!model)
+		model = of_get_property(dp, "compatible", NULL);
+	if (model) {
+		for (i = 0; i < ARRAY_SIZE(pci_irq_trans_table); i++) {
+			struct irq_trans *t = &pci_irq_trans_table[i];
+
+			if (!strcmp(model, t->name)) {
+				t->init(dp);
+				return;
+			}
+		}
+	}
+#endif
+#ifdef CONFIG_SBUS
+	if (!strcmp(dp->name, "sbus") ||
+	    !strcmp(dp->name, "sbi")) {
+		sbus_irq_trans_init(dp);
+		return;
+	}
+#endif
+	if (!strcmp(dp->name, "fhc") &&
+	    !strcmp(dp->parent->name, "central")) {
+		central_irq_trans_init(dp);
+		return;
+	}
+	if (!strcmp(dp->name, "virtual-devices") ||
+	    !strcmp(dp->name, "niu")) {
+		sun4v_vdev_irq_trans_init(dp);
+		return;
+	}
+}
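A minimal userspace sketch of the PSYCHO interrupt-map addressing used in psycho_irq_build() above: for PCI-slot interrupts (ino < 0x20) the INO encodes the bus half in bit 4 and the slot in bits 3:2, each slot owning one 8-byte IMAP register, while onboard devices index a separate per-INO block. The sample INOs are made up:

#include <stdio.h>

#define IMAP_A_SLOT0	0x0c00UL
#define IMAP_B_SLOT0	0x0c20UL
#define OBIO_IMAP_BASE	0x1000UL

static unsigned long imap_offset(unsigned long ino)
{
	ino &= 0x3f;
	if (ino < 0x20) {	/* PCI slot interrupt */
		unsigned int bus = (ino & 0x10) >> 4;
		unsigned int slot = (ino & 0x0c) >> 2;

		return (bus ? IMAP_B_SLOT0 : IMAP_A_SLOT0) + slot * 8;
	}
	return OBIO_IMAP_BASE + ((ino & 0x1f) << 3);	/* onboard device */
}

int main(void)
{
	printf("%#lx\n", imap_offset(0x14));	/* bus B, slot 1 -> 0xc28 */
	printf("%#lx\n", imap_offset(0x21));	/* onboard -> 0x1008 */
	return 0;
}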
diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c
new file mode 100644
index 0000000..81aa91e
--- /dev/null
+++ b/arch/sparc/kernel/psycho_common.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+/* psycho_common.c: Code common to PSYCHO and derivative PCI controllers.
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+
+#include <asm/upa.h>
+
+#include "pci_impl.h"
+#include "iommu_common.h"
+#include "psycho_common.h"
+
+#define  PSYCHO_STRBUF_CTRL_DENAB	0x0000000000000002ULL
+#define  PSYCHO_STCERR_WRITE		0x0000000000000002ULL
+#define  PSYCHO_STCERR_READ		0x0000000000000001ULL
+#define  PSYCHO_STCTAG_PPN		0x0fffffff00000000ULL
+#define  PSYCHO_STCTAG_VPN		0x00000000ffffe000ULL
+#define  PSYCHO_STCTAG_VALID		0x0000000000000002ULL
+#define  PSYCHO_STCTAG_WRITE		0x0000000000000001ULL
+#define  PSYCHO_STCLINE_LINDX		0x0000000001e00000ULL
+#define  PSYCHO_STCLINE_SPTR		0x00000000001f8000ULL
+#define  PSYCHO_STCLINE_LADDR		0x0000000000007f00ULL
+#define  PSYCHO_STCLINE_EPTR		0x00000000000000fcULL
+#define  PSYCHO_STCLINE_VALID		0x0000000000000002ULL
+#define  PSYCHO_STCLINE_FOFN		0x0000000000000001ULL
+
+static DEFINE_SPINLOCK(stc_buf_lock);
+static unsigned long stc_error_buf[128];
+static unsigned long stc_tag_buf[16];
+static unsigned long stc_line_buf[16];
+
+static void psycho_check_stc_error(struct pci_pbm_info *pbm)
+{
+	unsigned long err_base, tag_base, line_base;
+	struct strbuf *strbuf = &pbm->stc;
+	u64 control;
+	int i;
+
+	if (!strbuf->strbuf_control)
+		return;
+
+	err_base = strbuf->strbuf_err_stat;
+	tag_base = strbuf->strbuf_tag_diag;
+	line_base = strbuf->strbuf_line_diag;
+
+	spin_lock(&stc_buf_lock);
+
+	/* This is __REALLY__ dangerous.  When we put the streaming
+	 * buffer into diagnostic mode to probe its tags and error
+	 * status, we _must_ clear all of the line tag valid bits
+	 * before re-enabling the streaming buffer.  If any dirty data
+	 * lives in the STC when we do this, we will end up
+	 * invalidating it before it has a chance to reach main
+	 * memory.
+	 */
+	control = upa_readq(strbuf->strbuf_control);
+	upa_writeq(control | PSYCHO_STRBUF_CTRL_DENAB, strbuf->strbuf_control);
+	for (i = 0; i < 128; i++) {
+		u64 val;
+
+		val = upa_readq(err_base + (i * 8UL));
+		upa_writeq(0UL, err_base + (i * 8UL));
+		stc_error_buf[i] = val;
+	}
+	for (i = 0; i < 16; i++) {
+		stc_tag_buf[i] = upa_readq(tag_base + (i * 8UL));
+		stc_line_buf[i] = upa_readq(line_base + (i * 8UL));
+		upa_writeq(0UL, tag_base + (i * 8UL));
+		upa_writeq(0UL, line_base + (i * 8UL));
+	}
+
+	/* OK, state is logged, exit diagnostic mode. */
+	upa_writeq(control, strbuf->strbuf_control);
+
+	for (i = 0; i < 16; i++) {
+		int j, saw_error, first, last;
+
+		saw_error = 0;
+		first = i * 8;
+		last = first + 8;
+		for (j = first; j < last; j++) {
+			u64 errval = stc_error_buf[j];
+			if (errval != 0) {
+				saw_error++;
+				printk(KERN_ERR "%s: STC_ERR(%d)[wr(%d)"
+				       "rd(%d)]\n",
+				       pbm->name,
+				       j,
+				       (errval & PSYCHO_STCERR_WRITE) ? 1 : 0,
+				       (errval & PSYCHO_STCERR_READ) ? 1 : 0);
+			}
+		}
+		if (saw_error != 0) {
+			u64 tagval = stc_tag_buf[i];
+			u64 lineval = stc_line_buf[i];
+			printk(KERN_ERR "%s: STC_TAG(%d)[PA(%016llx)VA(%08llx)"
+			       "V(%d)W(%d)]\n",
+			       pbm->name,
+			       i,
+			       ((tagval & PSYCHO_STCTAG_PPN) >> 19UL),
+			       (tagval & PSYCHO_STCTAG_VPN),
+			       ((tagval & PSYCHO_STCTAG_VALID) ? 1 : 0),
+			       ((tagval & PSYCHO_STCTAG_WRITE) ? 1 : 0));
+			printk(KERN_ERR "%s: STC_LINE(%d)[LIDX(%llx)SP(%llx)"
+			       "LADDR(%llx)EP(%llx)V(%d)FOFN(%d)]\n",
+			       pbm->name,
+			       i,
+			       ((lineval & PSYCHO_STCLINE_LINDX) >> 21UL),
+			       ((lineval & PSYCHO_STCLINE_SPTR) >> 15UL),
+			       ((lineval & PSYCHO_STCLINE_LADDR) >> 8UL),
+			       ((lineval & PSYCHO_STCLINE_EPTR) >> 2UL),
+			       ((lineval & PSYCHO_STCLINE_VALID) ? 1 : 0),
+			       ((lineval & PSYCHO_STCLINE_FOFN) ? 1 : 0));
+		}
+	}
+
+	spin_unlock(&stc_buf_lock);
+}
+
+#define PSYCHO_IOMMU_TAG		0xa580UL
+#define PSYCHO_IOMMU_DATA		0xa600UL
+
+static void psycho_record_iommu_tags_and_data(struct pci_pbm_info *pbm,
+					      u64 *tag, u64 *data)
+{
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		unsigned long base = pbm->controller_regs;
+		unsigned long off = i * 8UL;
+
+		tag[i] = upa_readq(base + PSYCHO_IOMMU_TAG+off);
+		data[i] = upa_readq(base + PSYCHO_IOMMU_DATA+off);
+
+		/* Now clear out the entry. */
+		upa_writeq(0, base + PSYCHO_IOMMU_TAG + off);
+		upa_writeq(0, base + PSYCHO_IOMMU_DATA + off);
+	}
+}
+
+#define  PSYCHO_IOMMU_TAG_ERRSTS (0x3UL << 23UL)
+#define  PSYCHO_IOMMU_TAG_ERR	 (0x1UL << 22UL)
+#define  PSYCHO_IOMMU_TAG_WRITE	 (0x1UL << 21UL)
+#define  PSYCHO_IOMMU_TAG_STREAM (0x1UL << 20UL)
+#define  PSYCHO_IOMMU_TAG_SIZE	 (0x1UL << 19UL)
+#define  PSYCHO_IOMMU_TAG_VPAGE	 0x7ffffULL
+#define  PSYCHO_IOMMU_DATA_VALID (1UL << 30UL)
+#define  PSYCHO_IOMMU_DATA_CACHE (1UL << 28UL)
+#define  PSYCHO_IOMMU_DATA_PPAGE 0xfffffffULL
+
+static void psycho_dump_iommu_tags_and_data(struct pci_pbm_info *pbm,
+					    u64 *tag, u64 *data)
+{
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		u64 tag_val, data_val;
+		const char *type_str;
+
+		tag_val = tag[i];
+		if (!(tag_val & PSYCHO_IOMMU_TAG_ERR))
+			continue;
+
+		data_val = data[i];
+		switch ((tag_val & PSYCHO_IOMMU_TAG_ERRSTS) >> 23UL) {
+		case 0:
+			type_str = "Protection Error";
+			break;
+		case 1:
+			type_str = "Invalid Error";
+			break;
+		case 2:
+			type_str = "TimeOut Error";
+			break;
+		case 3:
+		default:
+			type_str = "ECC Error";
+			break;
+		}
+
+		printk(KERN_ERR "%s: IOMMU TAG(%d)[error(%s) wr(%d) "
+		       "str(%d) sz(%dK) vpg(%08llx)]\n",
+		       pbm->name, i, type_str,
+		       ((tag_val & PSYCHO_IOMMU_TAG_WRITE) ? 1 : 0),
+		       ((tag_val & PSYCHO_IOMMU_TAG_STREAM) ? 1 : 0),
+		       ((tag_val & PSYCHO_IOMMU_TAG_SIZE) ? 64 : 8),
+		       (tag_val & PSYCHO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT);
+		printk(KERN_ERR "%s: IOMMU DATA(%d)[valid(%d) cache(%d) "
+		       "ppg(%016llx)]\n",
+		       pbm->name, i,
+		       ((data_val & PSYCHO_IOMMU_DATA_VALID) ? 1 : 0),
+		       ((data_val & PSYCHO_IOMMU_DATA_CACHE) ? 1 : 0),
+		       (data_val & PSYCHO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT);
+	}
+}
+
+#define  PSYCHO_IOMMU_CTRL_XLTESTAT	0x0000000006000000UL
+#define  PSYCHO_IOMMU_CTRL_XLTEERR	0x0000000001000000UL
+
+void psycho_check_iommu_error(struct pci_pbm_info *pbm,
+			      unsigned long afsr,
+			      unsigned long afar,
+			      enum psycho_error_type type)
+{
+	u64 control, iommu_tag[16], iommu_data[16];
+	struct iommu *iommu = pbm->iommu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+	control = upa_readq(iommu->iommu_control);
+	if (control & PSYCHO_IOMMU_CTRL_XLTEERR) {
+		const char *type_str;
+
+		control &= ~PSYCHO_IOMMU_CTRL_XLTEERR;
+		upa_writeq(control, iommu->iommu_control);
+
+		switch ((control & PSYCHO_IOMMU_CTRL_XLTESTAT) >> 25UL) {
+		case 0:
+			type_str = "Protection Error";
+			break;
+		case 1:
+			type_str = "Invalid Error";
+			break;
+		case 2:
+			type_str = "TimeOut Error";
+			break;
+		case 3:
+		default:
+			type_str = "ECC Error";
+			break;
+		}
+		printk(KERN_ERR "%s: IOMMU Error, type[%s]\n",
+		       pbm->name, type_str);
+
+		/* It is very possible for another DVMA to occur while
+		 * we do this probe, and corrupt the system further.
+		 * But we are so screwed at this point that we are
+		 * likely to crash hard anyways, so get as much
+		 * diagnostic information to the console as we can.
+		 */
+		psycho_record_iommu_tags_and_data(pbm, iommu_tag, iommu_data);
+		psycho_dump_iommu_tags_and_data(pbm, iommu_tag, iommu_data);
+	}
+	psycho_check_stc_error(pbm);
+	spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+#define  PSYCHO_PCICTRL_SBH_ERR	 0x0000000800000000UL
+#define  PSYCHO_PCICTRL_SERR	 0x0000000400000000UL
+
+static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm)
+{
+	irqreturn_t ret = IRQ_NONE;
+	u64 csr, csr_error_bits;
+	u16 stat, *addr;
+
+	csr = upa_readq(pbm->pci_csr);
+	csr_error_bits = csr & (PSYCHO_PCICTRL_SBH_ERR | PSYCHO_PCICTRL_SERR);
+	if (csr_error_bits) {
+		/* Clear the errors.  */
+		upa_writeq(csr, pbm->pci_csr);
+
+		/* Log 'em.  */
+		if (csr_error_bits & PSYCHO_PCICTRL_SBH_ERR)
+			printk(KERN_ERR "%s: PCI streaming byte hole "
+			       "error asserted.\n", pbm->name);
+		if (csr_error_bits & PSYCHO_PCICTRL_SERR)
+			printk(KERN_ERR "%s: PCI SERR signal asserted.\n",
+			       pbm->name);
+		ret = IRQ_HANDLED;
+	}
+	addr = psycho_pci_config_mkaddr(pbm, pbm->pci_first_busno,
+					0, PCI_STATUS);
+	pci_config_read16(addr, &stat);
+	if (stat & (PCI_STATUS_PARITY |
+		    PCI_STATUS_SIG_TARGET_ABORT |
+		    PCI_STATUS_REC_TARGET_ABORT |
+		    PCI_STATUS_REC_MASTER_ABORT |
+		    PCI_STATUS_SIG_SYSTEM_ERROR)) {
+		printk(KERN_ERR "%s: PCI bus error, PCI_STATUS[%04x]\n",
+		       pbm->name, stat);
+		pci_config_write16(addr, 0xffff);
+		ret = IRQ_HANDLED;
+	}
+	return ret;
+}
+
+#define  PSYCHO_PCIAFSR_PMA	0x8000000000000000ULL
+#define  PSYCHO_PCIAFSR_PTA	0x4000000000000000ULL
+#define  PSYCHO_PCIAFSR_PRTRY	0x2000000000000000ULL
+#define  PSYCHO_PCIAFSR_PPERR	0x1000000000000000ULL
+#define  PSYCHO_PCIAFSR_SMA	0x0800000000000000ULL
+#define  PSYCHO_PCIAFSR_STA	0x0400000000000000ULL
+#define  PSYCHO_PCIAFSR_SRTRY	0x0200000000000000ULL
+#define  PSYCHO_PCIAFSR_SPERR	0x0100000000000000ULL
+#define  PSYCHO_PCIAFSR_RESV1	0x00ff000000000000ULL
+#define  PSYCHO_PCIAFSR_BMSK	0x0000ffff00000000ULL
+#define  PSYCHO_PCIAFSR_BLK	0x0000000080000000ULL
+#define  PSYCHO_PCIAFSR_RESV2	0x0000000040000000ULL
+#define  PSYCHO_PCIAFSR_MID	0x000000003e000000ULL
+#define  PSYCHO_PCIAFSR_RESV3	0x0000000001ffffffULL
+
+irqreturn_t psycho_pcierr_intr(int irq, void *dev_id)
+{
+	struct pci_pbm_info *pbm = dev_id;
+	u64 afsr, afar, error_bits;
+	int reported;
+
+	afsr = upa_readq(pbm->pci_afsr);
+	afar = upa_readq(pbm->pci_afar);
+	error_bits = afsr &
+		(PSYCHO_PCIAFSR_PMA | PSYCHO_PCIAFSR_PTA |
+		 PSYCHO_PCIAFSR_PRTRY | PSYCHO_PCIAFSR_PPERR |
+		 PSYCHO_PCIAFSR_SMA | PSYCHO_PCIAFSR_STA |
+		 PSYCHO_PCIAFSR_SRTRY | PSYCHO_PCIAFSR_SPERR);
+	if (!error_bits)
+		return psycho_pcierr_intr_other(pbm);
+	upa_writeq(error_bits, pbm->pci_afsr);
+	printk(KERN_ERR "%s: PCI Error, primary error type[%s]\n",
+	       pbm->name,
+	       (((error_bits & PSYCHO_PCIAFSR_PMA) ?
+		 "Master Abort" :
+		 ((error_bits & PSYCHO_PCIAFSR_PTA) ?
+		  "Target Abort" :
+		  ((error_bits & PSYCHO_PCIAFSR_PRTRY) ?
+		   "Excessive Retries" :
+		   ((error_bits & PSYCHO_PCIAFSR_PPERR) ?
+		    "Parity Error" : "???"))))));
+	printk(KERN_ERR "%s: bytemask[%04llx] UPA_MID[%02llx] was_block(%d)\n",
+	       pbm->name,
+	       (afsr & PSYCHO_PCIAFSR_BMSK) >> 32UL,
+	       (afsr & PSYCHO_PCIAFSR_MID) >> 25UL,
+	       (afsr & PSYCHO_PCIAFSR_BLK) ? 1 : 0);
+	printk(KERN_ERR "%s: PCI AFAR [%016llx]\n", pbm->name, afar);
+	printk(KERN_ERR "%s: PCI Secondary errors [", pbm->name);
+	reported = 0;
+	if (afsr & PSYCHO_PCIAFSR_SMA) {
+		reported++;
+		printk("(Master Abort)");
+	}
+	if (afsr & PSYCHO_PCIAFSR_STA) {
+		reported++;
+		printk("(Target Abort)");
+	}
+	if (afsr & PSYCHO_PCIAFSR_SRTRY) {
+		reported++;
+		printk("(Excessive Retries)");
+	}
+	if (afsr & PSYCHO_PCIAFSR_SPERR) {
+		reported++;
+		printk("(Parity Error)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	if (error_bits & (PSYCHO_PCIAFSR_PTA | PSYCHO_PCIAFSR_STA)) {
+		psycho_check_iommu_error(pbm, afsr, afar, PCI_ERR);
+		pci_scan_for_target_abort(pbm, pbm->pci_bus);
+	}
+	if (error_bits & (PSYCHO_PCIAFSR_PMA | PSYCHO_PCIAFSR_SMA))
+		pci_scan_for_master_abort(pbm, pbm->pci_bus);
+
+	if (error_bits & (PSYCHO_PCIAFSR_PPERR | PSYCHO_PCIAFSR_SPERR))
+		pci_scan_for_parity_error(pbm, pbm->pci_bus);
+
+	return IRQ_HANDLED;
+}
+
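+/* Invalidate all sixteen IOMMU TLB entries by zeroing their diagnostic
+ * tag and data registers; diag mode (DENAB) must be on for this, as
+ * psycho_iommu_init() arranges before calling here.
+ */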
+static void psycho_iommu_flush(struct pci_pbm_info *pbm)
+{
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		unsigned long off = i * 8;
+
+		upa_writeq(0, pbm->controller_regs + PSYCHO_IOMMU_TAG + off);
+		upa_writeq(0, pbm->controller_regs + PSYCHO_IOMMU_DATA + off);
+	}
+}
+
+#define PSYCHO_IOMMU_CONTROL		0x0200UL
+#define  PSYCHO_IOMMU_CTRL_TSBSZ	0x0000000000070000UL
+#define  PSYCHO_IOMMU_TSBSZ_1K      	0x0000000000000000UL
+#define  PSYCHO_IOMMU_TSBSZ_2K      	0x0000000000010000UL
+#define  PSYCHO_IOMMU_TSBSZ_4K      	0x0000000000020000UL
+#define  PSYCHO_IOMMU_TSBSZ_8K      	0x0000000000030000UL
+#define  PSYCHO_IOMMU_TSBSZ_16K     	0x0000000000040000UL
+#define  PSYCHO_IOMMU_TSBSZ_32K     	0x0000000000050000UL
+#define  PSYCHO_IOMMU_TSBSZ_64K     	0x0000000000060000UL
+#define  PSYCHO_IOMMU_TSBSZ_128K    	0x0000000000070000UL
+#define  PSYCHO_IOMMU_CTRL_TBWSZ    	0x0000000000000004UL
+#define  PSYCHO_IOMMU_CTRL_DENAB    	0x0000000000000002UL
+#define  PSYCHO_IOMMU_CTRL_ENAB     	0x0000000000000001UL
+#define PSYCHO_IOMMU_FLUSH		0x0210UL
+#define PSYCHO_IOMMU_TSBBASE		0x0208UL
+
+int psycho_iommu_init(struct pci_pbm_info *pbm, int tsbsize,
+		      u32 dvma_offset, u32 dma_mask,
+		      unsigned long write_complete_offset)
+{
+	struct iommu *iommu = pbm->iommu;
+	u64 control;
+	int err;
+
+	iommu->iommu_control  = pbm->controller_regs + PSYCHO_IOMMU_CONTROL;
+	iommu->iommu_tsbbase  = pbm->controller_regs + PSYCHO_IOMMU_TSBBASE;
+	iommu->iommu_flush    = pbm->controller_regs + PSYCHO_IOMMU_FLUSH;
+	iommu->iommu_tags     = pbm->controller_regs + PSYCHO_IOMMU_TAG;
+	iommu->write_complete_reg = (pbm->controller_regs +
+				     write_complete_offset);
+
+	iommu->iommu_ctxflush = 0;
+
+	control = upa_readq(iommu->iommu_control);
+	control |= PSYCHO_IOMMU_CTRL_DENAB;
+	upa_writeq(control, iommu->iommu_control);
+
+	psycho_iommu_flush(pbm);
+
+	/* Leave diag mode enabled for full-flushing done in pci_iommu.c */
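+	/* tsbsize is given in units of 1K entries; each IOPTE is eight
+	 * bytes, hence the byte count below.
+	 */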
+	err = iommu_table_init(iommu, tsbsize * 1024 * 8,
+			       dvma_offset, dma_mask, pbm->numa_node);
+	if (err)
+		return err;
+
+	upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase);
+
+	control = upa_readq(iommu->iommu_control);
+	control &= ~(PSYCHO_IOMMU_CTRL_TSBSZ | PSYCHO_IOMMU_CTRL_TBWSZ);
+	control |= PSYCHO_IOMMU_CTRL_ENAB;
+
+	switch (tsbsize) {
+	case 64:
+		control |= PSYCHO_IOMMU_TSBSZ_64K;
+		break;
+	case 128:
+		control |= PSYCHO_IOMMU_TSBSZ_128K;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	upa_writeq(control, iommu->iommu_control);
+
+	return 0;
+}
+
+void psycho_pbm_init_common(struct pci_pbm_info *pbm, struct platform_device *op,
+			    const char *chip_name, int chip_type)
+{
+	struct device_node *dp = op->dev.of_node;
+
+	pbm->name = dp->full_name;
+	pbm->numa_node = -1;
+	pbm->chip_type = chip_type;
+	pbm->chip_version = of_getintprop_default(dp, "version#", 0);
+	pbm->chip_revision = of_getintprop_default(dp, "module-revision#", 0);
+	pbm->op = op;
+	pbm->pci_ops = &sun4u_pci_ops;
+	pbm->config_space_reg_bits = 8;
+	pbm->index = pci_num_pbms++;
+	pci_get_pbm_props(pbm);
+	pci_determine_mem_io_space(pbm);
+
+	printk(KERN_INFO "%s: %s PCI Bus Module ver[%x:%x]\n",
+	       pbm->name, chip_name,
+	       pbm->chip_version, pbm->chip_revision);
+}
diff --git a/arch/sparc/kernel/psycho_common.h b/arch/sparc/kernel/psycho_common.h
new file mode 100644
index 0000000..6925231
--- /dev/null
+++ b/arch/sparc/kernel/psycho_common.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PSYCHO_COMMON_H
+#define _PSYCHO_COMMON_H
+
+/* U2P Programmer's Manual, page 13-55, configuration space
+ * address format:
+ * 
+ *  32             24 23 16 15    11 10       8 7   2  1 0
+ * ---------------------------------------------------------
+ * |0 0 0 0 0 0 0 0 1| bus | device | function | reg | 0 0 |
+ * ---------------------------------------------------------
+ */
+#define PSYCHO_CONFIG_BASE(PBM)	\
+	((PBM)->config_space | (1UL << 24))
+#define PSYCHO_CONFIG_ENCODE(BUS, DEVFN, REG)	\
+	(((unsigned long)(BUS)   << 16) |	\
+	 ((unsigned long)(DEVFN) << 8)  |	\
+	 ((unsigned long)(REG)))
+
+static inline void *psycho_pci_config_mkaddr(struct pci_pbm_info *pbm,
+					     unsigned char bus,
+					     unsigned int devfn,
+					     int where)
+{
+	return (void *)
+		(PSYCHO_CONFIG_BASE(pbm) |
+		 PSYCHO_CONFIG_ENCODE(bus, devfn, where));
+}
+
+enum psycho_error_type {
+	UE_ERR, CE_ERR, PCI_ERR
+};
+
+void psycho_check_iommu_error(struct pci_pbm_info *pbm,
+			      unsigned long afsr,
+			      unsigned long afar,
+			      enum psycho_error_type type);
+
+irqreturn_t psycho_pcierr_intr(int irq, void *dev_id);
+
+int psycho_iommu_init(struct pci_pbm_info *pbm, int tsbsize,
+		      u32 dvma_offset, u32 dma_mask,
+		      unsigned long write_complete_offset);
+
+void psycho_pbm_init_common(struct pci_pbm_info *pbm,
+			    struct platform_device *op,
+			    const char *chip_name, int chip_type);
+
+#endif /* _PSYCHO_COMMON_H */
diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c
new file mode 100644
index 0000000..16b50af
--- /dev/null
+++ b/arch/sparc/kernel/ptrace_32.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0
+/* ptrace.c: Sparc process tracing support.
+ *
+ * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
+ *
+ * Based upon code written by Ross Biro, Linus Torvalds, Bob Manson,
+ * and David Mosberger.
+ *
+ * Added Linux support -miguel (weird, eh?, the original code was meant
+ * to emulate SunOS).
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/smp.h>
+#include <linux/security.h>
+#include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
+
+#include <asm/pgtable.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#include "kernel.h"
+
+/* #define ALLOW_INIT_TRACING */
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* nothing to do */
+}
+
+enum sparc_regset {
+	REGSET_GENERAL,
+	REGSET_FP,
+};
+
+static int genregs32_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = target->thread.kregs;
+	unsigned long __user *reg_window;
+	unsigned long *k = kbuf;
+	unsigned long __user *u = ubuf;
+	unsigned long reg;
+
+	if (target == current)
+		flush_user_windows();
+
+	pos /= sizeof(reg);
+	count /= sizeof(reg);
+
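+	/* Slots 0-15 (%g0-%g7, %o0-%o7) come straight from pt_regs;
+	 * slots 16-31 (%l0-%l7, %i0-%i7) live in the register window
+	 * spilled at the user's stack pointer (u_regs[UREG_I6]).
+	 */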
+	if (kbuf) {
+		for (; count > 0 && pos < 16; count--)
+			*k++ = regs->u_regs[pos++];
+
+		reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		for (; count > 0 && pos < 32; count--) {
+			if (get_user(*k++, &reg_window[pos++]))
+				return -EFAULT;
+		}
+	} else {
+		for (; count > 0 && pos < 16; count--) {
+			if (put_user(regs->u_regs[pos++], u++))
+				return -EFAULT;
+		}
+
+		reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		for (; count > 0 && pos < 32; count--) {
+			if (get_user(reg, &reg_window[pos++]) ||
+			    put_user(reg, u++))
+				return -EFAULT;
+		}
+	}
+	while (count > 0) {
+		switch (pos) {
+		case 32: /* PSR */
+			reg = regs->psr;
+			break;
+		case 33: /* PC */
+			reg = regs->pc;
+			break;
+		case 34: /* NPC */
+			reg = regs->npc;
+			break;
+		case 35: /* Y */
+			reg = regs->y;
+			break;
+		case 36: /* WIM */
+		case 37: /* TBR */
+			reg = 0;
+			break;
+		default:
+			goto finish;
+		}
+
+		if (kbuf)
+			*k++ = reg;
+		else if (put_user(reg, u++))
+			return -EFAULT;
+		pos++;
+		count--;
+	}
+finish:
+	pos *= sizeof(reg);
+	count *= sizeof(reg);
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					38 * sizeof(reg), -1);
+}
+
+static int genregs32_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = target->thread.kregs;
+	unsigned long __user *reg_window;
+	const unsigned long *k = kbuf;
+	const unsigned long __user *u = ubuf;
+	unsigned long reg;
+
+	if (target == current)
+		flush_user_windows();
+
+	pos /= sizeof(reg);
+	count /= sizeof(reg);
+
+	if (kbuf) {
+		for (; count > 0 && pos < 16; count--)
+			regs->u_regs[pos++] = *k++;
+
+		reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		for (; count > 0 && pos < 32; count--) {
+			if (put_user(*k++, &reg_window[pos++]))
+				return -EFAULT;
+		}
+	} else {
+		for (; count > 0 && pos < 16; count--) {
+			if (get_user(reg, u++))
+				return -EFAULT;
+			regs->u_regs[pos++] = reg;
+		}
+
+		reg_window = (unsigned long __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		for (; count > 0 && pos < 32; count--) {
+			if (get_user(reg, u++) ||
+			    put_user(reg, &reg_window[pos++]))
+				return -EFAULT;
+		}
+	}
+	while (count > 0) {
+		unsigned long psr;
+
+		if (kbuf)
+			reg = *k++;
+		else if (get_user(reg, u++))
+			return -EFAULT;
+
+		switch (pos) {
+		case 32: /* PSR */
+			psr = regs->psr;
+			psr &= ~(PSR_ICC | PSR_SYSCALL);
+			psr |= (reg & (PSR_ICC | PSR_SYSCALL));
+			regs->psr = psr;
+			break;
+		case 33: /* PC */
+			regs->pc = reg;
+			break;
+		case 34: /* NPC */
+			regs->npc = reg;
+			break;
+		case 35: /* Y */
+			regs->y = reg;
+			break;
+		case 36: /* WIM */
+		case 37: /* TBR */
+			break;
+		default:
+			goto finish;
+		}
+
+		pos++;
+		count--;
+	}
+finish:
+	pos *= sizeof(reg);
+	count *= sizeof(reg);
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 38 * sizeof(reg), -1);
+}
+
+static int fpregs32_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	const unsigned long *fpregs = target->thread.float_regs;
+	int ret = 0;
+
+#if 0
+	if (target == current)
+		save_and_clear_fpu();
+#endif
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs,
+				  0, 32 * sizeof(u32));
+
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       32 * sizeof(u32),
+					       33 * sizeof(u32));
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.fsr,
+					  33 * sizeof(u32),
+					  34 * sizeof(u32));
+
+	if (!ret) {
+		unsigned long val;
+
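+		/* Status word: the FPU-enabled flag (always 1 here) in
+		 * bits 15:8, the FPU queue entry size (8) in bits 23:16;
+		 * the queue count stays zero.
+		 */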
+		val = (1 << 8) | (8 << 16);
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &val,
+					  34 * sizeof(u32),
+					  35 * sizeof(u32));
+	}
+
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       35 * sizeof(u32), -1);
+
+	return ret;
+}
+
+static int fpregs32_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	unsigned long *fpregs = target->thread.float_regs;
+	int ret;
+
+#if 0
+	if (target == current)
+		save_and_clear_fpu();
+#endif
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs,
+				 0, 32 * sizeof(u32));
+	if (!ret)
+		user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					  32 * sizeof(u32),
+					  33 * sizeof(u32));
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &target->thread.fsr,
+					 33 * sizeof(u32),
+					 34 * sizeof(u32));
+	}
+
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						34 * sizeof(u32), -1);
+	return ret;
+}
+
+static const struct user_regset sparc32_regsets[] = {
+	/* Format is:
+	 * 	G0 --> G7
+	 *	O0 --> O7
+	 *	L0 --> L7
+	 *	I0 --> I7
+	 *	PSR, PC, nPC, Y, WIM, TBR
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = 38,
+		.size = sizeof(u32), .align = sizeof(u32),
+		.get = genregs32_get, .set = genregs32_set
+	},
+	/* Format is:
+	 *	F0 --> F31
+	 *	empty 32-bit word
+	 *	FSR (32-bit word)
+	 *	FPU QUEUE COUNT (8-bit char)
+	 *	FPU QUEUE ENTRYSIZE (8-bit char)
+	 *	FPU ENABLED (8-bit char)
+	 *	empty 8-bit char
+	 *	FPU QUEUE (64 32-bit ints)
+	 */
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = 99,
+		.size = sizeof(u32), .align = sizeof(u32),
+		.get = fpregs32_get, .set = fpregs32_set
+	},
+};
+
+static const struct user_regset_view user_sparc32_view = {
+	.name = "sparc", .e_machine = EM_SPARC,
+	.regsets = sparc32_regsets, .n = ARRAY_SIZE(sparc32_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	return &user_sparc32_view;
+}
+
+struct fps {
+	unsigned long regs[32];
+	unsigned long fsr;
+	unsigned long flags;
+	unsigned long extra;
+	unsigned long fpqd;
+	struct fq {
+		unsigned long *insnaddr;
+		unsigned long insn;
+	} fpq[16];
+};
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	unsigned long addr2 = current->thread.kregs->u_regs[UREG_I4];
+	void __user *addr2p;
+	const struct user_regset_view *view;
+	struct pt_regs __user *pregs;
+	struct fps __user *fps;
+	int ret;
+
+	view = task_user_regset_view(current);
+	addr2p = (void __user *) addr2;
+	pregs = (struct pt_regs __user *) addr;
+	fps = (struct fps __user *) addr;
+
+	switch (request) {
+	case PTRACE_GETREGS: {
+		ret = copy_regset_to_user(child, view, REGSET_GENERAL,
+					  32 * sizeof(u32),
+					  4 * sizeof(u32),
+					  &pregs->psr);
+		if (!ret)
+			ret = copy_regset_to_user(child, view, REGSET_GENERAL,
+						  1 * sizeof(u32),
+						  15 * sizeof(u32),
+						  &pregs->u_regs[0]);
+		break;
+	}
+
+	case PTRACE_SETREGS: {
+		ret = copy_regset_from_user(child, view, REGSET_GENERAL,
+					    32 * sizeof(u32),
+					    4 * sizeof(u32),
+					    &pregs->psr);
+		if (!ret)
+			ret = copy_regset_from_user(child, view, REGSET_GENERAL,
+						    1 * sizeof(u32),
+						    15 * sizeof(u32),
+						    &pregs->u_regs[0]);
+		break;
+	}
+
+	case PTRACE_GETFPREGS: {
+		ret = copy_regset_to_user(child, view, REGSET_FP,
+					  0 * sizeof(u32),
+					  32 * sizeof(u32),
+					  &fps->regs[0]);
+		if (!ret)
+			ret = copy_regset_to_user(child, view, REGSET_FP,
+						  33 * sizeof(u32),
+						  1 * sizeof(u32),
+						  &fps->fsr);
+
+		if (!ret) {
+			if (__put_user(0, &fps->fpqd) ||
+			    __put_user(0, &fps->flags) ||
+			    __put_user(0, &fps->extra) ||
+			    clear_user(fps->fpq, sizeof(fps->fpq)))
+				ret = -EFAULT;
+		}
+		break;
+	}
+
+	case PTRACE_SETFPREGS: {
+		ret = copy_regset_from_user(child, view, REGSET_FP,
+					    0 * sizeof(u32),
+					    32 * sizeof(u32),
+					    &fps->regs[0]);
+		if (!ret)
+			ret = copy_regset_from_user(child, view, REGSET_FP,
+						    33 * sizeof(u32),
+						    1 * sizeof(u32),
+						    &fps->fsr);
+		break;
+	}
+
+	case PTRACE_READTEXT:
+	case PTRACE_READDATA:
+		ret = ptrace_readdata(child, addr, addr2p, data);
+
+		if (ret == data)
+			ret = 0;
+		else if (ret >= 0)
+			ret = -EIO;
+		break;
+
+	case PTRACE_WRITETEXT:
+	case PTRACE_WRITEDATA:
+		ret = ptrace_writedata(child, addr2p, addr, data);
+
+		if (ret == data)
+			ret = 0;
+		else if (ret >= 0)
+			ret = -EIO;
+		break;
+
+	default:
+		if (request == PTRACE_SPARC_DETACH)
+			request = PTRACE_DETACH;
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+asmlinkage int syscall_trace(struct pt_regs *regs, int syscall_exit_p)
+{
+	int ret = 0;
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE)) {
+		if (syscall_exit_p)
+			tracehook_report_syscall_exit(regs, 0);
+		else
+			ret = tracehook_report_syscall_entry(regs);
+	}
+
+	return ret;
+}
diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c
new file mode 100644
index 0000000..e1d965e
--- /dev/null
+++ b/arch/sparc/kernel/ptrace_64.c
@@ -0,0 +1,1200 @@
+/* ptrace.c: Sparc process tracing support.
+ *
+ * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * Based upon code written by Ross Biro, Linus Torvalds, Bob Manson,
+ * and David Mosberger.
+ *
+ * Added Linux support -miguel (weird, eh?, the original code was meant
+ * to emulate SunOS).
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/smp.h>
+#include <linux/security.h>
+#include <linux/seccomp.h>
+#include <linux/audit.h>
+#include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <trace/syscall.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
+#include <linux/context_tracking.h>
+
+#include <asm/asi.h>
+#include <asm/pgtable.h>
+#include <linux/uaccess.h>
+#include <asm/psrcompat.h>
+#include <asm/visasm.h>
+#include <asm/spitfire.h>
+#include <asm/page.h>
+#include <asm/cpudata.h>
+#include <asm/cacheflush.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#include "entry.h"
+
+/* #define ALLOW_INIT_TRACING */
+
+struct pt_regs_offset {
+	const char *name;
+	int offset;
+};
+
+#define REG_OFFSET_NAME(n, r) \
+	{.name = n, .offset = (PT_V9_##r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+	REG_OFFSET_NAME("g0", G0),
+	REG_OFFSET_NAME("g1", G1),
+	REG_OFFSET_NAME("g2", G2),
+	REG_OFFSET_NAME("g3", G3),
+	REG_OFFSET_NAME("g4", G4),
+	REG_OFFSET_NAME("g5", G5),
+	REG_OFFSET_NAME("g6", G6),
+	REG_OFFSET_NAME("g7", G7),
+
+	REG_OFFSET_NAME("i0", I0),
+	REG_OFFSET_NAME("i1", I1),
+	REG_OFFSET_NAME("i2", I2),
+	REG_OFFSET_NAME("i3", I3),
+	REG_OFFSET_NAME("i4", I4),
+	REG_OFFSET_NAME("i5", I5),
+	REG_OFFSET_NAME("i6", I6),
+	REG_OFFSET_NAME("i7", I7),
+
+	REG_OFFSET_NAME("tstate", TSTATE),
+	REG_OFFSET_NAME("pc", TPC),
+	REG_OFFSET_NAME("npc", TNPC),
+	REG_OFFSET_NAME("y", Y),
+	REG_OFFSET_NAME("lr", I7),
+
+	REG_OFFSET_END,
+};
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+	/* nothing to do */
+}
+
+/* To get the necessary page struct, access_process_vm() first calls
+ * get_user_pages().  This has done a flush_dcache_page() on the
+ * accessed page.  Then our caller (copy_{to,from}_user_page()) did
+ * a memcpy to read/write the data from that page.
+ *
+ * Now, the only thing we have to do is:
+ * 1) flush the D-cache if it's possible that an illegal alias
+ *    has been created
+ * 2) flush the I-cache if this is pre-cheetah and we did a write
+ */
+void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+			 unsigned long uaddr, void *kaddr,
+			 unsigned long len, int write)
+{
+	BUG_ON(len > PAGE_SIZE);
+
+	if (tlb_type == hypervisor)
+		return;
+
+	preempt_disable();
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	/* If bit 13 of the kernel address we used to access the
+	 * user page is the same as the virtual address that page
+	 * is mapped to in the user's address space, we can skip the
+	 * D-cache flush.
+	 */
+	if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) {
+		unsigned long start = __pa(kaddr);
+		unsigned long end = start + len;
+		unsigned long dcache_line_size;
+
+		dcache_line_size = local_cpu_data().dcache_line_size;
+
+		if (tlb_type == spitfire) {
+			for (; start < end; start += dcache_line_size)
+				spitfire_put_dcache_tag(start & 0x3fe0, 0x0);
+		} else {
+			start &= ~(dcache_line_size - 1);
+			for (; start < end; start += dcache_line_size)
+				__asm__ __volatile__(
+					"stxa %%g0, [%0] %1\n\t"
+					"membar #Sync"
+					: /* no outputs */
+					: "r" (start),
+					"i" (ASI_DCACHE_INVALIDATE));
+		}
+	}
+#endif
+	if (write && tlb_type == spitfire) {
+		unsigned long start = (unsigned long) kaddr;
+		unsigned long end = start + len;
+		unsigned long icache_line_size;
+
+		icache_line_size = local_cpu_data().icache_line_size;
+
+		for (; start < end; start += icache_line_size)
+			flushi(start);
+	}
+
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(flush_ptrace_access);
+
+static int get_from_target(struct task_struct *target, unsigned long uaddr,
+			   void *kbuf, int len)
+{
+	if (target == current) {
+		if (copy_from_user(kbuf, (void __user *) uaddr, len))
+			return -EFAULT;
+	} else {
+		int len2 = access_process_vm(target, uaddr, kbuf, len,
+				FOLL_FORCE);
+		if (len2 != len)
+			return -EFAULT;
+	}
+	return 0;
+}
+
+static int set_to_target(struct task_struct *target, unsigned long uaddr,
+			 void *kbuf, int len)
+{
+	if (target == current) {
+		if (copy_to_user((void __user *) uaddr, kbuf, len))
+			return -EFAULT;
+	} else {
+		int len2 = access_process_vm(target, uaddr, kbuf, len,
+				FOLL_FORCE | FOLL_WRITE);
+		if (len2 != len)
+			return -EFAULT;
+	}
+	return 0;
+}
+
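+/* The register window lives in the save area at the tracee's stack
+ * pointer.  A 64-bit stack is biased by STACK_BIAS (2047 bytes); a
+ * 32-bit compat stack is unbiased and spills the narrower
+ * reg_window32 layout instead.
+ */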
+static int regwindow64_get(struct task_struct *target,
+			   const struct pt_regs *regs,
+			   struct reg_window *wbuf)
+{
+	unsigned long rw_addr = regs->u_regs[UREG_I6];
+
+	if (!test_thread_64bit_stack(rw_addr)) {
+		struct reg_window32 win32;
+		int i;
+
+		if (get_from_target(target, rw_addr, &win32, sizeof(win32)))
+			return -EFAULT;
+		for (i = 0; i < 8; i++)
+			wbuf->locals[i] = win32.locals[i];
+		for (i = 0; i < 8; i++)
+			wbuf->ins[i] = win32.ins[i];
+	} else {
+		rw_addr += STACK_BIAS;
+		if (get_from_target(target, rw_addr, wbuf, sizeof(*wbuf)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int regwindow64_set(struct task_struct *target,
+			   const struct pt_regs *regs,
+			   struct reg_window *wbuf)
+{
+	unsigned long rw_addr = regs->u_regs[UREG_I6];
+
+	if (!test_thread_64bit_stack(rw_addr)) {
+		struct reg_window32 win32;
+		int i;
+
+		for (i = 0; i < 8; i++)
+			win32.locals[i] = wbuf->locals[i];
+		for (i = 0; i < 8; i++)
+			win32.ins[i] = wbuf->ins[i];
+
+		if (set_to_target(target, rw_addr, &win32, sizeof(win32)))
+			return -EFAULT;
+	} else {
+		rw_addr += STACK_BIAS;
+		if (set_to_target(target, rw_addr, wbuf, sizeof(*wbuf)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+enum sparc_regset {
+	REGSET_GENERAL,
+	REGSET_FP,
+};
+
+static int genregs64_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	if (target == current)
+		flushw_user();
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  regs->u_regs,
+				  0, 16 * sizeof(u64));
+	if (!ret && count && pos < (32 * sizeof(u64))) {
+		struct reg_window window;
+
+		if (regwindow64_get(target, regs, &window))
+			return -EFAULT;
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &window,
+					  16 * sizeof(u64),
+					  32 * sizeof(u64));
+	}
+
+	if (!ret) {
+		/* TSTATE, TPC, TNPC */
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &regs->tstate,
+					  32 * sizeof(u64),
+					  35 * sizeof(u64));
+	}
+
+	if (!ret) {
+		unsigned long y = regs->y;
+
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &y,
+					  35 * sizeof(u64),
+					  36 * sizeof(u64));
+	}
+
+	if (!ret) {
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       36 * sizeof(u64), -1);
+	}
+	return ret;
+}
+
+static int genregs64_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	int ret;
+
+	if (target == current)
+		flushw_user();
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 regs->u_regs,
+				 0, 16 * sizeof(u64));
+	if (!ret && count && pos < (32 * sizeof(u64))) {
+		struct reg_window window;
+
+		if (regwindow64_get(target, regs, &window))
+			return -EFAULT;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &window,
+					 16 * sizeof(u64),
+					 32 * sizeof(u64));
+
+		if (!ret &&
+		    regwindow64_set(target, regs, &window))
+			return -EFAULT;
+	}
+
+	if (!ret && count > 0) {
+		unsigned long tstate;
+
+		/* TSTATE */
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &tstate,
+					 32 * sizeof(u64),
+					 33 * sizeof(u64));
+		if (!ret) {
+			/* Only the condition codes and the "in syscall"
+			 * state can be modified in the %tstate register.
+			 */
+			tstate &= (TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL);
+			regs->tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL);
+			regs->tstate |= tstate;
+		}
+	}
+
+	if (!ret) {
+		/* TPC, TNPC */
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &regs->tpc,
+					 33 * sizeof(u64),
+					 35 * sizeof(u64));
+	}
+
+	if (!ret) {
+		unsigned long y = regs->y;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &y,
+					 35 * sizeof(u64),
+					 36 * sizeof(u64));
+		if (!ret)
+			regs->y = y;
+	}
+
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						36 * sizeof(u64), -1);
+
+	return ret;
+}
+
+static int fpregs64_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	const unsigned long *fpregs = task_thread_info(target)->fpregs;
+	unsigned long fprs, fsr, gsr;
+	int ret;
+
+	if (target == current)
+		save_and_clear_fpu();
+
+	fprs = task_thread_info(target)->fpsaved[0];
+
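+	/* FPRS_DL and FPRS_DU say whether the lower and upper halves of
+	 * the FP register file hold live state; a half that was never
+	 * touched reads back as zeros.
+	 */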
+	if (fprs & FPRS_DL)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  fpregs,
+					  0, 16 * sizeof(u64));
+	else
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       0,
+					       16 * sizeof(u64));
+
+	if (!ret) {
+		if (fprs & FPRS_DU)
+			ret = user_regset_copyout(&pos, &count,
+						  &kbuf, &ubuf,
+						  fpregs + 16,
+						  16 * sizeof(u64),
+						  32 * sizeof(u64));
+		else
+			ret = user_regset_copyout_zero(&pos, &count,
+						       &kbuf, &ubuf,
+						       16 * sizeof(u64),
+						       32 * sizeof(u64));
+	}
+
+	if (fprs & FPRS_FEF) {
+		fsr = task_thread_info(target)->xfsr[0];
+		gsr = task_thread_info(target)->gsr[0];
+	} else {
+		fsr = gsr = 0;
+	}
+
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &fsr,
+					  32 * sizeof(u64),
+					  33 * sizeof(u64));
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &gsr,
+					  33 * sizeof(u64),
+					  34 * sizeof(u64));
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &fprs,
+					  34 * sizeof(u64),
+					  35 * sizeof(u64));
+
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       35 * sizeof(u64), -1);
+
+	return ret;
+}
+
+static int fpregs64_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	unsigned long *fpregs = task_thread_info(target)->fpregs;
+	unsigned long fprs;
+	int ret;
+
+	if (target == current)
+		save_and_clear_fpu();
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs,
+				 0, 32 * sizeof(u64));
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 task_thread_info(target)->xfsr,
+					 32 * sizeof(u64),
+					 33 * sizeof(u64));
+	if (!ret)
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 task_thread_info(target)->gsr,
+					 33 * sizeof(u64),
+					 34 * sizeof(u64));
+
+	fprs = task_thread_info(target)->fpsaved[0];
+	if (!ret && count > 0) {
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &fprs,
+					 34 * sizeof(u64),
+					 35 * sizeof(u64));
+	}
+
+	fprs |= (FPRS_FEF | FPRS_DL | FPRS_DU);
+	task_thread_info(target)->fpsaved[0] = fprs;
+
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						35 * sizeof(u64), -1);
+	return ret;
+}
+
+static const struct user_regset sparc64_regsets[] = {
+	/* Format is:
+	 * 	G0 --> G7
+	 *	O0 --> O7
+	 *	L0 --> L7
+	 *	I0 --> I7
+	 *	TSTATE, TPC, TNPC, Y
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = 36,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = genregs64_get, .set = genregs64_set
+	},
+	/* Format is:
+	 *	F0 --> F63
+	 *	FSR
+	 *	GSR
+	 *	FPRS
+	 */
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = 35,
+		.size = sizeof(u64), .align = sizeof(u64),
+		.get = fpregs64_get, .set = fpregs64_set
+	},
+};
+
+static const struct user_regset_view user_sparc64_view = {
+	.name = "sparc64", .e_machine = EM_SPARCV9,
+	.regsets = sparc64_regsets, .n = ARRAY_SIZE(sparc64_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+static int genregs32_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	const struct pt_regs *regs = task_pt_regs(target);
+	compat_ulong_t __user *reg_window;
+	compat_ulong_t *k = kbuf;
+	compat_ulong_t __user *u = ubuf;
+	compat_ulong_t reg;
+
+	if (target == current)
+		flushw_user();
+
+	pos /= sizeof(reg);
+	count /= sizeof(reg);
+
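+	/* Slots 16-31 live in the window spilled at the tracee's stack
+	 * pointer; when the target is not current they must be fetched
+	 * via access_process_vm() rather than get_user().
+	 */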
+	if (kbuf) {
+		for (; count > 0 && pos < 16; count--)
+			*k++ = regs->u_regs[pos++];
+
+		reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		if (target == current) {
+			for (; count > 0 && pos < 32; count--) {
+				if (get_user(*k++, &reg_window[pos++]))
+					return -EFAULT;
+			}
+		} else {
+			for (; count > 0 && pos < 32; count--) {
+				if (access_process_vm(target,
+						      (unsigned long)
+						      &reg_window[pos],
+						      k, sizeof(*k),
+						      FOLL_FORCE)
+				    != sizeof(*k))
+					return -EFAULT;
+				k++;
+				pos++;
+			}
+		}
+	} else {
+		for (; count > 0 && pos < 16; count--) {
+			if (put_user((compat_ulong_t) regs->u_regs[pos++], u++))
+				return -EFAULT;
+		}
+
+		reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		if (target == current) {
+			for (; count > 0 && pos < 32; count--) {
+				if (get_user(reg, &reg_window[pos++]) ||
+				    put_user(reg, u++))
+					return -EFAULT;
+			}
+		} else {
+			for (; count > 0 && pos < 32; count--) {
+				if (access_process_vm(target,
+						      (unsigned long)
+						      &reg_window[pos],
+						      &reg, sizeof(reg),
+						      FOLL_FORCE)
+				    != sizeof(reg))
+					return -EFAULT;
+				if (access_process_vm(target,
+						      (unsigned long) u,
+						      &reg, sizeof(reg),
+						      FOLL_FORCE | FOLL_WRITE)
+				    != sizeof(reg))
+					return -EFAULT;
+				pos++;
+				u++;
+			}
+		}
+	}
+	while (count > 0) {
+		switch (pos) {
+		case 32: /* PSR */
+			reg = tstate_to_psr(regs->tstate);
+			break;
+		case 33: /* PC */
+			reg = regs->tpc;
+			break;
+		case 34: /* NPC */
+			reg = regs->tnpc;
+			break;
+		case 35: /* Y */
+			reg = regs->y;
+			break;
+		case 36: /* WIM */
+		case 37: /* TBR */
+			reg = 0;
+			break;
+		default:
+			goto finish;
+		}
+
+		if (kbuf)
+			*k++ = reg;
+		else if (put_user(reg, u++))
+			return -EFAULT;
+		pos++;
+		count--;
+	}
+finish:
+	pos *= sizeof(reg);
+	count *= sizeof(reg);
+
+	return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					38 * sizeof(reg), -1);
+}
+
+static int genregs32_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *regs = task_pt_regs(target);
+	compat_ulong_t __user *reg_window;
+	const compat_ulong_t *k = kbuf;
+	const compat_ulong_t __user *u = ubuf;
+	compat_ulong_t reg;
+
+	if (target == current)
+		flushw_user();
+
+	pos /= sizeof(reg);
+	count /= sizeof(reg);
+
+	if (kbuf) {
+		for (; count > 0 && pos < 16; count--)
+			regs->u_regs[pos++] = *k++;
+
+		reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		if (target == current) {
+			for (; count > 0 && pos < 32; count--) {
+				if (put_user(*k++, &reg_window[pos++]))
+					return -EFAULT;
+			}
+		} else {
+			for (; count > 0 && pos < 32; count--) {
+				if (access_process_vm(target,
+						      (unsigned long)
+						      &reg_window[pos],
+						      (void *) k,
+						      sizeof(*k),
+						      FOLL_FORCE | FOLL_WRITE)
+				    != sizeof(*k))
+					return -EFAULT;
+				k++;
+				pos++;
+			}
+		}
+	} else {
+		for (; count > 0 && pos < 16; count--) {
+			if (get_user(reg, u++))
+				return -EFAULT;
+			regs->u_regs[pos++] = reg;
+		}
+
+		reg_window = (compat_ulong_t __user *) regs->u_regs[UREG_I6];
+		reg_window -= 16;
+		if (target == current) {
+			for (; count > 0 && pos < 32; count--) {
+				if (get_user(reg, u++) ||
+				    put_user(reg, &reg_window[pos++]))
+					return -EFAULT;
+			}
+		} else {
+			for (; count > 0 && pos < 32; count--) {
+				if (access_process_vm(target,
+						      (unsigned long)
+						      u,
+						      &reg, sizeof(reg),
+						      FOLL_FORCE)
+				    != sizeof(reg))
+					return -EFAULT;
+				if (access_process_vm(target,
+						      (unsigned long)
+						      &reg_window[pos],
+						      &reg, sizeof(reg),
+						      FOLL_FORCE | FOLL_WRITE)
+				    != sizeof(reg))
+					return -EFAULT;
+				pos++;
+				u++;
+			}
+		}
+	}
+	while (count > 0) {
+		unsigned long tstate;
+
+		if (kbuf)
+			reg = *k++;
+		else if (get_user(reg, u++))
+			return -EFAULT;
+
+		switch (pos) {
+		case 32: /* PSR */
+			tstate = regs->tstate;
+			tstate &= ~(TSTATE_ICC | TSTATE_XCC | TSTATE_SYSCALL);
+			tstate |= psr_to_tstate_icc(reg);
+			if (reg & PSR_SYSCALL)
+				tstate |= TSTATE_SYSCALL;
+			regs->tstate = tstate;
+			break;
+		case 33: /* PC */
+			regs->tpc = reg;
+			break;
+		case 34: /* NPC */
+			regs->tnpc = reg;
+			break;
+		case 35: /* Y */
+			regs->y = reg;
+			break;
+		case 36: /* WIM */
+		case 37: /* TBR */
+			break;
+		default:
+			goto finish;
+		}
+
+		pos++;
+		count--;
+	}
+finish:
+	pos *= sizeof(reg);
+	count *= sizeof(reg);
+
+	return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					 38 * sizeof(reg), -1);
+}
+
+static int fpregs32_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	const unsigned long *fpregs = task_thread_info(target)->fpregs;
+	compat_ulong_t enabled;
+	unsigned long fprs;
+	compat_ulong_t fsr;
+	int ret = 0;
+
+	if (target == current)
+		save_and_clear_fpu();
+
+	fprs = task_thread_info(target)->fpsaved[0];
+	if (fprs & FPRS_FEF) {
+		fsr = task_thread_info(target)->xfsr[0];
+		enabled = 1;
+	} else {
+		fsr = 0;
+		enabled = 0;
+	}
+
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				  fpregs,
+				  0, 32 * sizeof(u32));
+
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       32 * sizeof(u32),
+					       33 * sizeof(u32));
+	if (!ret)
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &fsr,
+					  33 * sizeof(u32),
+					  34 * sizeof(u32));
+
+	if (!ret) {
+		compat_ulong_t val;
+
+		val = (enabled << 8) | (8 << 16);
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+					  &val,
+					  34 * sizeof(u32),
+					  35 * sizeof(u32));
+	}
+
+	if (!ret)
+		ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+					       35 * sizeof(u32), -1);
+
+	return ret;
+}
+
+static int fpregs32_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	unsigned long *fpregs = task_thread_info(target)->fpregs;
+	unsigned long fprs;
+	int ret;
+
+	if (target == current)
+		save_and_clear_fpu();
+
+	fprs = task_thread_info(target)->fpsaved[0];
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs,
+				 0, 32 * sizeof(u32));
+	if (!ret)
+		user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+					  32 * sizeof(u32),
+					  33 * sizeof(u32));
+	if (!ret && count > 0) {
+		compat_ulong_t fsr;
+		unsigned long val;
+
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					 &fsr,
+					 33 * sizeof(u32),
+					 34 * sizeof(u32));
+		if (!ret) {
+			val = task_thread_info(target)->xfsr[0];
+			val &= 0xffffffff00000000UL;
+			val |= fsr;
+			task_thread_info(target)->xfsr[0] = val;
+		}
+	}
+
+	fprs |= (FPRS_FEF | FPRS_DL);
+	task_thread_info(target)->fpsaved[0] = fprs;
+
+	if (!ret)
+		ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+						34 * sizeof(u32), -1);
+	return ret;
+}
+
+static const struct user_regset sparc32_regsets[] = {
+	/* Format is:
+	 * 	G0 --> G7
+	 *	O0 --> O7
+	 *	L0 --> L7
+	 *	I0 --> I7
+	 *	PSR, PC, nPC, Y, WIM, TBR
+	 */
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = 38,
+		.size = sizeof(u32), .align = sizeof(u32),
+		.get = genregs32_get, .set = genregs32_set
+	},
+	/* Format is:
+	 *	F0 --> F31
+	 *	empty 32-bit word
+	 *	FSR (32-bit word)
+	 *	FPU QUEUE COUNT (8-bit char)
+	 *	FPU QUEUE ENTRYSIZE (8-bit char)
+	 *	FPU ENABLED (8-bit char)
+	 *	empty 8-bit char
+	 *	FPU QUEUE (64 32-bit ints)
+	 */
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = 99,
+		.size = sizeof(u32), .align = sizeof(u32),
+		.get = fpregs32_get, .set = fpregs32_set
+	},
+};
+
+static const struct user_regset_view user_sparc32_view = {
+	.name = "sparc", .e_machine = EM_SPARC,
+	.regsets = sparc32_regsets, .n = ARRAY_SIZE(sparc32_regsets)
+};
+#endif /* CONFIG_COMPAT */
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_32BIT))
+		return &user_sparc32_view;
+#endif
+	return &user_sparc64_view;
+}
+
+#ifdef CONFIG_COMPAT
+struct compat_fps {
+	unsigned int regs[32];
+	unsigned int fsr;
+	unsigned int flags;
+	unsigned int extra;
+	unsigned int fpqd;
+	struct compat_fq {
+		unsigned int insnaddr;
+		unsigned int insn;
+	} fpq[16];
+};
+
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t caddr, compat_ulong_t cdata)
+{
+	const struct user_regset_view *view = task_user_regset_view(current);
+	compat_ulong_t caddr2 = task_pt_regs(current)->u_regs[UREG_I4];
+	struct pt_regs32 __user *pregs;
+	struct compat_fps __user *fps;
+	unsigned long addr2 = caddr2;
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
+	int ret;
+
+	pregs = (struct pt_regs32 __user *) addr;
+	fps = (struct compat_fps __user *) addr;
+
+	switch (request) {
+	case PTRACE_PEEKUSR:
+		ret = (addr != 0) ? -EIO : 0;
+		break;
+
+	case PTRACE_GETREGS:
+		ret = copy_regset_to_user(child, view, REGSET_GENERAL,
+					  32 * sizeof(u32),
+					  4 * sizeof(u32),
+					  &pregs->psr);
+		if (!ret)
+			ret = copy_regset_to_user(child, view, REGSET_GENERAL,
+						  1 * sizeof(u32),
+						  15 * sizeof(u32),
+						  &pregs->u_regs[0]);
+		break;
+
+	case PTRACE_SETREGS:
+		ret = copy_regset_from_user(child, view, REGSET_GENERAL,
+					    32 * sizeof(u32),
+					    4 * sizeof(u32),
+					    &pregs->psr);
+		if (!ret)
+			ret = copy_regset_from_user(child, view, REGSET_GENERAL,
+						    1 * sizeof(u32),
+						    15 * sizeof(u32),
+						    &pregs->u_regs[0]);
+		break;
+
+	case PTRACE_GETFPREGS:
+		ret = copy_regset_to_user(child, view, REGSET_FP,
+					  0 * sizeof(u32),
+					  32 * sizeof(u32),
+					  &fps->regs[0]);
+		if (!ret)
+			ret = copy_regset_to_user(child, view, REGSET_FP,
+						  33 * sizeof(u32),
+						  1 * sizeof(u32),
+						  &fps->fsr);
+		if (!ret) {
+			if (__put_user(0, &fps->flags) ||
+			    __put_user(0, &fps->extra) ||
+			    __put_user(0, &fps->fpqd) ||
+			    clear_user(&fps->fpq[0], 32 * sizeof(unsigned int)))
+				ret = -EFAULT;
+		}
+		break;
+
+	case PTRACE_SETFPREGS:
+		ret = copy_regset_from_user(child, view, REGSET_FP,
+					    0 * sizeof(u32),
+					    32 * sizeof(u32),
+					    &fps->regs[0]);
+		if (!ret)
+			ret = copy_regset_from_user(child, view, REGSET_FP,
+						    33 * sizeof(u32),
+						    1 * sizeof(u32),
+						    &fps->fsr);
+		break;
+
+	case PTRACE_READTEXT:
+	case PTRACE_READDATA:
+		ret = ptrace_readdata(child, addr,
+				      (char __user *)addr2, data);
+		if (ret == data)
+			ret = 0;
+		else if (ret >= 0)
+			ret = -EIO;
+		break;
+
+	case PTRACE_WRITETEXT:
+	case PTRACE_WRITEDATA:
+		ret = ptrace_writedata(child, (char __user *) addr2,
+				       addr, data);
+		if (ret == data)
+			ret = 0;
+		else if (ret >= 0)
+			ret = -EIO;
+		break;
+
+	default:
+		if (request == PTRACE_SPARC_DETACH)
+			request = PTRACE_DETACH;
+		ret = compat_ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+struct fps {
+	unsigned int regs[64];
+	unsigned long fsr;
+};
+
+long arch_ptrace(struct task_struct *child, long request,
+		 unsigned long addr, unsigned long data)
+{
+	const struct user_regset_view *view = task_user_regset_view(current);
+	unsigned long addr2 = task_pt_regs(current)->u_regs[UREG_I4];
+	struct pt_regs __user *pregs;
+	struct fps __user *fps;
+	void __user *addr2p;
+	int ret;
+
+	pregs = (struct pt_regs __user *) addr;
+	fps = (struct fps __user *) addr;
+	addr2p = (void __user *) addr2;
+
+	switch (request) {
+	case PTRACE_PEEKUSR:
+		ret = (addr != 0) ? -EIO : 0;
+		break;
+
+	case PTRACE_GETREGS64:
+		ret = copy_regset_to_user(child, view, REGSET_GENERAL,
+					  1 * sizeof(u64),
+					  15 * sizeof(u64),
+					  &pregs->u_regs[0]);
+		if (!ret) {
+			/* XXX doesn't handle 'y' register correctly XXX */
+			ret = copy_regset_to_user(child, view, REGSET_GENERAL,
+						  32 * sizeof(u64),
+						  4 * sizeof(u64),
+						  &pregs->tstate);
+		}
+		break;
+
+	case PTRACE_SETREGS64:
+		ret = copy_regset_from_user(child, view, REGSET_GENERAL,
+					    1 * sizeof(u64),
+					    15 * sizeof(u64),
+					    &pregs->u_regs[0]);
+		if (!ret) {
+			/* XXX doesn't handle 'y' register correctly XXX */
+			ret = copy_regset_from_user(child, view, REGSET_GENERAL,
+						    32 * sizeof(u64),
+						    4 * sizeof(u64),
+						    &pregs->tstate);
+		}
+		break;
+
+	case PTRACE_GETFPREGS64:
+		ret = copy_regset_to_user(child, view, REGSET_FP,
+					  0 * sizeof(u64),
+					  33 * sizeof(u64),
+					  fps);
+		break;
+
+	case PTRACE_SETFPREGS64:
+		ret = copy_regset_from_user(child, view, REGSET_FP,
+					  0 * sizeof(u64),
+					  33 * sizeof(u64),
+					  fps);
+		break;
+
+	case PTRACE_READTEXT:
+	case PTRACE_READDATA:
+		ret = ptrace_readdata(child, addr, addr2p, data);
+		if (ret == data)
+			ret = 0;
+		else if (ret >= 0)
+			ret = -EIO;
+		break;
+
+	case PTRACE_WRITETEXT:
+	case PTRACE_WRITEDATA:
+		ret = ptrace_writedata(child, addr2p, addr, data);
+		if (ret == data)
+			ret = 0;
+		else if (ret >= 0)
+			ret = -EIO;
+		break;
+
+	default:
+		if (request == PTRACE_SPARC_DETACH)
+			request = PTRACE_DETACH;
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
+}
+
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
+{
+	int ret = 0;
+
+	/* do the secure computing check first */
+	secure_computing_strict(regs->u_regs[UREG_G1]);
+
+	if (test_thread_flag(TIF_NOHZ))
+		user_exit();
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		ret = tracehook_report_syscall_entry(regs);
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_enter(regs, regs->u_regs[UREG_G1]);
+
+	audit_syscall_entry(regs->u_regs[UREG_G1], regs->u_regs[UREG_I0],
+			    regs->u_regs[UREG_I1], regs->u_regs[UREG_I2],
+			    regs->u_regs[UREG_I3]);
+
+	return ret;
+}
+
+asmlinkage void syscall_trace_leave(struct pt_regs *regs)
+{
+	if (test_thread_flag(TIF_NOHZ))
+		user_exit();
+
+	audit_syscall_exit(regs);
+
+	if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+		trace_sys_exit(regs, regs->u_regs[UREG_I0]);
+
+	if (test_thread_flag(TIF_SYSCALL_TRACE))
+		tracehook_report_syscall_exit(regs, 0);
+
+	if (test_thread_flag(TIF_NOHZ))
+		user_enter();
+}
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:	the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *roff;
+
+	for (roff = regoffset_table; roff->name != NULL; roff++)
+		if (!strcmp(roff->name, name))
+			return roff->offset;
+	return -EINVAL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @addr:	address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static inline int regs_within_kernel_stack(struct pt_regs *regs,
+					   unsigned long addr)
+{
+	unsigned long ksp = kernel_stack_pointer(regs) + STACK_BIAS;
+	return ((addr & ~(THREAD_SIZE - 1))  ==
+		(ksp & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long ksp = kernel_stack_pointer(regs) + STACK_BIAS;
+	unsigned long *addr = (unsigned long *)ksp;
+	addr += n;
+	if (regs_within_kernel_stack(regs, (unsigned long)addr))
+		return *addr;
+	else
+		return 0;
+}
diff --git a/arch/sparc/kernel/reboot.c b/arch/sparc/kernel/reboot.c
new file mode 100644
index 0000000..7933ee3
--- /dev/null
+++ b/arch/sparc/kernel/reboot.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* reboot.c: reboot/shutdown/halt/poweroff handling
+ *
+ * Copyright (C) 2008 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/export.h>
+#include <linux/pm.h>
+
+#include <asm/oplib.h>
+#include <asm/prom.h>
+#include <asm/setup.h>
+
+/* sysctl - toggle power-off restriction for serial console
+ * systems in machine_power_off()
+ */
+int scons_pwroff = 1;
+
+/* This isn't actually used; it exists merely to satisfy the
+ * reference in kernel/sys.c.
+ */
+void (*pm_power_off)(void) = machine_power_off;
+EXPORT_SYMBOL(pm_power_off);
+
+void machine_power_off(void)
+{
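+	/* Power off unless this is a serial-console machine and the
+	 * scons_pwroff sysctl says such machines must not power off;
+	 * in that case fall through to a plain PROM halt.
+	 */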
+	if (strcmp(of_console_device->type, "serial") || scons_pwroff)
+		prom_halt_power_off();
+
+	prom_halt();
+}
+
+void machine_halt(void)
+{
+	prom_halt();
+	panic("Halt failed!");
+}
+
+void machine_restart(char *cmd)
+{
+	char *p;
+
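+	/* Prefer the caller-supplied command, then the reboot_command
+	 * string (set via the reboot-cmd sysctl, trailing newline
+	 * stripped), and finally a default reboot.
+	 */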
+	p = strchr(reboot_command, '\n');
+	if (p)
+		*p = 0;
+	if (cmd)
+		prom_reboot(cmd);
+	if (*reboot_command)
+		prom_reboot(reboot_command);
+	prom_reboot("");
+	panic("Reboot failed!");
+}
+
diff --git a/arch/sparc/kernel/rtrap_32.S b/arch/sparc/kernel/rtrap_32.S
new file mode 100644
index 0000000..dca8ed8
--- /dev/null
+++ b/arch/sparc/kernel/rtrap_32.S
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * rtrap.S: Return from Sparc trap low-level code.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/asi.h>
+#include <asm/smp.h>
+#include <asm/contregs.h>
+#include <asm/winmacro.h>
+#include <asm/asmmacro.h>
+#include <asm/thread_info.h>
+
+#define t_psr     l0
+#define t_pc      l1
+#define t_npc     l2
+#define t_wim     l3
+#define twin_tmp1 l4
+#define glob_tmp  g4
+#define curptr    g6
+
+	/* 7 WINDOW SPARC PATCH INSTRUCTIONS */
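+	/* Boot code copies these over the rtrap_patch1-5 sites below
+	 * when the CPU implements seven register windows instead of
+	 * the eight the unpatched code assumes.
+	 */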
+	.globl	rtrap_7win_patch1, rtrap_7win_patch2, rtrap_7win_patch3
+	.globl	rtrap_7win_patch4, rtrap_7win_patch5
+rtrap_7win_patch1:	srl	%t_wim, 0x6, %glob_tmp
+rtrap_7win_patch2:	and	%glob_tmp, 0x7f, %glob_tmp
+rtrap_7win_patch3:	srl	%g1, 7, %g2
+rtrap_7win_patch4:	srl	%g2, 6, %g2
+rtrap_7win_patch5:	and	%g1, 0x7f, %g1
+	/* END OF PATCH INSTRUCTIONS */
+
+	/* We need to check for a few things, which are:
+	 * 1) The need to call schedule() because this
+	 *    process's quantum is up.
+	 * 2) Pending signals for this process; if any
+	 *    exist we need to call do_signal() to handle
+	 *    them.
+	 *
+	 * Otherwise we just check whether the rett would land us
+	 * in an invalid window; if so, we need to grab
+	 * it off the user/kernel stack first.
+	 */
+
+	.globl	ret_trap_entry, rtrap_patch1, rtrap_patch2
+	.globl	rtrap_patch3, rtrap_patch4, rtrap_patch5
+	.globl	ret_trap_lockless_ipi
+ret_trap_entry:
+ret_trap_lockless_ipi:
+	andcc	%t_psr, PSR_PS, %g0
+	sethi	%hi(PSR_SYSCALL), %g1
+	be	1f
+	 andn	%t_psr, %g1, %t_psr
+
+	wr	%t_psr, 0x0, %psr
+	b	ret_trap_kernel
+	 nop
+
+1:
+	ld	[%curptr + TI_FLAGS], %g2
+	andcc	%g2, (_TIF_NEED_RESCHED), %g0
+	be	signal_p
+	 nop
+
+	call	schedule
+	 nop
+
+	ld	[%curptr + TI_FLAGS], %g2
+signal_p:
+	andcc	%g2, _TIF_DO_NOTIFY_RESUME_MASK, %g0
+	bz,a	ret_trap_continue
+	 ld	[%sp + STACKFRAME_SZ + PT_PSR], %t_psr
+
+	mov	%g2, %o2
+	mov	%l5, %o1
+	call	do_notify_resume
+	 add	%sp, STACKFRAME_SZ, %o0	! pt_regs ptr
+
+	b	signal_p
+	 ld	[%curptr + TI_FLAGS], %g2
+
+ret_trap_continue:
+	sethi	%hi(PSR_SYSCALL), %g1
+	andn	%t_psr, %g1, %t_psr
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE
+
+	ld	[%curptr + TI_W_SAVED], %twin_tmp1
+	orcc	%g0, %twin_tmp1, %g0
+	be	ret_trap_nobufwins
+	 nop
+
+	wr	%t_psr, PSR_ET, %psr
+	WRITE_PAUSE
+
+	mov	1, %o1
+	call	try_to_clear_window_buffer
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	b	signal_p
+	 ld	[%curptr + TI_FLAGS], %g2
+
+ret_trap_nobufwins:
+	/* Load up the user's out registers so we can pull
+	 * a window from the stack, if necessary.
+	 */
+	LOAD_PT_INS(sp)
+
+	/* If there are already live user windows in the
+	 * set we can return from trap safely.
+	 */
+	ld	[%curptr + TI_UWINMASK], %twin_tmp1
+	orcc	%g0, %twin_tmp1, %g0
+	bne	ret_trap_userwins_ok
+	 nop
+
+		/* Calculate new %wim, we have to pull a register
+		 * window from the users stack.
+		 */
+ret_trap_pull_one_window:
+		rd	%wim, %t_wim
+		sll	%t_wim, 0x1, %twin_tmp1
+rtrap_patch1:	srl	%t_wim, 0x7, %glob_tmp
+		or	%glob_tmp, %twin_tmp1, %glob_tmp
+rtrap_patch2:	and	%glob_tmp, 0xff, %glob_tmp
+
+		wr	%glob_tmp, 0x0, %wim
+
+	/* Here comes the architecture specific
+	 * branch to the user stack checking routine
+	 * for return from traps.
+	 */
+	b	srmmu_rett_stackchk
+	 andcc	%fp, 0x7, %g0
+
+ret_trap_userwins_ok:
+	LOAD_PT_PRIV(sp, t_psr, t_pc, t_npc)
+	or	%t_pc, %t_npc, %g2
+	andcc	%g2, 0x3, %g0
+	sethi	%hi(PSR_SYSCALL), %g2
+	be	1f
+	 andn	%t_psr, %g2, %t_psr
+
+	b	ret_trap_unaligned_pc
+	 add	%sp, STACKFRAME_SZ, %o0
+
+1:
+	LOAD_PT_YREG(sp, g1)
+	LOAD_PT_GLOBALS(sp)
+
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE
+
+	jmp	%t_pc
+	rett	%t_npc
+
+ret_trap_unaligned_pc:
+	ld	[%sp + STACKFRAME_SZ + PT_PC], %o1
+	ld	[%sp + STACKFRAME_SZ + PT_NPC], %o2
+	ld	[%sp + STACKFRAME_SZ + PT_PSR], %o3
+
+	wr	%t_wim, 0x0, %wim		! or else...
+
+	wr	%t_psr, PSR_ET, %psr
+	WRITE_PAUSE
+
+	call	do_memaccess_unaligned
+	 nop
+
+	b	signal_p
+	 ld	[%curptr + TI_FLAGS], %g2
+
+ret_trap_kernel:
+		/* Will the rett land us in the invalid window? */
+		mov	2, %g1
+		sll	%g1, %t_psr, %g1
+rtrap_patch3:	srl	%g1, 8, %g2
+		or	%g1, %g2, %g1
+		rd	%wim, %g2
+		andcc	%g2, %g1, %g0
+		be	1f		! Nope, just return from the trap
+		 sll	%g2, 0x1, %g1
+
+		/* We have to grab a window before returning. */
+rtrap_patch4:	srl	%g2, 7,  %g2
+		or	%g1, %g2, %g1
+rtrap_patch5:	and	%g1, 0xff, %g1
+
+	wr	%g1, 0x0, %wim
+
+	/* Grrr, make sure we load from the right %sp... */
+	LOAD_PT_ALL(sp, t_psr, t_pc, t_npc, g1)
+
+	restore	%g0, %g0, %g0
+	LOAD_WINDOW(sp)
+	b	2f
+	 save	%g0, %g0, %g0
+
+	/* Reload the entire frame in case this is from a
+	 * kernel system call or whatever...
+	 */
+1:
+	LOAD_PT_ALL(sp, t_psr, t_pc, t_npc, g1)
+2:
+	sethi	%hi(PSR_SYSCALL), %twin_tmp1
+	andn	%t_psr, %twin_tmp1, %t_psr
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE
+
+	jmp	%t_pc
+	rett	%t_npc
+
+ret_trap_user_stack_is_bolixed:
+	wr	%t_wim, 0x0, %wim
+
+	wr	%t_psr, PSR_ET, %psr
+	WRITE_PAUSE
+
+	call	window_ret_fault
+	 add	%sp, STACKFRAME_SZ, %o0
+
+	b	signal_p
+	 ld	[%curptr + TI_FLAGS], %g2
+
+	.globl	srmmu_rett_stackchk
+srmmu_rett_stackchk:
+	bne	ret_trap_user_stack_is_bolixed
+	 sethi   %hi(PAGE_OFFSET), %g1
+	cmp	%g1, %fp
+	bleu	ret_trap_user_stack_is_bolixed
+	 mov	AC_M_SFSR, %g1
+LEON_PI(lda	[%g1] ASI_LEON_MMUREGS, %g0)
+SUN_PI_(lda	[%g1] ASI_M_MMUREGS, %g0)
+
+LEON_PI(lda	[%g0] ASI_LEON_MMUREGS, %g1)
+SUN_PI_(lda	[%g0] ASI_M_MMUREGS, %g1)
+	or	%g1, 0x2, %g1
+LEON_PI(sta	%g1, [%g0] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%g1, [%g0] ASI_M_MMUREGS)
+
+	restore	%g0, %g0, %g0
+
+	LOAD_WINDOW(sp)
+
+	save	%g0, %g0, %g0
+
+	andn	%g1, 0x2, %g1
+LEON_PI(sta	%g1, [%g0] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%g1, [%g0] ASI_M_MMUREGS)
+
+	mov	AC_M_SFAR, %g2
+LEON_PI(lda	[%g2] ASI_LEON_MMUREGS, %g2)
+SUN_PI_(lda	[%g2] ASI_M_MMUREGS, %g2)
+
+	mov	AC_M_SFSR, %g1
+LEON_PI(lda	[%g1] ASI_LEON_MMUREGS, %g1)
+SUN_PI_(lda	[%g1] ASI_M_MMUREGS, %g1)
+	andcc	%g1, 0x2, %g0
+	be	ret_trap_userwins_ok
+	 nop
+
+	b,a	ret_trap_user_stack_is_bolixed
diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S
new file mode 100644
index 0000000..29aa34f
--- /dev/null
+++ b/arch/sparc/kernel/rtrap_64.S
@@ -0,0 +1,378 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * rtrap.S: Preparing for return from trap on Sparc V9.
+ *
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+
+#include <asm/asi.h>
+#include <asm/pstate.h>
+#include <asm/ptrace.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+#include <asm/visasm.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_CONTEXT_TRACKING
+# define SCHEDULE_USER schedule_user
+#else
+# define SCHEDULE_USER schedule
+#endif
+
+		.text
+		.align			32
+__handle_preemption:
+		call			SCHEDULE_USER
+661:		 wrpr			%g0, RTRAP_PSTATE, %pstate
+		/* If userspace is using ADI, it could potentially pass
+		 * a pointer with version tag embedded in it. To maintain
+		 * the ADI security, we must re-enable PSTATE.mcde before
+		 * we continue execution in the kernel for another thread.
+		 */
+		.section .sun_m7_1insn_patch, "ax"
+		.word	661b
+		 wrpr			%g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate
+		.previous
+		ba,pt			%xcc, __handle_preemption_continue
+		 wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
+
+__handle_user_windows:
+		add			%sp, PTREGS_OFF, %o0
+		call			fault_in_user_windows
+661:		 wrpr			%g0, RTRAP_PSTATE, %pstate
+		/* If userspace is using ADI, it could potentially pass
+		 * a pointer with version tag embedded in it. To maintain
+		 * the ADI security, we must re-enable PSTATE.mcde before
+		 * we continue execution in the kernel for another thread.
+		 */
+		.section .sun_m7_1insn_patch, "ax"
+		.word	661b
+		 wrpr			%g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate
+		.previous
+		ba,pt			%xcc, __handle_preemption_continue
+		 wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
+
+__handle_userfpu:
+		rd			%fprs, %l5
+		andcc			%l5, FPRS_FEF, %g0
+		sethi			%hi(TSTATE_PEF), %o0
+		be,a,pn			%icc, __handle_userfpu_continue
+		 andn			%l1, %o0, %l1
+		ba,a,pt			%xcc, __handle_userfpu_continue
+
+__handle_signal:
+		mov			%l5, %o1
+		add			%sp, PTREGS_OFF, %o0
+		mov			%l0, %o2
+		call			do_notify_resume
+661:		 wrpr			%g0, RTRAP_PSTATE, %pstate
+		/* If userspace is using ADI, it could potentially pass
+		 * a pointer with version tag embedded in it. To maintain
+		 * the ADI security, we must re-enable PSTATE.mcde before
+		 * we continue execution in the kernel for another thread.
+		 */
+		.section .sun_m7_1insn_patch, "ax"
+		.word	661b
+		 wrpr			%g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate
+		.previous
+		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
+
+		/* Signal delivery can modify pt_regs tstate, so we must
+		 * reload it.
+		 */
+		ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+		sethi			%hi(0xf << 20), %l4
+		and			%l1, %l4, %l4
+		andn			%l1, %l4, %l1
+		ba,pt			%xcc, __handle_preemption_continue
+		 srl			%l4, 20, %l4
+
+		/* When returning from an NMI (%pil==15) interrupt we want to
+		 * avoid running softirqs, doing IRQ tracing, preempting, etc.
+		 */
+		.globl			rtrap_nmi
+rtrap_nmi:	ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+		sethi			%hi(0xf << 20), %l4
+		and			%l1, %l4, %l4
+		andn			%l1, %l4, %l1
+		srl			%l4, 20, %l4
+		ba,pt			%xcc, rtrap_no_irq_enable
+		nop
+		/* Do not actually set the %pil here.  We will do that
+		 * below after we clear PSTATE_IE in the %pstate register.
+		 * If we re-enable interrupts here, we can recurse down
+		 * the hardirq stack potentially endlessly, causing a
+		 * stack overflow.
+		 */
+
+		.align			64
+		.globl			rtrap_irq, rtrap, irqsz_patchme, rtrap_xcall
+rtrap_irq:
+rtrap:
+		/* mm/ultra.S:xcall_report_regs KNOWS about this load. */
+		ldx			[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+rtrap_xcall:
+		sethi			%hi(0xf << 20), %l4
+		and			%l1, %l4, %l4
+		andn			%l1, %l4, %l1
+		srl			%l4, 20, %l4
+#ifdef CONFIG_TRACE_IRQFLAGS
+		brnz,pn			%l4, rtrap_no_irq_enable
+		 nop
+		call			trace_hardirqs_on
+		 nop
+		/* Do not actually set the %pil here.  We will do that
+		 * below after we clear PSTATE_IE in the %pstate register.
+		 * If we re-enable interrupts here, we can recurse down
+		 * the hardirq stack potentially endlessly, causing a
+		 * stack overflow.
+		 *
+		 * It is tempting to put this test and trace_hardirqs_on
+		 * call at the 'rt_continue' label, but that will not work
+		 * as that path hits unconditionally and we do not want to
+		 * execute this in NMI return paths, for example.
+		 */
+#endif
+rtrap_no_irq_enable:
+		andcc			%l1, TSTATE_PRIV, %l3
+		bne,pn			%icc, to_kernel
+		 nop
+
+		/* We must hold IRQs off and atomically test schedule+signal
+		 * state, then hold them off all the way back to userspace.
+		 * If we are returning to kernel, none of this matters.  Note
+		 * that we are disabling interrupts via PSTATE_IE, not using
+		 * %pil.
+		 *
+		 * If we do not do this, there is a window where we would do
+		 * the tests, later the signal/resched event arrives but we do
+		 * not process it since we are still in kernel mode.  It would
+		 * take until the next local IRQ before the signal/resched
+		 * event would be handled.
+		 *
+		 * This also means that if we have to deal with user
+		 * windows, we have to redo all of these sched+signal checks
+		 * with IRQs disabled.
+		 */
+to_user:	wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
+		wrpr			0, %pil
+__handle_preemption_continue:
+		ldx			[%g6 + TI_FLAGS], %l0
+		sethi			%hi(_TIF_USER_WORK_MASK), %o0
+		or			%o0, %lo(_TIF_USER_WORK_MASK), %o0
+		andcc			%l0, %o0, %g0
+		sethi			%hi(TSTATE_PEF), %o0
+		be,pt			%xcc, user_nowork
+		 andcc			%l1, %o0, %g0
+		andcc			%l0, _TIF_NEED_RESCHED, %g0
+		bne,pn			%xcc, __handle_preemption
+		 andcc			%l0, _TIF_DO_NOTIFY_RESUME_MASK, %g0
+		bne,pn			%xcc, __handle_signal
+		 ldub			[%g6 + TI_WSAVED], %o2
+		brnz,pn			%o2, __handle_user_windows
+		 nop
+		sethi			%hi(TSTATE_PEF), %o0
+		andcc			%l1, %o0, %g0
+
+		/* This fpdepth clear is necessary for non-syscall rtraps only */
+user_nowork:
+		bne,pn			%xcc, __handle_userfpu
+		 stb			%g0, [%g6 + TI_FPDEPTH]
+__handle_userfpu_continue:
+
+rt_continue:	ldx			[%sp + PTREGS_OFF + PT_V9_G1], %g1
+		ldx			[%sp + PTREGS_OFF + PT_V9_G2], %g2
+
+		ldx			[%sp + PTREGS_OFF + PT_V9_G3], %g3
+		ldx			[%sp + PTREGS_OFF + PT_V9_G4], %g4
+		ldx			[%sp + PTREGS_OFF + PT_V9_G5], %g5
+		brz,pt			%l3, 1f
+		mov			%g6, %l2
+
+		/* Must do this before thread reg is clobbered below.  */
+		LOAD_PER_CPU_BASE(%g5, %g6, %i0, %i1, %i2)
+1:
+		ldx			[%sp + PTREGS_OFF + PT_V9_G6], %g6
+		ldx			[%sp + PTREGS_OFF + PT_V9_G7], %g7
+
+		/* Normal globals are restored, go to trap globals.  */
+661:		wrpr			%g0, RTRAP_PSTATE_AG_IRQOFF, %pstate
+		nop
+		.section		.sun4v_2insn_patch, "ax"
+		.word			661b
+		wrpr			%g0, RTRAP_PSTATE_IRQOFF, %pstate
+		SET_GL(1)
+		.previous
+
+		mov			%l2, %g6
+
+		ldx			[%sp + PTREGS_OFF + PT_V9_I0], %i0
+		ldx			[%sp + PTREGS_OFF + PT_V9_I1], %i1
+
+		ldx			[%sp + PTREGS_OFF + PT_V9_I2], %i2
+		ldx			[%sp + PTREGS_OFF + PT_V9_I3], %i3
+		ldx			[%sp + PTREGS_OFF + PT_V9_I4], %i4
+		ldx			[%sp + PTREGS_OFF + PT_V9_I5], %i5
+		ldx			[%sp + PTREGS_OFF + PT_V9_I6], %i6
+		ldx			[%sp + PTREGS_OFF + PT_V9_I7], %i7
+		ldx			[%sp + PTREGS_OFF + PT_V9_TPC], %l2
+		ldx			[%sp + PTREGS_OFF + PT_V9_TNPC], %o2
+
+		ld			[%sp + PTREGS_OFF + PT_V9_Y], %o3
+		wr			%o3, %g0, %y
+		wrpr			%l4, 0x0, %pil
+		wrpr			%g0, 0x1, %tl
+		andn			%l1, TSTATE_SYSCALL, %l1
+		wrpr			%l1, %g0, %tstate
+		wrpr			%l2, %g0, %tpc
+		wrpr			%o2, %g0, %tnpc
+
+		brnz,pn			%l3, kern_rtt
+		 mov			PRIMARY_CONTEXT, %l7
+
+661:		ldxa			[%l7 + %l7] ASI_DMMU, %l0
+		.section		.sun4v_1insn_patch, "ax"
+		.word			661b
+		ldxa			[%l7 + %l7] ASI_MMU, %l0
+		.previous
+
+		sethi			%hi(sparc64_kern_pri_nuc_bits), %l1
+		ldx			[%l1 + %lo(sparc64_kern_pri_nuc_bits)], %l1
+		or			%l0, %l1, %l0
+
+661:		stxa			%l0, [%l7] ASI_DMMU
+		.section		.sun4v_1insn_patch, "ax"
+		.word			661b
+		stxa			%l0, [%l7] ASI_MMU
+		.previous
+
+		sethi			%hi(KERNBASE), %l7
+		flush			%l7
+		rdpr			%wstate, %l1
+		rdpr			%otherwin, %l2
+		srl			%l1, 3, %l1
+
+661:		wrpr			%l2, %g0, %canrestore
+		.section		.fast_win_ctrl_1insn_patch, "ax"
+		.word			661b
+		.word			0x89880000	! normalw
+		.previous
+
+		wrpr			%l1, %g0, %wstate
+		brnz,pt			%l2, user_rtt_restore
+661:		 wrpr			%g0, %g0, %otherwin
+		.section		.fast_win_ctrl_1insn_patch, "ax"
+		.word			661b
+		 nop
+		.previous
+
+		ldx			[%g6 + TI_FLAGS], %g3
+		wr			%g0, ASI_AIUP, %asi
+		rdpr			%cwp, %g1
+		andcc			%g3, _TIF_32BIT, %g0
+		sub			%g1, 1, %g1
+		bne,pt			%xcc, user_rtt_fill_32bit
+		 wrpr			%g1, %cwp
+		ba,a,pt			%xcc, user_rtt_fill_64bit
+		 nop
+
+user_rtt_fill_fixup_dax:
+		ba,pt	%xcc, user_rtt_fill_fixup_common
+		 mov	1, %g3
+
+user_rtt_fill_fixup_mna:
+		ba,pt	%xcc, user_rtt_fill_fixup_common
+		 mov	2, %g3
+
+user_rtt_fill_fixup:
+		ba,pt	%xcc, user_rtt_fill_fixup_common
+		 clr	%g3
+
+user_rtt_pre_restore:
+		add			%g1, 1, %g1
+		wrpr			%g1, 0x0, %cwp
+
+user_rtt_restore:
+		restore
+		rdpr			%canrestore, %g1
+		wrpr			%g1, 0x0, %cleanwin
+		retry
+		nop
+
+kern_rtt:	rdpr			%canrestore, %g1
+		brz,pn			%g1, kern_rtt_fill
+		 nop
+kern_rtt_restore:
+		stw			%g0, [%sp + PTREGS_OFF + PT_V9_MAGIC]
+		restore
+		retry
+
+to_kernel:
+#ifdef CONFIG_PREEMPT
+		ldsw			[%g6 + TI_PRE_COUNT], %l5
+		brnz			%l5, kern_fpucheck
+		 ldx			[%g6 + TI_FLAGS], %l5
+		andcc			%l5, _TIF_NEED_RESCHED, %g0
+		be,pt			%xcc, kern_fpucheck
+		 nop
+		cmp			%l4, 0
+		bne,pn			%xcc, kern_fpucheck
+		 nop
+		call			preempt_schedule_irq
+		 nop
+		ba,pt			%xcc, rtrap
+#endif
+kern_fpucheck:	ldub			[%g6 + TI_FPDEPTH], %l5
+		brz,pt			%l5, rt_continue
+		 srl			%l5, 1, %o0
+		add			%g6, TI_FPSAVED, %l6
+		ldub			[%l6 + %o0], %l2
+		sub			%l5, 2, %l5
+
+		add			%g6, TI_GSR, %o1
+		andcc			%l2, (FPRS_FEF|FPRS_DU), %g0
+		be,pt			%icc, 2f
+		 and			%l2, FPRS_DL, %l6
+		andcc			%l2, FPRS_FEF, %g0
+		be,pn			%icc, 5f
+		 sll			%o0, 3, %o5
+		rd			%fprs, %g1
+
+		wr			%g1, FPRS_FEF, %fprs
+		ldx			[%o1 + %o5], %g1
+		add			%g6, TI_XFSR, %o1
+		sll			%o0, 8, %o2
+		add			%g6, TI_FPREGS, %o3
+		brz,pn			%l6, 1f
+		 add			%g6, TI_FPREGS+0x40, %o4
+
+		membar			#Sync
+		ldda			[%o3 + %o2] ASI_BLK_P, %f0
+		ldda			[%o4 + %o2] ASI_BLK_P, %f16
+		membar			#Sync
+1:		andcc			%l2, FPRS_DU, %g0
+		be,pn			%icc, 1f
+		 wr			%g1, 0, %gsr
+		add			%o2, 0x80, %o2
+		membar			#Sync
+		ldda			[%o3 + %o2] ASI_BLK_P, %f32
+		ldda			[%o4 + %o2] ASI_BLK_P, %f48
+1:		membar			#Sync
+		ldx			[%o1 + %o5], %fsr
+2:		stb			%l5, [%g6 + TI_FPDEPTH]
+		ba,pt			%xcc, rt_continue
+		 nop
+5:		wr			%g0, FPRS_FEF, %fprs
+		sll			%o0, 8, %o2
+
+		add			%g6, TI_FPREGS+0x80, %o3
+		add			%g6, TI_FPREGS+0xc0, %o4
+		membar			#Sync
+		ldda			[%o3 + %o2] ASI_BLK_P, %f32
+		ldda			[%o4 + %o2] ASI_BLK_P, %f48
+		membar			#Sync
+		wr			%g0, FPRS_DU, %fprs
+		ba,pt			%xcc, rt_continue
+		 stb			%l5, [%g6 + TI_FPDEPTH]
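
The mask dance repeated at rtrap, rtrap_nmi and __handle_signal above isolates the PIL field of the saved TSTATE (bits 23:20 on V9) into %l4 and strips it from the value later written back with "wrpr %l1, %g0, %tstate". A stand-alone C rendering of those three instructions:

	#include <stdint.h>
	#include <stdio.h>

	#define TSTATE_PIL_SHIFT	20
	#define TSTATE_PIL_MASK	(0xfULL << TSTATE_PIL_SHIFT)	/* sethi %hi(0xf << 20) */

	/* Split a saved TSTATE into the PIL to restore and the remainder. */
	static void split_tstate(uint64_t tstate, uint64_t *rest, unsigned int *pil)
	{
		uint64_t field = tstate & TSTATE_PIL_MASK;	/* and  %l1, %l4, %l4 */

		*rest = tstate & ~field;			/* andn %l1, %l4, %l1 */
		*pil = (unsigned int)(field >> TSTATE_PIL_SHIFT); /* srl %l4, 20, %l4 */
	}

	int main(void)
	{
		uint64_t rest;
		unsigned int pil;

		split_tstate(0xf00500, &rest, &pil);
		printf("pil=%u rest=%#llx\n", pil,
		       (unsigned long long)rest);	/* pil=15 rest=0x500 */
		return 0;
	}
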
diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c
new file mode 100644
index 0000000..c133dfc
--- /dev/null
+++ b/arch/sparc/kernel/sbus.c
@@ -0,0 +1,677 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sbus.c: UltraSparc SBUS controller support.
+ *
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/page.h>
+#include <asm/io.h>
+#include <asm/upa.h>
+#include <asm/cache.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
+#include <asm/prom.h>
+#include <asm/oplib.h>
+#include <asm/starfire.h>
+
+#include "iommu_common.h"
+
+#define MAP_BASE	((u32)0xc0000000)
+
+/* Offsets from iommu_regs */
+#define SYSIO_IOMMUREG_BASE	0x2400UL
+#define IOMMU_CONTROL	(0x2400UL - 0x2400UL)	/* IOMMU control register */
+#define IOMMU_TSBBASE	(0x2408UL - 0x2400UL)	/* TSB base address register */
+#define IOMMU_FLUSH	(0x2410UL - 0x2400UL)	/* IOMMU flush register */
+#define IOMMU_VADIAG	(0x4400UL - 0x2400UL)	/* SBUS virtual address diagnostic */
+#define IOMMU_TAGCMP	(0x4408UL - 0x2400UL)	/* TLB tag compare diagnostics */
+#define IOMMU_LRUDIAG	(0x4500UL - 0x2400UL)	/* IOMMU LRU queue diagnostics */
+#define IOMMU_TAGDIAG	(0x4580UL - 0x2400UL)	/* TLB tag diagnostics */
+#define IOMMU_DRAMDIAG	(0x4600UL - 0x2400UL)	/* TLB data RAM diagnostics */
+
+#define IOMMU_DRAM_VALID	(1UL << 30UL)
+
+/* Offsets from strbuf_regs */
+#define SYSIO_STRBUFREG_BASE	0x2800UL
+#define STRBUF_CONTROL	(0x2800UL - 0x2800UL)	/* Control */
+#define STRBUF_PFLUSH	(0x2808UL - 0x2800UL)	/* Page flush/invalidate */
+#define STRBUF_FSYNC	(0x2810UL - 0x2800UL)	/* Flush synchronization */
+#define STRBUF_DRAMDIAG	(0x5000UL - 0x2800UL)	/* data RAM diagnostic */
+#define STRBUF_ERRDIAG	(0x5400UL - 0x2800UL)	/* error status diagnostics */
+#define STRBUF_PTAGDIAG	(0x5800UL - 0x2800UL)	/* Page tag diagnostics */
+#define STRBUF_LTAGDIAG	(0x5900UL - 0x2800UL)	/* Line tag diagnostics */
+
+#define STRBUF_TAG_VALID	0x02UL
+
+/* Enable 64-bit DVMA mode for the given device. */
+void sbus_set_sbus64(struct device *dev, int bursts)
+{
+	struct iommu *iommu = dev->archdata.iommu;
+	struct platform_device *op = to_platform_device(dev);
+	const struct linux_prom_registers *regs;
+	unsigned long cfg_reg;
+	int slot;
+	u64 val;
+
+	regs = of_get_property(op->dev.of_node, "reg", NULL);
+	if (!regs) {
+		printk(KERN_ERR "sbus_set_sbus64: Cannot find regs for %s\n",
+		       op->dev.of_node->full_name);
+		return;
+	}
+	slot = regs->which_io;
+
+	cfg_reg = iommu->write_complete_reg;
+	switch (slot) {
+	case 0:
+		cfg_reg += 0x20UL;
+		break;
+	case 1:
+		cfg_reg += 0x28UL;
+		break;
+	case 2:
+		cfg_reg += 0x30UL;
+		break;
+	case 3:
+		cfg_reg += 0x38UL;
+		break;
+	case 13:
+		cfg_reg += 0x40UL;
+		break;
+	case 14:
+		cfg_reg += 0x48UL;
+		break;
+	case 15:
+		cfg_reg += 0x50UL;
+		break;
+
+	default:
+		return;
+	}
+
+	val = upa_readq(cfg_reg);
+	if (val & (1UL << 14UL)) {
+		/* Extended transfer mode already enabled. */
+		return;
+	}
+
+	val |= (1UL << 14UL);
+
+	if (bursts & DMA_BURST8)
+		val |= (1UL << 1UL);
+	if (bursts & DMA_BURST16)
+		val |= (1UL << 2UL);
+	if (bursts & DMA_BURST32)
+		val |= (1UL << 3UL);
+	if (bursts & DMA_BURST64)
+		val |= (1UL << 4UL);
+	upa_writeq(val, cfg_reg);
+}
+EXPORT_SYMBOL(sbus_set_sbus64);
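
As a rough user-space model of the bit layout written above: bit 14 of the per-slot configuration register enables the extended (64-bit) transfer mode, and bits 1-4 advertise the supported burst sizes. The DMA_BURST* values below are illustrative stand-ins, not the kernel's asm/dma.h definitions:

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-ins; the real masks live in asm/dma.h. */
	#define DMA_BURST8	0x01
	#define DMA_BURST16	0x02
	#define DMA_BURST32	0x04
	#define DMA_BURST64	0x08

	static uint64_t encode_sbus64(uint64_t cfg, int bursts)
	{
		cfg |= 1UL << 14;		/* extended transfer mode */
		if (bursts & DMA_BURST8)
			cfg |= 1UL << 1;
		if (bursts & DMA_BURST16)
			cfg |= 1UL << 2;
		if (bursts & DMA_BURST32)
			cfg |= 1UL << 3;
		if (bursts & DMA_BURST64)
			cfg |= 1UL << 4;
		return cfg;
	}

	int main(void)
	{
		printf("%#llx\n", (unsigned long long)
		       encode_sbus64(0, DMA_BURST32 | DMA_BURST64));	/* 0x4018 */
		return 0;
	}
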
+
+/* INO number to IMAP register offset for SYSIO external IRQs.
+ * This should conform to both Sunfire/Wildfire server and Fusion
+ * desktop designs.
+ */
+#define SYSIO_IMAP_SLOT0	0x2c00UL
+#define SYSIO_IMAP_SLOT1	0x2c08UL
+#define SYSIO_IMAP_SLOT2	0x2c10UL
+#define SYSIO_IMAP_SLOT3	0x2c18UL
+#define SYSIO_IMAP_SCSI		0x3000UL
+#define SYSIO_IMAP_ETH		0x3008UL
+#define SYSIO_IMAP_BPP		0x3010UL
+#define SYSIO_IMAP_AUDIO	0x3018UL
+#define SYSIO_IMAP_PFAIL	0x3020UL
+#define SYSIO_IMAP_KMS		0x3028UL
+#define SYSIO_IMAP_FLPY		0x3030UL
+#define SYSIO_IMAP_SHW		0x3038UL
+#define SYSIO_IMAP_KBD		0x3040UL
+#define SYSIO_IMAP_MS		0x3048UL
+#define SYSIO_IMAP_SER		0x3050UL
+#define SYSIO_IMAP_TIM0		0x3060UL
+#define SYSIO_IMAP_TIM1		0x3068UL
+#define SYSIO_IMAP_UE		0x3070UL
+#define SYSIO_IMAP_CE		0x3078UL
+#define SYSIO_IMAP_SBERR	0x3080UL
+#define SYSIO_IMAP_PMGMT	0x3088UL
+#define SYSIO_IMAP_GFX		0x3090UL
+#define SYSIO_IMAP_EUPA		0x3098UL
+
+#define bogon     ((unsigned long) -1)
+static unsigned long sysio_irq_offsets[] = {
+	/* SBUS Slot 0 --> 3, level 1 --> 7 */
+	SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0,
+	SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0, SYSIO_IMAP_SLOT0,
+	SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1,
+	SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1, SYSIO_IMAP_SLOT1,
+	SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2,
+	SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2, SYSIO_IMAP_SLOT2,
+	SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3,
+	SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3, SYSIO_IMAP_SLOT3,
+
+	/* Onboard devices (not relevant/used on SunFire). */
+	SYSIO_IMAP_SCSI,
+	SYSIO_IMAP_ETH,
+	SYSIO_IMAP_BPP,
+	bogon,
+	SYSIO_IMAP_AUDIO,
+	SYSIO_IMAP_PFAIL,
+	bogon,
+	bogon,
+	SYSIO_IMAP_KMS,
+	SYSIO_IMAP_FLPY,
+	SYSIO_IMAP_SHW,
+	SYSIO_IMAP_KBD,
+	SYSIO_IMAP_MS,
+	SYSIO_IMAP_SER,
+	bogon,
+	bogon,
+	SYSIO_IMAP_TIM0,
+	SYSIO_IMAP_TIM1,
+	bogon,
+	bogon,
+	SYSIO_IMAP_UE,
+	SYSIO_IMAP_CE,
+	SYSIO_IMAP_SBERR,
+	SYSIO_IMAP_PMGMT,
+};
+
+#undef bogon
+
+#define NUM_SYSIO_OFFSETS ARRAY_SIZE(sysio_irq_offsets)
+
+/* Convert Interrupt Mapping register pointer to associated
+ * Interrupt Clear register pointer, SYSIO specific version.
+ */
+#define SYSIO_ICLR_UNUSED0	0x3400UL
+#define SYSIO_ICLR_SLOT0	0x3408UL
+#define SYSIO_ICLR_SLOT1	0x3448UL
+#define SYSIO_ICLR_SLOT2	0x3488UL
+#define SYSIO_ICLR_SLOT3	0x34c8UL
+static unsigned long sysio_imap_to_iclr(unsigned long imap)
+{
+	unsigned long diff = SYSIO_ICLR_UNUSED0 - SYSIO_IMAP_SLOT0;
+	return imap + diff;
+}
+
+static unsigned int sbus_build_irq(struct platform_device *op, unsigned int ino)
+{
+	struct iommu *iommu = op->dev.archdata.iommu;
+	unsigned long reg_base = iommu->write_complete_reg - 0x2000UL;
+	unsigned long imap, iclr;
+	int sbus_level = 0;
+
+	imap = sysio_irq_offsets[ino];
+	if (imap == ((unsigned long)-1)) {
+		prom_printf("get_irq_translations: Bad SYSIO INO[%x]\n",
+			    ino);
+		prom_halt();
+	}
+	imap += reg_base;
+
+	/* SYSIO inconsistency.  For external SLOTS, we have to select
+	 * the right ICLR register based upon the lower SBUS irq level
+	 * bits.
+	 */
+	if (ino >= 0x20) {
+		iclr = sysio_imap_to_iclr(imap);
+	} else {
+		int sbus_slot = (ino & 0x18) >> 3;
+
+		sbus_level = ino & 0x7;
+
+		switch (sbus_slot) {
+		case 0:
+			iclr = reg_base + SYSIO_ICLR_SLOT0;
+			break;
+		case 1:
+			iclr = reg_base + SYSIO_ICLR_SLOT1;
+			break;
+		case 2:
+			iclr = reg_base + SYSIO_ICLR_SLOT2;
+			break;
+		default:
+		case 3:
+			iclr = reg_base + SYSIO_ICLR_SLOT3;
+			break;
+		}
+
+		iclr += ((unsigned long)sbus_level - 1UL) * 8UL;
+	}
+	return build_irq(sbus_level, iclr, imap);
+}
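
A small stand-alone sketch of the INO decode used above: external SBUS interrupts keep the slot number in bits 4:3 and the SBUS level in bits 2:0, while INOs of 0x20 and up name onboard devices with dedicated ICLR registers:

	#include <stdio.h>

	static void decode_ino(unsigned int ino)
	{
		if (ino >= 0x20) {
			printf("INO %#x: onboard device\n", ino);
		} else {
			unsigned int slot = (ino & 0x18) >> 3;
			unsigned int level = ino & 0x7;

			printf("INO %#x: slot %u, level %u\n", ino, slot, level);
		}
	}

	int main(void)
	{
		decode_ino(0x0d);	/* slot 1, level 5 */
		decode_ino(0x34);	/* SYSIO_UE_INO: onboard */
		return 0;
	}
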
+
+/* Error interrupt handling. */
+#define SYSIO_UE_AFSR	0x0030UL
+#define SYSIO_UE_AFAR	0x0038UL
+#define  SYSIO_UEAFSR_PPIO  0x8000000000000000UL /* Primary PIO cause         */
+#define  SYSIO_UEAFSR_PDRD  0x4000000000000000UL /* Primary DVMA read cause   */
+#define  SYSIO_UEAFSR_PDWR  0x2000000000000000UL /* Primary DVMA write cause  */
+#define  SYSIO_UEAFSR_SPIO  0x1000000000000000UL /* Secondary PIO is cause    */
+#define  SYSIO_UEAFSR_SDRD  0x0800000000000000UL /* Secondary DVMA read cause */
+#define  SYSIO_UEAFSR_SDWR  0x0400000000000000UL /* Secondary DVMA write cause*/
+#define  SYSIO_UEAFSR_RESV1 0x03ff000000000000UL /* Reserved                  */
+#define  SYSIO_UEAFSR_DOFF  0x0000e00000000000UL /* Doubleword Offset         */
+#define  SYSIO_UEAFSR_SIZE  0x00001c0000000000UL /* Bad transfer size 2^SIZE  */
+#define  SYSIO_UEAFSR_MID   0x000003e000000000UL /* UPA MID causing the fault */
+#define  SYSIO_UEAFSR_RESV2 0x0000001fffffffffUL /* Reserved                  */
+static irqreturn_t sysio_ue_handler(int irq, void *dev_id)
+{
+	struct platform_device *op = dev_id;
+	struct iommu *iommu = op->dev.archdata.iommu;
+	unsigned long reg_base = iommu->write_complete_reg - 0x2000UL;
+	unsigned long afsr_reg, afar_reg;
+	unsigned long afsr, afar, error_bits;
+	int reported, portid;
+
+	afsr_reg = reg_base + SYSIO_UE_AFSR;
+	afar_reg = reg_base + SYSIO_UE_AFAR;
+
+	/* Latch error status. */
+	afsr = upa_readq(afsr_reg);
+	afar = upa_readq(afar_reg);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(SYSIO_UEAFSR_PPIO | SYSIO_UEAFSR_PDRD | SYSIO_UEAFSR_PDWR |
+		 SYSIO_UEAFSR_SPIO | SYSIO_UEAFSR_SDRD | SYSIO_UEAFSR_SDWR);
+	upa_writeq(error_bits, afsr_reg);
+
+	portid = of_getintprop_default(op->dev.of_node, "portid", -1);
+
+	/* Log the error. */
+	printk("SYSIO[%x]: Uncorrectable ECC Error, primary error type[%s]\n",
+	       portid,
+	       (((error_bits & SYSIO_UEAFSR_PPIO) ?
+		 "PIO" :
+		 ((error_bits & SYSIO_UEAFSR_PDRD) ?
+		  "DVMA Read" :
+		  ((error_bits & SYSIO_UEAFSR_PDWR) ?
+		   "DVMA Write" : "???")))));
+	printk("SYSIO[%x]: DOFF[%lx] SIZE[%lx] MID[%lx]\n",
+	       portid,
+	       (afsr & SYSIO_UEAFSR_DOFF) >> 45UL,
+	       (afsr & SYSIO_UEAFSR_SIZE) >> 42UL,
+	       (afsr & SYSIO_UEAFSR_MID) >> 37UL);
+	printk("SYSIO[%x]: AFAR[%016lx]\n", portid, afar);
+	printk("SYSIO[%x]: Secondary UE errors [", portid);
+	reported = 0;
+	if (afsr & SYSIO_UEAFSR_SPIO) {
+		reported++;
+		printk("(PIO)");
+	}
+	if (afsr & SYSIO_UEAFSR_SDRD) {
+		reported++;
+		printk("(DVMA Read)");
+	}
+	if (afsr & SYSIO_UEAFSR_SDWR) {
+		reported++;
+		printk("(DVMA Write)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	return IRQ_HANDLED;
+}
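
The latch-then-clear sequence in this handler (and in the CE and SBUS-error handlers below) relies on the AFSR's six primary/secondary cause bits, occupying bits 63:58, being write-one-to-clear. A toy model, with a plain variable standing in for the memory-mapped register behind upa_readq/upa_writeq:

	#include <stdint.h>
	#include <stdio.h>

	/* Plain variable standing in for the hardware AFSR. */
	static uint64_t fake_afsr = 0x9000000000000000ULL;	/* PPIO | SPIO latched */

	static uint64_t reg_read(uint64_t *r) { return *r; }
	static void reg_write(uint64_t *r, uint64_t v) { *r &= ~v; }	/* W1C */

	int main(void)
	{
		uint64_t afsr = reg_read(&fake_afsr);
		uint64_t error_bits = afsr & 0xfc00000000000000ULL;	/* bits 63:58 */

		reg_write(&fake_afsr, error_bits);	/* ack what we just logged */
		printf("latched=%#llx, afsr now %#llx\n",
		       (unsigned long long)error_bits,
		       (unsigned long long)fake_afsr);	/* afsr now 0 */
		return 0;
	}
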
+
+#define SYSIO_CE_AFSR	0x0040UL
+#define SYSIO_CE_AFAR	0x0048UL
+#define  SYSIO_CEAFSR_PPIO  0x8000000000000000UL /* Primary PIO cause         */
+#define  SYSIO_CEAFSR_PDRD  0x4000000000000000UL /* Primary DVMA read cause   */
+#define  SYSIO_CEAFSR_PDWR  0x2000000000000000UL /* Primary DVMA write cause  */
+#define  SYSIO_CEAFSR_SPIO  0x1000000000000000UL /* Secondary PIO cause       */
+#define  SYSIO_CEAFSR_SDRD  0x0800000000000000UL /* Secondary DVMA read cause */
+#define  SYSIO_CEAFSR_SDWR  0x0400000000000000UL /* Secondary DVMA write cause*/
+#define  SYSIO_CEAFSR_RESV1 0x0300000000000000UL /* Reserved                  */
+#define  SYSIO_CEAFSR_ESYND 0x00ff000000000000UL /* Syndrome Bits             */
+#define  SYSIO_CEAFSR_DOFF  0x0000e00000000000UL /* Doubleword Offset         */
+#define  SYSIO_CEAFSR_SIZE  0x00001c0000000000UL /* Bad transfer size 2^SIZE  */
+#define  SYSIO_CEAFSR_MID   0x000003e000000000UL /* UPA MID causing the fault */
+#define  SYSIO_CEAFSR_RESV2 0x0000001fffffffffUL /* Reserved                  */
+static irqreturn_t sysio_ce_handler(int irq, void *dev_id)
+{
+	struct platform_device *op = dev_id;
+	struct iommu *iommu = op->dev.archdata.iommu;
+	unsigned long reg_base = iommu->write_complete_reg - 0x2000UL;
+	unsigned long afsr_reg, afar_reg;
+	unsigned long afsr, afar, error_bits;
+	int reported, portid;
+
+	afsr_reg = reg_base + SYSIO_CE_AFSR;
+	afar_reg = reg_base + SYSIO_CE_AFAR;
+
+	/* Latch error status. */
+	afsr = upa_readq(afsr_reg);
+	afar = upa_readq(afar_reg);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(SYSIO_CEAFSR_PPIO | SYSIO_CEAFSR_PDRD | SYSIO_CEAFSR_PDWR |
+		 SYSIO_CEAFSR_SPIO | SYSIO_CEAFSR_SDRD | SYSIO_CEAFSR_SDWR);
+	upa_writeq(error_bits, afsr_reg);
+
+	portid = of_getintprop_default(op->dev.of_node, "portid", -1);
+
+	printk("SYSIO[%x]: Correctable ECC Error, primary error type[%s]\n",
+	       portid,
+	       (((error_bits & SYSIO_CEAFSR_PPIO) ?
+		 "PIO" :
+		 ((error_bits & SYSIO_CEAFSR_PDRD) ?
+		  "DVMA Read" :
+		  ((error_bits & SYSIO_CEAFSR_PDWR) ?
+		   "DVMA Write" : "???")))));
+
+	/* XXX Use syndrome and afar to print out module string just like
+	 * XXX UDB CE trap handler does... -DaveM
+	 */
+	printk("SYSIO[%x]: DOFF[%lx] ECC Syndrome[%lx] Size[%lx] MID[%lx]\n",
+	       portid,
+	       (afsr & SYSIO_CEAFSR_DOFF) >> 45UL,
+	       (afsr & SYSIO_CEAFSR_ESYND) >> 48UL,
+	       (afsr & SYSIO_CEAFSR_SIZE) >> 42UL,
+	       (afsr & SYSIO_CEAFSR_MID) >> 37UL);
+	printk("SYSIO[%x]: AFAR[%016lx]\n", portid, afar);
+
+	printk("SYSIO[%x]: Secondary CE errors [", portid);
+	reported = 0;
+	if (afsr & SYSIO_CEAFSR_SPIO) {
+		reported++;
+		printk("(PIO)");
+	}
+	if (afsr & SYSIO_CEAFSR_SDRD) {
+		reported++;
+		printk("(DVMA Read)");
+	}
+	if (afsr & SYSIO_CEAFSR_SDWR) {
+		reported++;
+		printk("(DVMA Write)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	return IRQ_HANDLED;
+}
+
+#define SYSIO_SBUS_AFSR		0x2010UL
+#define SYSIO_SBUS_AFAR		0x2018UL
+#define  SYSIO_SBAFSR_PLE   0x8000000000000000UL /* Primary Late PIO Error    */
+#define  SYSIO_SBAFSR_PTO   0x4000000000000000UL /* Primary SBUS Timeout      */
+#define  SYSIO_SBAFSR_PBERR 0x2000000000000000UL /* Primary SBUS Error ACK    */
+#define  SYSIO_SBAFSR_SLE   0x1000000000000000UL /* Secondary Late PIO Error  */
+#define  SYSIO_SBAFSR_STO   0x0800000000000000UL /* Secondary SBUS Timeout    */
+#define  SYSIO_SBAFSR_SBERR 0x0400000000000000UL /* Secondary SBUS Error ACK  */
+#define  SYSIO_SBAFSR_RESV1 0x03ff000000000000UL /* Reserved                  */
+#define  SYSIO_SBAFSR_RD    0x0000800000000000UL /* Primary was late PIO read */
+#define  SYSIO_SBAFSR_RESV2 0x0000600000000000UL /* Reserved                  */
+#define  SYSIO_SBAFSR_SIZE  0x00001c0000000000UL /* Size of transfer          */
+#define  SYSIO_SBAFSR_MID   0x000003e000000000UL /* MID causing the error     */
+#define  SYSIO_SBAFSR_RESV3 0x0000001fffffffffUL /* Reserved                  */
+static irqreturn_t sysio_sbus_error_handler(int irq, void *dev_id)
+{
+	struct platform_device *op = dev_id;
+	struct iommu *iommu = op->dev.archdata.iommu;
+	unsigned long afsr_reg, afar_reg, reg_base;
+	unsigned long afsr, afar, error_bits;
+	int reported, portid;
+
+	reg_base = iommu->write_complete_reg - 0x2000UL;
+	afsr_reg = reg_base + SYSIO_SBUS_AFSR;
+	afar_reg = reg_base + SYSIO_SBUS_AFAR;
+
+	afsr = upa_readq(afsr_reg);
+	afar = upa_readq(afar_reg);
+
+	/* Clear primary/secondary error status bits. */
+	error_bits = afsr &
+		(SYSIO_SBAFSR_PLE | SYSIO_SBAFSR_PTO | SYSIO_SBAFSR_PBERR |
+		 SYSIO_SBAFSR_SLE | SYSIO_SBAFSR_STO | SYSIO_SBAFSR_SBERR);
+	upa_writeq(error_bits, afsr_reg);
+
+	portid = of_getintprop_default(op->dev.of_node, "portid", -1);
+
+	/* Log the error. */
+	printk("SYSIO[%x]: SBUS Error, primary error type[%s] read(%d)\n",
+	       portid,
+	       (((error_bits & SYSIO_SBAFSR_PLE) ?
+		 "Late PIO Error" :
+		 ((error_bits & SYSIO_SBAFSR_PTO) ?
+		  "Time Out" :
+		  ((error_bits & SYSIO_SBAFSR_PBERR) ?
+		   "Error Ack" : "???")))),
+	       (afsr & SYSIO_SBAFSR_RD) ? 1 : 0);
+	printk("SYSIO[%x]: size[%lx] MID[%lx]\n",
+	       portid,
+	       (afsr & SYSIO_SBAFSR_SIZE) >> 42UL,
+	       (afsr & SYSIO_SBAFSR_MID) >> 37UL);
+	printk("SYSIO[%x]: AFAR[%016lx]\n", portid, afar);
+	printk("SYSIO[%x]: Secondary SBUS errors [", portid);
+	reported = 0;
+	if (afsr & SYSIO_SBAFSR_SLE) {
+		reported++;
+		printk("(Late PIO Error)");
+	}
+	if (afsr & SYSIO_SBAFSR_STO) {
+		reported++;
+		printk("(Time Out)");
+	}
+	if (afsr & SYSIO_SBAFSR_SBERR) {
+		reported++;
+		printk("(Error Ack)");
+	}
+	if (!reported)
+		printk("(none)");
+	printk("]\n");
+
+	/* XXX check iommu/strbuf for further error status XXX */
+
+	return IRQ_HANDLED;
+}
+
+#define ECC_CONTROL	0x0020UL
+#define  SYSIO_ECNTRL_ECCEN	0x8000000000000000UL /* Enable ECC Checking   */
+#define  SYSIO_ECNTRL_UEEN	0x4000000000000000UL /* Enable UE Interrupts  */
+#define  SYSIO_ECNTRL_CEEN	0x2000000000000000UL /* Enable CE Interrupts  */
+
+#define SYSIO_UE_INO		0x34
+#define SYSIO_CE_INO		0x35
+#define SYSIO_SBUSERR_INO	0x36
+
+static void __init sysio_register_error_handlers(struct platform_device *op)
+{
+	struct iommu *iommu = op->dev.archdata.iommu;
+	unsigned long reg_base = iommu->write_complete_reg - 0x2000UL;
+	unsigned int irq;
+	u64 control;
+	int portid;
+
+	portid = of_getintprop_default(op->dev.of_node, "portid", -1);
+
+	irq = sbus_build_irq(op, SYSIO_UE_INO);
+	if (request_irq(irq, sysio_ue_handler, 0,
+			"SYSIO_UE", op) < 0) {
+		prom_printf("SYSIO[%x]: Cannot register UE interrupt.\n",
+			    portid);
+		prom_halt();
+	}
+
+	irq = sbus_build_irq(op, SYSIO_CE_INO);
+	if (request_irq(irq, sysio_ce_handler, 0,
+			"SYSIO_CE", op) < 0) {
+		prom_printf("SYSIO[%x]: Cannot register CE interrupt.\n",
+			    portid);
+		prom_halt();
+	}
+
+	irq = sbus_build_irq(op, SYSIO_SBUSERR_INO);
+	if (request_irq(irq, sysio_sbus_error_handler, 0,
+			"SYSIO_SBERR", op) < 0) {
+		prom_printf("SYSIO[%x]: Cannot register SBUS Error interrupt.\n",
+			    portid);
+		prom_halt();
+	}
+
+	/* Now turn the error interrupts on and also enable ECC checking. */
+	upa_writeq((SYSIO_ECNTRL_ECCEN |
+		    SYSIO_ECNTRL_UEEN  |
+		    SYSIO_ECNTRL_CEEN),
+		   reg_base + ECC_CONTROL);
+
+	control = upa_readq(iommu->write_complete_reg);
+	control |= 0x100UL; /* SBUS Error Interrupt Enable */
+	upa_writeq(control, iommu->write_complete_reg);
+}
+
+/* Boot time initialization. */
+static void __init sbus_iommu_init(struct platform_device *op)
+{
+	const struct linux_prom64_registers *pr;
+	struct device_node *dp = op->dev.of_node;
+	struct iommu *iommu;
+	struct strbuf *strbuf;
+	unsigned long regs, reg_base;
+	int i, portid;
+	u64 control;
+
+	pr = of_get_property(dp, "reg", NULL);
+	if (!pr) {
+		prom_printf("sbus_iommu_init: Cannot map SYSIO "
+			    "control registers.\n");
+		prom_halt();
+	}
+	regs = pr->phys_addr;
+
+	iommu = kzalloc(sizeof(*iommu), GFP_ATOMIC);
+	strbuf = kzalloc(sizeof(*strbuf), GFP_ATOMIC);
+	if (!iommu || !strbuf)
+		goto fatal_memory_error;
+
+	op->dev.archdata.iommu = iommu;
+	op->dev.archdata.stc = strbuf;
+	op->dev.archdata.numa_node = -1;
+
+	reg_base = regs + SYSIO_IOMMUREG_BASE;
+	iommu->iommu_control = reg_base + IOMMU_CONTROL;
+	iommu->iommu_tsbbase = reg_base + IOMMU_TSBBASE;
+	iommu->iommu_flush = reg_base + IOMMU_FLUSH;
+	iommu->iommu_tags = iommu->iommu_control +
+		(IOMMU_TAGDIAG - IOMMU_CONTROL);
+
+	reg_base = regs + SYSIO_STRBUFREG_BASE;
+	strbuf->strbuf_control = reg_base + STRBUF_CONTROL;
+	strbuf->strbuf_pflush = reg_base + STRBUF_PFLUSH;
+	strbuf->strbuf_fsync = reg_base + STRBUF_FSYNC;
+
+	strbuf->strbuf_enabled = 1;
+
+	strbuf->strbuf_flushflag = (volatile unsigned long *)
+		((((unsigned long)&strbuf->__flushflag_buf[0])
+		  + 63UL)
+		 & ~63UL);
+	strbuf->strbuf_flushflag_pa = (unsigned long)
+		__pa(strbuf->strbuf_flushflag);
+
+	/* The SYSIO SBUS control register is used for dummy reads
+	 * in order to ensure write completion.
+	 */
+	iommu->write_complete_reg = regs + 0x2000UL;
+
+	portid = of_getintprop_default(op->dev.of_node, "portid", -1);
+	printk(KERN_INFO "SYSIO: UPA portID %x, at %016lx\n",
+	       portid, regs);
+
+	/* Setup for TSB_SIZE=7, TBW_SIZE=0, MMU_DE=1, MMU_EN=1 */
+	if (iommu_table_init(iommu, IO_TSB_SIZE, MAP_BASE, 0xffffffff, -1))
+		goto fatal_memory_error;
+
+	control = upa_readq(iommu->iommu_control);
+	control = ((7UL << 16UL)	|
+		   (0UL << 2UL)		|
+		   (1UL << 1UL)		|
+		   (1UL << 0UL));
+	upa_writeq(control, iommu->iommu_control);
+
+	/* Clean out any cruft in the IOMMU using
+	 * diagnostic accesses.
+	 */
+	for (i = 0; i < 16; i++) {
+		unsigned long dram, tag;
+
+		dram = iommu->iommu_control + (IOMMU_DRAMDIAG - IOMMU_CONTROL);
+		tag = iommu->iommu_control + (IOMMU_TAGDIAG - IOMMU_CONTROL);
+
+		dram += (unsigned long)i * 8UL;
+		tag += (unsigned long)i * 8UL;
+		upa_writeq(0, dram);
+		upa_writeq(0, tag);
+	}
+	upa_readq(iommu->write_complete_reg);
+
+	/* Give the TSB to SYSIO. */
+	upa_writeq(__pa(iommu->page_table), iommu->iommu_tsbbase);
+
+	/* Setup streaming buffer, DE=1 SB_EN=1 */
+	control = (1UL << 1UL) | (1UL << 0UL);
+	upa_writeq(control, strbuf->strbuf_control);
+
+	/* Clear out the tags using diagnostics. */
+	for (i = 0; i < 16; i++) {
+		unsigned long ptag, ltag;
+
+		ptag = strbuf->strbuf_control +
+			(STRBUF_PTAGDIAG - STRBUF_CONTROL);
+		ltag = strbuf->strbuf_control +
+			(STRBUF_LTAGDIAG - STRBUF_CONTROL);
+		ptag += (unsigned long)i * 8UL;
+		ltag += (unsigned long)i * 8UL;
+
+		upa_writeq(0UL, ptag);
+		upa_writeq(0UL, ltag);
+	}
+
+	/* Enable DVMA arbitration for all devices/slots. */
+	control = upa_readq(iommu->write_complete_reg);
+	control |= 0x3fUL;
+	upa_writeq(control, iommu->write_complete_reg);
+
+	/* Now some Xfire specific grot... */
+	if (this_is_starfire)
+		starfire_hookup(portid);
+
+	sysio_register_error_handlers(op);
+	return;
+
+fatal_memory_error:
+	kfree(iommu);
+	kfree(strbuf);
+	prom_printf("sbus_iommu_init: Fatal memory allocation error.\n");
+}
+
+static int __init sbus_init(void)
+{
+	struct device_node *dp;
+
+	for_each_node_by_name(dp, "sbus") {
+		struct platform_device *op = of_find_device_by_node(dp);
+
+		sbus_iommu_init(op);
+		of_propagate_archdata(op);
+	}
+
+	return 0;
+}
+
+subsys_initcall(sbus_init);
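
One detail of sbus_iommu_init() worth calling out is the flush-flag pointer: "(addr + 63) & ~63" is the standard align-up idiom, rounding the embedded buffer up to the next 64-byte boundary so the streaming buffer can write an aligned cache line. A user-space illustration (buffer name made up):

	#include <stdio.h>

	#define ALIGN_UP(x, a)	(((x) + ((a) - 1)) & ~((unsigned long)(a) - 1))

	int main(void)
	{
		unsigned char buf[128];
		unsigned long p = (unsigned long)&buf[0];
		unsigned long aligned = ALIGN_UP(p, 64);

		printf("raw %#lx -> aligned %#lx (offset %lu)\n",
		       p, aligned, aligned - p);
		return 0;
	}
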
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
new file mode 100644
index 0000000..13664c3
--- /dev/null
+++ b/arch/sparc/kernel/setup_32.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/sparc/kernel/setup.c
+ *
+ *  Copyright (C) 1995  David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 2000  Anton Blanchard (anton@samba.org)
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/initrd.h>
+#include <asm/smp.h>
+#include <linux/user.h>
+#include <linux/screen_info.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/syscalls.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/console.h>
+#include <linux/spinlock.h>
+#include <linux/root_dev.h>
+#include <linux/cpu.h>
+#include <linux/kdebug.h>
+#include <linux/export.h>
+#include <linux/start_kernel.h>
+
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/oplib.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/traps.h>
+#include <asm/vaddrs.h>
+#include <asm/mbus.h>
+#include <asm/idprom.h>
+#include <asm/cpudata.h>
+#include <asm/setup.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+
+#include "kernel.h"
+
+struct screen_info screen_info = {
+	0, 0,			/* orig-x, orig-y */
+	0,			/* unused */
+	0,			/* orig-video-page */
+	0,			/* orig-video-mode */
+	128,			/* orig-video-cols */
+	0, 0, 0,		/* ega_ax, ega_bx, ega_cx */
+	54,			/* orig-video-lines */
+	0,                      /* orig-video-isVGA */
+	16                      /* orig-video-points */
+};
+
+/* Typing sync at the prom prompt calls the function pointed to by
+ * romvec->pv_synchook, which I set to the following function.
+ * This should sync all filesystems and return; for now it just
+ * prints out pretty messages and returns.
+ */
+
+/* Pretty sick eh? */
+static void prom_sync_me(void)
+{
+	unsigned long prom_tbr, flags;
+
+	/* XXX Badly broken. FIX! - Anton */
+	local_irq_save(flags);
+	__asm__ __volatile__("rd %%tbr, %0\n\t" : "=r" (prom_tbr));
+	__asm__ __volatile__("wr %0, 0x0, %%tbr\n\t"
+			     "nop\n\t"
+			     "nop\n\t"
+			     "nop\n\t" : : "r" (&trapbase));
+
+	prom_printf("PROM SYNC COMMAND...\n");
+	show_free_areas(0, NULL);
+	if (!is_idle_task(current)) {
+		local_irq_enable();
+		ksys_sync();
+		local_irq_disable();
+	}
+	prom_printf("Returning to prom\n");
+
+	__asm__ __volatile__("wr %0, 0x0, %%tbr\n\t"
+			     "nop\n\t"
+			     "nop\n\t"
+			     "nop\n\t" : : "r" (prom_tbr));
+	local_irq_restore(flags);
+}
+
+static unsigned int boot_flags __initdata = 0;
+#define BOOTME_DEBUG  0x1
+
+/* Exported for mm/init.c:paging_init. */
+unsigned long cmdline_memory_size __initdata = 0;
+
+/* which CPU booted us (0xff = not set) */
+unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
+
+static void
+prom_console_write(struct console *con, const char *s, unsigned int n)
+{
+	prom_write(s, n);
+}
+
+static struct console prom_early_console = {
+	.name =		"earlyprom",
+	.write =	prom_console_write,
+	.flags =	CON_PRINTBUFFER | CON_BOOT,
+	.index =	-1,
+};
+
+/* 
+ * Process kernel command line switches that are specific to the
+ * SPARC or that require special low-level processing.
+ */
+static void __init process_switch(char c)
+{
+	switch (c) {
+	case 'd':
+		boot_flags |= BOOTME_DEBUG;
+		break;
+	case 's':
+		break;
+	case 'h':
+		prom_printf("boot_flags_init: Halt!\n");
+		prom_halt();
+		break;
+	case 'p':
+		prom_early_console.flags &= ~CON_BOOT;
+		break;
+	default:
+		printk("Unknown boot switch (-%c)\n", c);
+		break;
+	}
+}
+
+static void __init boot_flags_init(char *commands)
+{
+	while (*commands) {
+		/* Move to the start of the next "argument". */
+		while (*commands == ' ')
+			commands++;
+
+		/* Process any command switches, otherwise skip it. */
+		if (*commands == '\0')
+			break;
+		if (*commands == '-') {
+			commands++;
+			while (*commands && *commands != ' ')
+				process_switch(*commands++);
+			continue;
+		}
+		if (!strncmp(commands, "mem=", 4)) {
+			/*
+			 * "mem=XXX[kKmM] overrides the PROM-reported
+			 * memory size.
+			 */
+			cmdline_memory_size = simple_strtoul(commands + 4,
+						     &commands, 0);
+			if (*commands == 'K' || *commands == 'k') {
+				cmdline_memory_size <<= 10;
+				commands++;
+			} else if (*commands == 'M' || *commands == 'm') {
+				cmdline_memory_size <<= 20;
+				commands++;
+			}
+		}
+		while (*commands && *commands != ' ')
+			commands++;
+	}
+}
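
The mem= handling above is easy to model outside the kernel: parse the number, then scale by an optional K/k or M/m suffix (the 64-bit setup code later in this series instead calls memparse(), which also understands G). A user-space sketch using strtoul in place of simple_strtoul:

	#include <stdio.h>
	#include <stdlib.h>

	static unsigned long parse_mem(const char *arg)
	{
		char *end;
		unsigned long size = strtoul(arg, &end, 0);

		if (*end == 'K' || *end == 'k')
			size <<= 10;
		else if (*end == 'M' || *end == 'm')
			size <<= 20;
		return size;
	}

	int main(void)
	{
		printf("%lu\n", parse_mem("64M"));	/* 67108864 */
		printf("%lu\n", parse_mem("512k"));	/* 524288 */
		return 0;
	}
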
+
+extern unsigned short root_flags;
+extern unsigned short root_dev;
+extern unsigned short ram_flags;
+#define RAMDISK_IMAGE_START_MASK	0x07FF
+#define RAMDISK_PROMPT_FLAG		0x8000
+#define RAMDISK_LOAD_FLAG		0x4000
+
+extern int root_mountflags;
+
+char reboot_command[COMMAND_LINE_SIZE];
+
+struct cpuid_patch_entry {
+	unsigned int	addr;
+	unsigned int	sun4d[3];
+	unsigned int	leon[3];
+};
+extern struct cpuid_patch_entry __cpuid_patch, __cpuid_patch_end;
+
+static void __init per_cpu_patch(void)
+{
+	struct cpuid_patch_entry *p;
+
+	if (sparc_cpu_model == sun4m) {
+		/* Nothing to do, this is what the unpatched code
+		 * targets.
+		 */
+		return;
+	}
+
+	p = &__cpuid_patch;
+	while (p < &__cpuid_patch_end) {
+		unsigned long addr = p->addr;
+		unsigned int *insns;
+
+		switch (sparc_cpu_model) {
+		case sun4d:
+			insns = &p->sun4d[0];
+			break;
+
+		case sparc_leon:
+			insns = &p->leon[0];
+			break;
+		default:
+			prom_printf("Unknown cpu type, halting.\n");
+			prom_halt();
+		}
+		*(unsigned int *) (addr + 0) = insns[0];
+		flushi(addr + 0);
+		*(unsigned int *) (addr + 4) = insns[1];
+		flushi(addr + 4);
+		*(unsigned int *) (addr + 8) = insns[2];
+		flushi(addr + 8);
+
+		p++;
+	}
+}
+
+struct leon_1insn_patch_entry {
+	unsigned int addr;
+	unsigned int insn;
+};
+
+enum sparc_cpu sparc_cpu_model;
+EXPORT_SYMBOL(sparc_cpu_model);
+
+static __init void leon_patch(void)
+{
+	struct leon_1insn_patch_entry *start = (void *)__leon_1insn_patch;
+	struct leon_1insn_patch_entry *end = (void *)__leon_1insn_patch_end;
+
+	/* Default instruction is leon - no patching */
+	if (sparc_cpu_model == sparc_leon)
+		return;
+
+	while (start < end) {
+		unsigned long addr = start->addr;
+
+		*(unsigned int *)(addr) = start->insn;
+		flushi(addr);
+
+		start++;
+	}
+}
+
+struct tt_entry *sparc_ttable;
+static struct pt_regs fake_swapper_regs;
+
+/* Called from head_32.S - before we have set up anything
+ * in the kernel. Be very careful with what you do here.
+ */
+void __init sparc32_start_kernel(struct linux_romvec *rp)
+{
+	prom_init(rp);
+
+	/* Set sparc_cpu_model */
+	sparc_cpu_model = sun_unknown;
+	if (!strcmp(&cputypval[0], "sun4m"))
+		sparc_cpu_model = sun4m;
+	if (!strcmp(&cputypval[0], "sun4s"))
+		sparc_cpu_model = sun4m; /* CP-1200 with PROM 2.30 -E */
+	if (!strcmp(&cputypval[0], "sun4d"))
+		sparc_cpu_model = sun4d;
+	if (!strcmp(&cputypval[0], "sun4e"))
+		sparc_cpu_model = sun4e;
+	if (!strcmp(&cputypval[0], "sun4u"))
+		sparc_cpu_model = sun4u;
+	if (!strncmp(&cputypval[0], "leon" , 4))
+		sparc_cpu_model = sparc_leon;
+
+	leon_patch();
+	start_kernel();
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	int i;
+	unsigned long highest_paddr;
+
+	sparc_ttable = &trapbase;
+
+	/* Initialize PROM console and command line. */
+	*cmdline_p = prom_getbootargs();
+	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+	parse_early_param();
+
+	boot_flags_init(*cmdline_p);
+
+	register_console(&prom_early_console);
+
+	printk("ARCH: ");
+	switch (sparc_cpu_model) {
+	case sun4m:
+		printk("SUN4M\n");
+		break;
+	case sun4d:
+		printk("SUN4D\n");
+		break;
+	case sun4e:
+		printk("SUN4E\n");
+		break;
+	case sun4u:
+		printk("SUN4U\n");
+		break;
+	case sparc_leon:
+		printk("LEON\n");
+		break;
+	default:
+		printk("UNKNOWN!\n");
+		break;
+	}
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	idprom_init();
+	load_mmu();
+
+	phys_base = 0xffffffffUL;
+	highest_paddr = 0UL;
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+		unsigned long top;
+
+		if (sp_banks[i].base_addr < phys_base)
+			phys_base = sp_banks[i].base_addr;
+		top = sp_banks[i].base_addr +
+			sp_banks[i].num_bytes;
+		if (highest_paddr < top)
+			highest_paddr = top;
+	}
+	pfn_base = phys_base >> PAGE_SHIFT;
+
+	if (!root_flags)
+		root_mountflags &= ~MS_RDONLY;
+	ROOT_DEV = old_decode_dev(root_dev);
+#ifdef CONFIG_BLK_DEV_RAM
+	rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
+	rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
+	rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
+#endif
+
+	prom_setsync(prom_sync_me);
+
+	if((boot_flags & BOOTME_DEBUG) && (linux_dbvec != NULL) &&
+	   ((*(short *)linux_dbvec) != -1)) {
+		printk("Booted under KADB. Syncing trap table.\n");
+		(*(linux_dbvec->teach_debugger))();
+	}
+
+	init_task.thread.kregs = &fake_swapper_regs;
+
+	/* Run-time patch instructions to match the cpu model */
+	per_cpu_patch();
+
+	paging_init();
+
+	smp_setup_cpu_possible_map();
+}
+
+extern int stop_a_enabled;
+
+void sun_do_break(void)
+{
+	if (!stop_a_enabled)
+		return;
+
+	printk("\n");
+	flush_user_windows();
+
+	prom_cmdline();
+}
+EXPORT_SYMBOL(sun_do_break);
+
+int stop_a_enabled = 1;
+
+static int __init topology_init(void)
+{
+	int i, ncpus, err;
+
+	/* Count the number of physically present processors in
+	 * the machine, even on uniprocessor, so that /proc/cpuinfo
+	 * output is consistent with 2.4.x
+	 */
+	ncpus = 0;
+	while (!cpu_find_by_instance(ncpus, NULL, NULL))
+		ncpus++;
+	ncpus_probed = ncpus;
+
+	err = 0;
+	for_each_online_cpu(i) {
+		struct cpu *p = kzalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			err = -ENOMEM;
+		else
+			register_cpu(p, i);
+	}
+
+	return err;
+}
+
+subsys_initcall(topology_init);
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
new file mode 100644
index 0000000..7944b3c
--- /dev/null
+++ b/arch/sparc/kernel/setup_64.c
@@ -0,0 +1,717 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/sparc64/kernel/setup.c
+ *
+ *  Copyright (C) 1995,1996  David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1997       Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <asm/smp.h>
+#include <linux/user.h>
+#include <linux/screen_info.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/syscalls.h>
+#include <linux/kdev_t.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/inet.h>
+#include <linux/console.h>
+#include <linux/root_dev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/initrd.h>
+#include <linux/module.h>
+#include <linux/start_kernel.h>
+#include <linux/bootmem.h>
+
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/oplib.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/idprom.h>
+#include <asm/head.h>
+#include <asm/starfire.h>
+#include <asm/mmu_context.h>
+#include <asm/timer.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/mmu.h>
+#include <asm/ns87303.h>
+#include <asm/btext.h>
+#include <asm/elf.h>
+#include <asm/mdesc.h>
+#include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
+
+#ifdef CONFIG_IP_PNP
+#include <net/ipconfig.h>
+#endif
+
+#include "entry.h"
+#include "kernel.h"
+
+/* Used to synchronize accesses to NatSemi SUPER I/O chip configure
+ * operations in asm/ns87303.h
+ */
+DEFINE_SPINLOCK(ns87303_lock);
+EXPORT_SYMBOL(ns87303_lock);
+
+struct screen_info screen_info = {
+	0, 0,			/* orig-x, orig-y */
+	0,			/* unused */
+	0,			/* orig-video-page */
+	0,			/* orig-video-mode */
+	128,			/* orig-video-cols */
+	0, 0, 0,		/* unused, ega_bx, unused */
+	54,			/* orig-video-lines */
+	0,                      /* orig-video-isVGA */
+	16                      /* orig-video-points */
+};
+
+static void
+prom_console_write(struct console *con, const char *s, unsigned int n)
+{
+	prom_write(s, n);
+}
+
+/* Exported for mm/init.c:paging_init. */
+unsigned long cmdline_memory_size = 0;
+
+static struct console prom_early_console = {
+	.name =		"earlyprom",
+	.write =	prom_console_write,
+	.flags =	CON_PRINTBUFFER | CON_BOOT | CON_ANYTIME,
+	.index =	-1,
+};
+
+/*
+ * Process kernel command line switches that are specific to the
+ * SPARC or that require special low-level processing.
+ */
+static void __init process_switch(char c)
+{
+	switch (c) {
+	case 'd':
+	case 's':
+		break;
+	case 'h':
+		prom_printf("boot_flags_init: Halt!\n");
+		prom_halt();
+		break;
+	case 'p':
+		prom_early_console.flags &= ~CON_BOOT;
+		break;
+	case 'P':
+		/* Force UltraSPARC-III P-Cache on. */
+		if (tlb_type != cheetah) {
+			printk("BOOT: Ignoring P-Cache force option.\n");
+			break;
+		}
+		cheetah_pcache_forced_on = 1;
+		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
+		cheetah_enable_pcache();
+		break;
+
+	default:
+		printk("Unknown boot switch (-%c)\n", c);
+		break;
+	}
+}
+
+static void __init boot_flags_init(char *commands)
+{
+	while (*commands) {
+		/* Move to the start of the next "argument". */
+		while (*commands == ' ')
+			commands++;
+
+		/* Process any command switches, otherwise skip it. */
+		if (*commands == '\0')
+			break;
+		if (*commands == '-') {
+			commands++;
+			while (*commands && *commands != ' ')
+				process_switch(*commands++);
+			continue;
+		}
+		if (!strncmp(commands, "mem=", 4))
+			cmdline_memory_size = memparse(commands + 4, &commands);
+
+		while (*commands && *commands != ' ')
+			commands++;
+	}
+}
+
+extern unsigned short root_flags;
+extern unsigned short root_dev;
+extern unsigned short ram_flags;
+#define RAMDISK_IMAGE_START_MASK	0x07FF
+#define RAMDISK_PROMPT_FLAG		0x8000
+#define RAMDISK_LOAD_FLAG		0x4000
+
+extern int root_mountflags;
+
+char reboot_command[COMMAND_LINE_SIZE];
+
+static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
+
+static void __init per_cpu_patch(void)
+{
+	struct cpuid_patch_entry *p;
+	unsigned long ver;
+	int is_jbus;
+
+	if (tlb_type == spitfire && !this_is_starfire)
+		return;
+
+	is_jbus = 0;
+	if (tlb_type != hypervisor) {
+		__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+		is_jbus = ((ver >> 32UL) == __JALAPENO_ID ||
+			   (ver >> 32UL) == __SERRANO_ID);
+	}
+
+	p = &__cpuid_patch;
+	while (p < &__cpuid_patch_end) {
+		unsigned long addr = p->addr;
+		unsigned int *insns;
+
+		switch (tlb_type) {
+		case spitfire:
+			insns = &p->starfire[0];
+			break;
+		case cheetah:
+		case cheetah_plus:
+			if (is_jbus)
+				insns = &p->cheetah_jbus[0];
+			else
+				insns = &p->cheetah_safari[0];
+			break;
+		case hypervisor:
+			insns = &p->sun4v[0];
+			break;
+		default:
+			prom_printf("Unknown cpu type, halting.\n");
+			prom_halt();
+		}
+
+		*(unsigned int *) (addr +  0) = insns[0];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
+
+		*(unsigned int *) (addr +  4) = insns[1];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  4));
+
+		*(unsigned int *) (addr +  8) = insns[2];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  8));
+
+		*(unsigned int *) (addr + 12) = insns[3];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr + 12));
+
+		p++;
+	}
+}
+
+void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *start,
+			     struct sun4v_1insn_patch_entry *end)
+{
+	while (start < end) {
+		unsigned long addr = start->addr;
+
+		*(unsigned int *) (addr +  0) = start->insn;
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
+
+		start++;
+	}
+}
+
+void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+			     struct sun4v_2insn_patch_entry *end)
+{
+	while (start < end) {
+		unsigned long addr = start->addr;
+
+		*(unsigned int *) (addr +  0) = start->insns[0];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
+
+		*(unsigned int *) (addr +  4) = start->insns[1];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  4));
+
+		start++;
+	}
+}
+
+void sun_m7_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
+			     struct sun4v_2insn_patch_entry *end)
+{
+	while (start < end) {
+		unsigned long addr = start->addr;
+
+		*(unsigned int *) (addr +  0) = start->insns[0];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  0));
+
+		*(unsigned int *) (addr +  4) = start->insns[1];
+		wmb();
+		__asm__ __volatile__("flush	%0" : : "r" (addr +  4));
+
+		start++;
+	}
+}
+
+static void __init sun4v_patch(void)
+{
+	extern void sun4v_hvapi_init(void);
+
+	if (tlb_type != hypervisor)
+		return;
+
+	sun4v_patch_1insn_range(&__sun4v_1insn_patch,
+				&__sun4v_1insn_patch_end);
+
+	sun4v_patch_2insn_range(&__sun4v_2insn_patch,
+				&__sun4v_2insn_patch_end);
+
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
+	case SUN4V_CHIP_SPARC_SN:
+		sun4v_patch_1insn_range(&__sun_m7_1insn_patch,
+					&__sun_m7_1insn_patch_end);
+		sun_m7_patch_2insn_range(&__sun_m7_2insn_patch,
+					 &__sun_m7_2insn_patch_end);
+		break;
+	default:
+		break;
+	}
+
+	if (sun4v_chip_type != SUN4V_CHIP_NIAGARA1) {
+		sun4v_patch_1insn_range(&__fast_win_ctrl_1insn_patch,
+					&__fast_win_ctrl_1insn_patch_end);
+	}
+
+	sun4v_hvapi_init();
+}
+
+static void __init popc_patch(void)
+{
+	struct popc_3insn_patch_entry *p3;
+	struct popc_6insn_patch_entry *p6;
+
+	p3 = &__popc_3insn_patch;
+	while (p3 < &__popc_3insn_patch_end) {
+		unsigned long i, addr = p3->addr;
+
+		for (i = 0; i < 3; i++) {
+			*(unsigned int *) (addr +  (i * 4)) = p3->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr +  (i * 4)));
+		}
+
+		p3++;
+	}
+
+	p6 = &__popc_6insn_patch;
+	while (p6 < &__popc_6insn_patch_end) {
+		unsigned long i, addr = p6->addr;
+
+		for (i = 0; i < 6; i++) {
+			*(unsigned int *) (addr +  (i * 4)) = p6->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr +  (i * 4)));
+		}
+
+		p6++;
+	}
+}
+
+static void __init pause_patch(void)
+{
+	struct pause_patch_entry *p;
+
+	p = &__pause_3insn_patch;
+	while (p < &__pause_3insn_patch_end) {
+		unsigned long i, addr = p->addr;
+
+		for (i = 0; i < 3; i++) {
+			*(unsigned int *) (addr +  (i * 4)) = p->insns[i];
+			wmb();
+			__asm__ __volatile__("flush	%0"
+					     : : "r" (addr +  (i * 4)));
+		}
+
+		p++;
+	}
+}
+
+void __init start_early_boot(void)
+{
+	int cpu;
+
+	check_if_starfire();
+	per_cpu_patch();
+	sun4v_patch();
+	smp_init_cpu_poke();
+
+	cpu = hard_smp_processor_id();
+	if (cpu >= NR_CPUS) {
+		prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+			    cpu, NR_CPUS);
+		prom_halt();
+	}
+	current_thread_info()->cpu = cpu;
+
+	time_init_early();
+	prom_init_report();
+	start_kernel();
+}
+
+/* On Ultra, we support all of the v8 capabilities. */
+unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
+				   HWCAP_SPARC_SWAP | HWCAP_SPARC_MULDIV |
+				   HWCAP_SPARC_V9);
+EXPORT_SYMBOL(sparc64_elf_hwcap);
+
+static const char *hwcaps[] = {
+	"flush", "stbar", "swap", "muldiv", "v9",
+	"ultra3", "blkinit", "n2",
+
+	/* These strings are as they appear in the machine description
+	 * 'hwcap-list' property for cpu nodes.
+	 */
+	"mul32", "div32", "fsmuld", "v8plus", "popc", "vis", "vis2",
+	"ASIBlkInit", "fmaf", "vis3", "hpc", "random", "trans", "fjfmau",
+	"ima", "cspare", "pause", "cbcond", NULL /*reserved for crypto */,
+	"adp",
+};
+
+static const char *crypto_hwcaps[] = {
+	"aes", "des", "kasumi", "camellia", "md5", "sha1", "sha256",
+	"sha512", "mpmul", "montmul", "montsqr", "crc32c",
+};
+
+void cpucap_info(struct seq_file *m)
+{
+	unsigned long caps = sparc64_elf_hwcap;
+	int i, printed = 0;
+
+	seq_puts(m, "cpucaps\t\t: ");
+	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+		unsigned long bit = 1UL << i;
+		if (hwcaps[i] && (caps & bit)) {
+			seq_printf(m, "%s%s",
+				   printed ? "," : "", hwcaps[i]);
+			printed++;
+		}
+	}
+	if (caps & HWCAP_SPARC_CRYPTO) {
+		unsigned long cfr;
+
+		__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+		for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+			unsigned long bit = 1UL << i;
+			if (cfr & bit) {
+				seq_printf(m, "%s%s",
+					   printed ? "," : "", crypto_hwcaps[i]);
+				printed++;
+			}
+		}
+	}
+	seq_putc(m, '\n');
+}
+
+static void __init report_one_hwcap(int *printed, const char *name)
+{
+	if ((*printed) == 0)
+		printk(KERN_INFO "CPU CAPS: [");
+	printk(KERN_CONT "%s%s",
+	       (*printed) ? "," : "", name);
+	if (++(*printed) == 8) {
+		printk(KERN_CONT "]\n");
+		*printed = 0;
+	}
+}
+
+static void __init report_crypto_hwcaps(int *printed)
+{
+	unsigned long cfr;
+	int i;
+
+	__asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr));
+
+	for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+		unsigned long bit = 1UL << i;
+		if (cfr & bit)
+			report_one_hwcap(printed, crypto_hwcaps[i]);
+	}
+}
+
+static void __init report_hwcaps(unsigned long caps)
+{
+	int i, printed = 0;
+
+	for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+		unsigned long bit = 1UL << i;
+		if (hwcaps[i] && (caps & bit))
+			report_one_hwcap(&printed, hwcaps[i]);
+	}
+	if (caps & HWCAP_SPARC_CRYPTO)
+		report_crypto_hwcaps(&printed);
+	if (printed != 0)
+		printk(KERN_CONT "]\n");
+}
+
+static unsigned long __init mdesc_cpu_hwcap_list(void)
+{
+	struct mdesc_handle *hp;
+	unsigned long caps = 0;
+	const char *prop;
+	int len;
+	u64 pn;
+
+	hp = mdesc_grab();
+	if (!hp)
+		return 0;
+
+	pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu");
+	if (pn == MDESC_NODE_NULL)
+		goto out;
+
+	prop = mdesc_get_property(hp, pn, "hwcap-list", &len);
+	if (!prop)
+		goto out;
+
+	while (len) {
+		int i, plen;
+
+		for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+			unsigned long bit = 1UL << i;
+
+			if (hwcaps[i] && !strcmp(prop, hwcaps[i])) {
+				caps |= bit;
+				break;
+			}
+		}
+		for (i = 0; i < ARRAY_SIZE(crypto_hwcaps); i++) {
+			if (!strcmp(prop, crypto_hwcaps[i]))
+				caps |= HWCAP_SPARC_CRYPTO;
+		}
+
+		plen = strlen(prop) + 1;
+		prop += plen;
+		len -= plen;
+	}
+
+out:
+	mdesc_release(hp);
+	return caps;
+}
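
The "hwcap-list" property walked above is a blob of NUL-terminated strings packed back to back, with len counting the whole blob. A self-contained walker over the same layout (blob contents invented for the example):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		const char blob[] = "mul32\0div32\0vis\0vis2";
		const char *prop = blob;
		int len = sizeof(blob);	/* includes the final NUL */

		while (len > 0) {
			int plen = strlen(prop) + 1;

			printf("cap: %s\n", prop);
			prop += plen;
			len -= plen;
		}
		return 0;
	}
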
+
+/* This yields a mask that user programs can use to figure out what
+ * instruction set this cpu supports.
+ */
+static void __init init_sparc64_elf_hwcap(void)
+{
+	unsigned long cap = sparc64_elf_hwcap;
+	unsigned long mdesc_caps;
+
+	if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		cap |= HWCAP_SPARC_ULTRA3;
+	else if (tlb_type == hypervisor) {
+		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+			cap |= HWCAP_SPARC_BLKINIT;
+		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+			cap |= HWCAP_SPARC_N2;
+	}
+
+	cap |= (AV_SPARC_MUL32 | AV_SPARC_DIV32 | AV_SPARC_V8PLUS);
+
+	mdesc_caps = mdesc_cpu_hwcap_list();
+	if (!mdesc_caps) {
+		if (tlb_type == spitfire)
+			cap |= AV_SPARC_VIS;
+		if (tlb_type == cheetah || tlb_type == cheetah_plus)
+			cap |= AV_SPARC_VIS | AV_SPARC_VIS2;
+		if (tlb_type == cheetah_plus) {
+			unsigned long impl, ver;
+
+			__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+			impl = ((ver >> 32) & 0xffff);
+			if (impl == PANTHER_IMPL)
+				cap |= AV_SPARC_POPC;
+		}
+		if (tlb_type == hypervisor) {
+			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1)
+				cap |= AV_SPARC_ASI_BLK_INIT;
+			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
+			    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+			    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+				cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
+					AV_SPARC_ASI_BLK_INIT |
+					AV_SPARC_POPC);
+			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
+			    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
+			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M8 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_SN ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
+				cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
+					AV_SPARC_FMAF);
+		}
+	}
+	sparc64_elf_hwcap = cap | mdesc_caps;
+
+	report_hwcaps(sparc64_elf_hwcap);
+
+	if (sparc64_elf_hwcap & AV_SPARC_POPC)
+		popc_patch();
+	if (sparc64_elf_hwcap & AV_SPARC_PAUSE)
+		pause_patch();
+}
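+
+/* A user-space consumption sketch (not part of this file's build): the
+ * mask ends up in the ELF auxiliary vector, so a program can test bits
+ * with getauxval(3).  AV_SPARC_VIS's value (0x00002000) is copied here
+ * from this tree's asm/elf_64.h and should be treated as an assumption:
+ *
+ *	#include <stdio.h>
+ *	#include <sys/auxv.h>
+ *
+ *	#define AV_SPARC_VIS	0x00002000
+ *
+ *	int main(void)
+ *	{
+ *		unsigned long hwcap = getauxval(AT_HWCAP);
+ *
+ *		printf("VIS is %ssupported\n",
+ *		       (hwcap & AV_SPARC_VIS) ? "" : "not ");
+ *		return 0;
+ *	}
+ */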
+
+void __init alloc_irqstack_bootmem(void)
+{
+	unsigned int i, node;
+
+	for_each_possible_cpu(i) {
+		node = cpu_to_node(i);
+
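+		/* Each cpu gets a THREAD_SIZE-sized, THREAD_SIZE-aligned
+		 * stack of each kind from its own memory node (the
+		 * trailing arguments are size, align and goal).
+		 */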
+		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+	}
+}
+
+void __init setup_arch(char **cmdline_p)
+{
+	/* Initialize PROM console and command line. */
+	*cmdline_p = prom_getbootargs();
+	strlcpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE);
+	parse_early_param();
+
+	boot_flags_init(*cmdline_p);
+#ifdef CONFIG_EARLYFB
+	if (btext_find_display())
+#endif
+		register_console(&prom_early_console);
+
+	if (tlb_type == hypervisor)
+		printk("ARCH: SUN4V\n");
+	else
+		printk("ARCH: SUN4U\n");
+
+#ifdef CONFIG_DUMMY_CONSOLE
+	conswitchp = &dummy_con;
+#endif
+
+	idprom_init();
+
+	if (!root_flags)
+		root_mountflags &= ~MS_RDONLY;
+	ROOT_DEV = old_decode_dev(root_dev);
+#ifdef CONFIG_BLK_DEV_RAM
+	rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
+	rd_prompt = ((ram_flags & RAMDISK_PROMPT_FLAG) != 0);
+	rd_doload = ((ram_flags & RAMDISK_LOAD_FLAG) != 0);
+#endif
+
+	task_thread_info(&init_task)->kregs = &fake_swapper_regs;
+
+#ifdef CONFIG_IP_PNP
+	if (!ic_set_manually) {
+		phandle chosen = prom_finddevice("/chosen");
+		u32 cl, sv, gw;
+
+		cl = prom_getintdefault (chosen, "client-ip", 0);
+		sv = prom_getintdefault (chosen, "server-ip", 0);
+		gw = prom_getintdefault (chosen, "gateway-ip", 0);
+		if (cl && sv) {
+			ic_myaddr = cl;
+			ic_servaddr = sv;
+			if (gw)
+				ic_gateway = gw;
+#if defined(CONFIG_IP_PNP_BOOTP) || defined(CONFIG_IP_PNP_RARP)
+			ic_proto_enabled = 0;
+#endif
+		}
+	}
+#endif
+
+	/* Get boot processor trap_block[] setup.  */
+	init_cur_cpu_trap(current_thread_info());
+
+	paging_init();
+	init_sparc64_elf_hwcap();
+	smp_fill_in_cpu_possible_map();
+	/*
+	 * Once the OF device tree and MDESC have been set up and nr_cpus has
+	 * been parsed, we know the list of possible cpus.  Therefore we can
+	 * allocate the IRQ stacks.
+	 */
+	alloc_irqstack_bootmem();
+}
+
+extern int stop_a_enabled;
+
+void sun_do_break(void)
+{
+	if (!stop_a_enabled)
+		return;
+
+	prom_printf("\n");
+	flush_user_windows();
+
+	prom_cmdline();
+}
+EXPORT_SYMBOL(sun_do_break);
+
+int stop_a_enabled = 1;
+EXPORT_SYMBOL(stop_a_enabled);
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
new file mode 100644
index 0000000..4c5b3fc
--- /dev/null
+++ b/arch/sparc/kernel/signal32.c
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  arch/sparc64/kernel/signal32.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ *  Copyright (C) 1997 Eddie C. Dost   (ecd@skynet.be)
+ *  Copyright (C) 1997,1998 Jakub Jelinek   (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/bitops.h>
+#include <linux/tracehook.h>
+
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+#include <asm/psrcompat.h>
+#include <asm/fpumacro.h>
+#include <asm/visasm.h>
+#include <asm/compat_signal.h>
+#include <asm/switch_to.h>
+
+#include "sigutil.h"
+#include "kernel.h"
+
+/* This magic should be in g_upper[0] for all upper parts
+ * to be valid.
+ */
+#define SIGINFO_EXTRA_V8PLUS_MAGIC	0x130e269
+typedef struct {
+	unsigned int g_upper[8];
+	unsigned int o_upper[8];
+	unsigned int asi;
+} siginfo_extra_v8plus_t;
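+
+/* The kernel here is big-endian, so viewing the 64-bit u_regs[] as an
+ * array of u32 puts the high word of u_regs[i] at index 2*i; e.g.
+ * u_regs[1] == 0x1111222233334444 reads back as
+ * ((u32 *)u_regs)[2] == 0x11112222 and ((u32 *)u_regs)[3] == 0x33334444.
+ * That is the access pattern used below to save and restore g_upper[].
+ */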
+
+struct signal_frame32 {
+	struct sparc_stackf32	ss;
+	__siginfo32_t		info;
+	/* __siginfo_fpu_t * */ u32 fpu_save;
+	unsigned int		insns[2];
+	unsigned int		extramask[_COMPAT_NSIG_WORDS - 1];
+	unsigned int		extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
+	/* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
+	siginfo_extra_v8plus_t	v8plus;
+	/* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
+
+struct rt_signal_frame32 {
+	struct sparc_stackf32	ss;
+	compat_siginfo_t	info;
+	struct pt_regs32	regs;
+	compat_sigset_t		mask;
+	/* __siginfo_fpu_t * */ u32 fpu_save;
+	unsigned int		insns[2];
+	compat_stack_t		stack;
+	unsigned int		extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */
+	/* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */
+	siginfo_extra_v8plus_t	v8plus;
+	/* __siginfo_rwin_t * */u32 rwin_save;
+} __attribute__((aligned(8)));
+
+/* Checks if the fp is valid.  We always build signal frames which are
+ * 16-byte aligned, therefore we can always enforce that the restore
+ * frame has that property as well.
+ */
+static bool invalid_frame_pointer(void __user *fp, int fplen)
+{
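+	/* Besides 16-byte alignment, the whole compat frame must fit
+	 * below the 4 GB limit of the 32-bit address space.
+	 */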
+	if ((((unsigned long) fp) & 15) ||
+	    ((unsigned long)fp) > 0x100000000ULL - fplen)
+		return true;
+	return false;
+}
+
+void do_sigreturn32(struct pt_regs *regs)
+{
+	struct signal_frame32 __user *sf;
+	compat_uptr_t fpu_save;
+	compat_uptr_t rwin_save;
+	unsigned int psr, ufp;
+	unsigned int pc, npc;
+	sigset_t set;
+	compat_sigset_t seta;
+	int err, i;
+	
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	synchronize_user_stack();
+
+	regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+	sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+	/* 1. Make sure we are not getting garbage from the user */
+	if (invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv;
+
+	if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if (ufp & 0x7)
+		goto segv;
+
+	if (__get_user(pc, &sf->info.si_regs.pc) ||
+	    __get_user(npc, &sf->info.si_regs.npc))
+		goto segv;
+
+	if ((pc | npc) & 3)
+		goto segv;
+
+	if (test_thread_flag(TIF_32BIT)) {
+		pc &= 0xffffffff;
+		npc &= 0xffffffff;
+	}
+	regs->tpc = pc;
+	regs->tnpc = npc;
+
+	/* 2. Restore the state */
+	err = __get_user(regs->y, &sf->info.si_regs.y);
+	err |= __get_user(psr, &sf->info.si_regs.psr);
+
+	for (i = UREG_G1; i <= UREG_I7; i++)
+		err |= __get_user(regs->u_regs[i], &sf->info.si_regs.u_regs[i]);
+	if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS) {
+		err |= __get_user(i, &sf->v8plus.g_upper[0]);
+		if (i == SIGINFO_EXTRA_V8PLUS_MAGIC) {
+			unsigned long asi;
+
+			for (i = UREG_G1; i <= UREG_I7; i++)
+				err |= __get_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]);
+			err |= __get_user(asi, &sf->v8plus.asi);
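+			/* TSTATE.ASI occupies bits 31:24. */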
+			regs->tstate &= ~TSTATE_ASI;
+			regs->tstate |= ((asi & 0xffUL) << 24UL);
+		}
+	}
+
+	/* User can only change condition codes in %tstate. */
+	regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC);
+	regs->tstate |= psr_to_tstate_icc(psr);
+
+	/* Prevent syscall restart.  */
+	pt_regs_clear_syscall(regs);
+
+	err |= __get_user(fpu_save, &sf->fpu_save);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(compat_ptr(rwin_save)))
+			goto segv;
+	}
+	err |= __get_user(seta.sig[0], &sf->info.si_mask);
+	err |= copy_from_user(&seta.sig[1], &sf->extramask,
+			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
+	if (err)
+		goto segv;
+
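+	/* Fold the two 32-bit compat words back into one 64-bit word;
+	 * e.g. seta.sig[1] == 0x1 and seta.sig[0] == 0x80000000 yield
+	 * 0x0000000180000000 -- the inverse of the split performed in
+	 * setup_frame32().
+	 */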
+	set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32);
+	set_current_blocked(&set);
+	return;
+
+segv:
+	force_sig(SIGSEGV, current);
+}
+
+asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
+{
+	struct rt_signal_frame32 __user *sf;
+	unsigned int psr, pc, npc, ufp;
+	compat_uptr_t fpu_save;
+	compat_uptr_t rwin_save;
+	sigset_t set;
+	int err, i;
+	
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	synchronize_user_stack();
+	regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+	sf = (struct rt_signal_frame32 __user *) regs->u_regs[UREG_FP];
+
+	/* 1. Make sure we are not getting garbage from the user */
+	if (invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv;
+
+	if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if (ufp & 0x7)
+		goto segv;
+
+	if (__get_user(pc, &sf->regs.pc) || 
+	    __get_user(npc, &sf->regs.npc))
+		goto segv;
+
+	if ((pc | npc) & 3)
+		goto segv;
+
+	if (test_thread_flag(TIF_32BIT)) {
+		pc &= 0xffffffff;
+		npc &= 0xffffffff;
+	}
+	regs->tpc = pc;
+	regs->tnpc = npc;
+
+	/* 2. Restore the state */
+	err = __get_user(regs->y, &sf->regs.y);
+	err |= __get_user(psr, &sf->regs.psr);
+	
+	for (i = UREG_G1; i <= UREG_I7; i++)
+		err |= __get_user(regs->u_regs[i], &sf->regs.u_regs[i]);
+	if ((psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS) {
+		err |= __get_user(i, &sf->v8plus.g_upper[0]);
+		if (i == SIGINFO_EXTRA_V8PLUS_MAGIC) {
+			unsigned long asi;
+
+			for (i = UREG_G1; i <= UREG_I7; i++)
+				err |= __get_user(((u32 *)regs->u_regs)[2*i], &sf->v8plus.g_upper[i]);
+			err |= __get_user(asi, &sf->v8plus.asi);
+			regs->tstate &= ~TSTATE_ASI;
+			regs->tstate |= ((asi & 0xffUL) << 24UL);
+		}
+	}
+
+	/* User can only change condition codes in %tstate. */
+	regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC);
+	regs->tstate |= psr_to_tstate_icc(psr);
+
+	/* Prevent syscall restart.  */
+	pt_regs_clear_syscall(regs);
+
+	err |= __get_user(fpu_save, &sf->fpu_save);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, compat_ptr(fpu_save));
+	err |= get_compat_sigset(&set, &sf->mask);
+	err |= compat_restore_altstack(&sf->stack);
+	if (err)
+		goto segv;
+		
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(compat_ptr(rwin_save)))
+			goto segv;
+	}
+
+	set_current_blocked(&set);
+	return;
+segv:
+	force_sig(SIGSEGV, current);
+}
+
+static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+{
+	unsigned long sp;
+	
+	regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+	sp = regs->u_regs[UREG_FP];
+	
+	/*
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+		return (void __user *) -1L;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	sp = sigsp(sp, ksig) - framesize;
+
+	/* Always align the stack frame.  This handles two cases.  First,
+	 * sigaltstack need not be mindful of platform specific stack
+	 * alignment.  Second, if we took this signal because the stack
+	 * is not aligned properly, we'd like to take the signal cleanly
+	 * and report that.
+	 */
+	sp &= ~15UL;
+
+	return (void __user *) sp;
+}
+
+/* The I-cache flush instruction only works in the primary ASI, which
+ * right now is the nucleus, a.k.a. kernel space.
+ *
+ * Therefore we have to kick the instructions out using the kernel
+ * side linear mapping of the physical address backing the user
+ * instructions.
+ */
+static void flush_signal_insns(unsigned long address)
+{
+	unsigned long pstate, paddr;
+	pte_t *ptep, pte;
+	pgd_t *pgdp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+
+	/* Commit all stores of the instructions we are about to flush.  */
+	wmb();
+
+	/* Disable cross-call reception.  In this way even a very wide
+	 * munmap() on another cpu can't tear down the page table
+	 * hierarchy from underneath us, since that can't complete
+	 * until the IPI tlb flush returns.
+	 */
+
+	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
+	__asm__ __volatile__("wrpr %0, %1, %%pstate"
+				: : "r" (pstate), "i" (PSTATE_IE));
+
+	pgdp = pgd_offset(current->mm, address);
+	if (pgd_none(*pgdp))
+		goto out_irqs_on;
+	pudp = pud_offset(pgdp, address);
+	if (pud_none(*pudp))
+		goto out_irqs_on;
+	pmdp = pmd_offset(pudp, address);
+	if (pmd_none(*pmdp))
+		goto out_irqs_on;
+
+	ptep = pte_offset_map(pmdp, address);
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto out_unmap;
+
+	paddr = (unsigned long) page_address(pte_page(pte));
+
+	__asm__ __volatile__("flush	%0 + %1"
+			     : /* no outputs */
+			     : "r" (paddr),
+			       "r" (address & (PAGE_SIZE - 1))
+			     : "memory");
+
+out_unmap:
+	pte_unmap(ptep);
+out_irqs_on:
+	__asm__ __volatile__("wrpr %0, 0x0, %%pstate" : : "r" (pstate));
+
+}
+
+static int setup_frame32(struct ksignal *ksig, struct pt_regs *regs,
+			 sigset_t *oldset)
+{
+	struct signal_frame32 __user *sf;
+	int i, err, wsaved;
+	void __user *tail;
+	int sigframe_size;
+	u32 psr;
+	compat_sigset_t seta;
+
+	/* 1. Make sure everything is clean */
+	synchronize_user_stack();
+	save_and_clear_fpu();
+	
+	wsaved = get_thread_wsaved();
+
+	sigframe_size = sizeof(*sf);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
+
+	sf = (struct signal_frame32 __user *)
+		get_sigframe(ksig, regs, sigframe_size);
+	
+	if (invalid_frame_pointer(sf, sigframe_size)) {
+		if (show_unhandled_signals)
+			pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n",
+				current->comm, current->pid, (unsigned long)sf,
+				regs->tpc, regs->u_regs[UREG_I7]);
+		force_sigsegv(ksig->sig, current);
+		return -EINVAL;
+	}
+
+	tail = (sf + 1);
+
+	/* 2. Save the current process state */
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	err  = put_user(regs->tpc, &sf->info.si_regs.pc);
+	err |= __put_user(regs->tnpc, &sf->info.si_regs.npc);
+	err |= __put_user(regs->y, &sf->info.si_regs.y);
+	psr = tstate_to_psr(regs->tstate);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		psr |= PSR_EF;
+	err |= __put_user(psr, &sf->info.si_regs.psr);
+	for (i = 0; i < 16; i++)
+		err |= __put_user(regs->u_regs[i], &sf->info.si_regs.u_regs[i]);
+	err |= __put_user(sizeof(siginfo_extra_v8plus_t), &sf->extra_size);
+	err |= __put_user(SIGINFO_EXTRA_V8PLUS_MAGIC, &sf->v8plus.g_upper[0]);
+	for (i = 1; i < 16; i++)
+		err |= __put_user(((u32 *)regs->u_regs)[2*i],
+				  &sf->v8plus.g_upper[i]);
+	err |= __put_user((regs->tstate & TSTATE_ASI) >> 24UL,
+			  &sf->v8plus.asi);
+
+	if (psr & PSR_EF) {
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user((u64)fp, &sf->fpu_save);
+	} else {
+		err |= __put_user(0, &sf->fpu_save);
+	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user((u64)rwp, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
+
+	/* If these change we need to know - the assignments to seta rely on these sizes */
+	BUILD_BUG_ON(_NSIG_WORDS != 1);
+	BUILD_BUG_ON(_COMPAT_NSIG_WORDS != 2);
+	seta.sig[1] = (oldset->sig[0] >> 32);
+	seta.sig[0] = oldset->sig[0];
+
+	err |= __put_user(seta.sig[0], &sf->info.si_mask);
+	err |= __copy_to_user(sf->extramask, &seta.sig[1],
+			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
+
+	if (!wsaved) {
+		err |= copy_in_user((u32 __user *)sf,
+				    (u32 __user *)(regs->u_regs[UREG_FP]),
+				    sizeof(struct reg_window32));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		for (i = 0; i < 8; i++)
+			err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+		for (i = 0; i < 6; i++)
+			err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+		err |= __put_user(rp->ins[6], &sf->ss.fp);
+		err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+	}	
+	if (err)
+		return err;
+
+	/* 3. signal handler back-trampoline and parameters */
+	regs->u_regs[UREG_FP] = (unsigned long) sf;
+	regs->u_regs[UREG_I0] = ksig->sig;
+	regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
+	regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
+
+	/* 4. signal handler */
+	regs->tpc = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->tnpc = (regs->tpc + 4);
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+
+	/* 5. return to kernel instructions */
+	if (ksig->ka.ka_restorer) {
+		regs->u_regs[UREG_I7] = (unsigned long)ksig->ka.ka_restorer;
+	} else {
+		unsigned long address = ((unsigned long)&(sf->insns[0]));
+
+		regs->u_regs[UREG_I7] = (unsigned long) (&(sf->insns[0]) - 2);
+	
+		err  = __put_user(0x821020d8, &sf->insns[0]); /*mov __NR_sigreturn, %g1*/
+		err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/
+		if (err)
+			return err;
+		flush_signal_insns(address);
+	}
+	return 0;
+}
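+
+/* A hand-decoding sketch of the trampoline written above (SPARC
+ * format 3 encodings, worked out here -- verify against the V8
+ * manual before relying on it):
+ *
+ *	0x821020d8  op=2 rd=%g1 op3=0x02(or) rs1=%g0 i=1 simm13=0xd8
+ *	            => or %g0, 216, %g1    i.e. mov __NR_sigreturn, %g1
+ *	0x91d02010  op=2 cond=8(always) op3=0x3a(Ticc) rs1=%g0 i=1 0x10
+ *	            => ta 0x10             i.e. trap into the kernel
+ *
+ * The rt variant below differs only in the immediate: 0x82102065
+ * carries simm13 = 0x65 == 101 == __NR_rt_sigreturn.
+ */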
+
+static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs,
+			    sigset_t *oldset)
+{
+	struct rt_signal_frame32 __user *sf;
+	int i, err, wsaved;
+	void __user *tail;
+	int sigframe_size;
+	u32 psr;
+
+	/* 1. Make sure everything is clean */
+	synchronize_user_stack();
+	save_and_clear_fpu();
+	
+	wsaved = get_thread_wsaved();
+
+	sigframe_size = sizeof(*sf);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
+
+	sf = (struct rt_signal_frame32 __user *)
+		get_sigframe(ksig, regs, sigframe_size);
+	
+	if (invalid_frame_pointer(sf, sigframe_size)) {
+		if (show_unhandled_signals)
+			pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n",
+				current->comm, current->pid, (unsigned long)sf,
+				regs->tpc, regs->u_regs[UREG_I7]);
+		force_sigsegv(ksig->sig, current);
+		return -EINVAL;
+	}
+
+	tail = (sf + 1);
+
+	/* 2. Save the current process state */
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	err  = put_user(regs->tpc, &sf->regs.pc);
+	err |= __put_user(regs->tnpc, &sf->regs.npc);
+	err |= __put_user(regs->y, &sf->regs.y);
+	psr = tstate_to_psr(regs->tstate);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		psr |= PSR_EF;
+	err |= __put_user(psr, &sf->regs.psr);
+	for (i = 0; i < 16; i++)
+		err |= __put_user(regs->u_regs[i], &sf->regs.u_regs[i]);
+	err |= __put_user(sizeof(siginfo_extra_v8plus_t), &sf->extra_size);
+	err |= __put_user(SIGINFO_EXTRA_V8PLUS_MAGIC, &sf->v8plus.g_upper[0]);
+	for (i = 1; i < 16; i++)
+		err |= __put_user(((u32 *)regs->u_regs)[2*i],
+				  &sf->v8plus.g_upper[i]);
+	err |= __put_user((regs->tstate & TSTATE_ASI) >> 24UL,
+			  &sf->v8plus.asi);
+
+	if (psr & PSR_EF) {
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user((u64)fp, &sf->fpu_save);
+	} else {
+		err |= __put_user(0, &sf->fpu_save);
+	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user((u64)rwp, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
+
+	/* Update the siginfo structure.  */
+	err |= copy_siginfo_to_user32(&sf->info, &ksig->info);
+	
+	/* Setup sigaltstack */
+	err |= __compat_save_altstack(&sf->stack, regs->u_regs[UREG_FP]);
+
+	err |= put_compat_sigset(&sf->mask, oldset, sizeof(compat_sigset_t));
+
+	if (!wsaved) {
+		err |= copy_in_user((u32 __user *)sf,
+				    (u32 __user *)(regs->u_regs[UREG_FP]),
+				    sizeof(struct reg_window32));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		for (i = 0; i < 8; i++)
+			err |= __put_user(rp->locals[i], &sf->ss.locals[i]);
+		for (i = 0; i < 6; i++)
+			err |= __put_user(rp->ins[i], &sf->ss.ins[i]);
+		err |= __put_user(rp->ins[6], &sf->ss.fp);
+		err |= __put_user(rp->ins[7], &sf->ss.callers_pc);
+	}
+	if (err)
+		return err;
+	
+	/* 3. signal handler back-trampoline and parameters */
+	regs->u_regs[UREG_FP] = (unsigned long) sf;
+	regs->u_regs[UREG_I0] = ksig->sig;
+	regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
+	regs->u_regs[UREG_I2] = (unsigned long) &sf->regs;
+
+	/* 4. signal handler */
+	regs->tpc = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->tnpc = (regs->tpc + 4);
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+
+	/* 5. return to kernel instructions */
+	if (ksig->ka.ka_restorer)
+		regs->u_regs[UREG_I7] = (unsigned long)ksig->ka.ka_restorer;
+	else {
+		unsigned long address = ((unsigned long)&(sf->insns[0]));
+
+		regs->u_regs[UREG_I7] = (unsigned long) (&(sf->insns[0]) - 2);
+	
+		/* mov __NR_rt_sigreturn, %g1 */
+		err |= __put_user(0x82102065, &sf->insns[0]);
+
+		/* t 0x10 */
+		err |= __put_user(0x91d02010, &sf->insns[1]);
+		if (err)
+			return err;
+
+		flush_signal_insns(address);
+	}
+	return 0;
+}
+
+static inline void handle_signal32(struct ksignal *ksig, 
+				  struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+	int err;
+
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		err = setup_rt_frame32(ksig, regs, oldset);
+	else
+		err = setup_frame32(ksig, regs, oldset);
+
+	signal_setup_done(err, ksig, 0);
+}
+
+static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs,
+				     struct sigaction *sa)
+{
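+	/* sparc syscall ABI: a failing syscall leaves a positive errno
+	 * in %o0 and sets the condition-code carry bit; libc turns that
+	 * into -1/errno.
+	 */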
+	switch (regs->u_regs[UREG_I0]) {
+	case ERESTART_RESTARTBLOCK:
+	case ERESTARTNOHAND:
+	no_system_call_restart:
+		regs->u_regs[UREG_I0] = EINTR;
+		regs->tstate |= TSTATE_ICARRY;
+		break;
+	case ERESTARTSYS:
+		if (!(sa->sa_flags & SA_RESTART))
+			goto no_system_call_restart;
+		/* fallthrough */
+	case ERESTARTNOINTR:
+		regs->u_regs[UREG_I0] = orig_i0;
+		regs->tpc -= 4;
+		regs->tnpc -= 4;
+	}
+}
+
+/* Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+void do_signal32(struct pt_regs * regs)
+{
+	struct ksignal ksig;
+	unsigned long orig_i0 = 0;
+	int restart_syscall = 0;
+	bool has_handler = get_signal(&ksig);
+
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
+
+	if (has_handler) {
+		if (restart_syscall)
+			syscall_restart32(orig_i0, regs, &ksig.ka.sa);
+		handle_signal32(&ksig, regs);
+	} else {
+		if (restart_syscall) {
+			switch (regs->u_regs[UREG_I0]) {
+			case ERESTARTNOHAND:
+			case ERESTARTSYS:
+			case ERESTARTNOINTR:
+				/* replay the system call when we are done */
+				regs->u_regs[UREG_I0] = orig_i0;
+				regs->tpc -= 4;
+				regs->tnpc -= 4;
+				pt_regs_clear_syscall(regs);
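+				/* fall through */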
+			case ERESTART_RESTARTBLOCK:
+				regs->u_regs[UREG_G1] = __NR_restart_syscall;
+				regs->tpc -= 4;
+				regs->tnpc -= 4;
+				pt_regs_clear_syscall(regs);
+			}
+		}
+		restore_saved_sigmask();
+	}
+}
+
+struct sigstack32 {
+	u32 the_stack;
+	int cur_status;
+};
+
+asmlinkage int do_sys32_sigstack(u32 u_ssptr, u32 u_ossptr, unsigned long sp)
+{
+	struct sigstack32 __user *ssptr =
+		(struct sigstack32 __user *)((unsigned long)(u_ssptr));
+	struct sigstack32 __user *ossptr =
+		(struct sigstack32 __user *)((unsigned long)(u_ossptr));
+	int ret = -EFAULT;
+
+	/* First see if old state is wanted. */
+	if (ossptr) {
+		if (put_user(current->sas_ss_sp + current->sas_ss_size,
+			     &ossptr->the_stack) ||
+		    __put_user(on_sig_stack(sp), &ossptr->cur_status))
+			goto out;
+	}
+	
+	/* Now see if we want to update the new state. */
+	if (ssptr) {
+		u32 ss_sp;
+
+		if (get_user(ss_sp, &ssptr->the_stack))
+			goto out;
+
+		/* If the current stack was set with sigaltstack, don't
+		 * swap stacks while we are on it.
+		 */
+		ret = -EPERM;
+		if (current->sas_ss_sp && on_sig_stack(sp))
+			goto out;
+			
+		/* Since we don't know the extent of the stack, and we don't
+		 * track onstack-ness, but rather calculate it, we must
+		 * presume a size.  Ho hum this interface is lossy.
+		 */
+		current->sas_ss_sp = (unsigned long)ss_sp - SIGSTKSZ;
+		current->sas_ss_size = SIGSTKSZ;
+	}
+	
+	ret = 0;
+out:
+	return ret;
+}
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
new file mode 100644
index 0000000..5665261
--- /dev/null
+++ b/arch/sparc/kernel/signal_32.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0
+/*  linux/arch/sparc/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ *  Copyright (C) 1997 Eddie C. Dost   (ecd@skynet.be)
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/smp.h>
+#include <linux/binfmts.h>	/* do_coredump */
+#include <linux/bitops.h>
+#include <linux/tracehook.h>
+
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/cacheflush.h>	/* flush_sig_insns */
+#include <asm/switch_to.h>
+
+#include "sigutil.h"
+#include "kernel.h"
+
+extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
+		   void *fpqueue, unsigned long *fpqdepth);
+extern void fpload(unsigned long *fpregs, unsigned long *fsr);
+
+struct signal_frame {
+	struct sparc_stackf	ss;
+	__siginfo32_t		info;
+	__siginfo_fpu_t __user	*fpu_save;
+	unsigned long		insns[2] __attribute__ ((aligned (8)));
+	unsigned int		extramask[_NSIG_WORDS - 1];
+	unsigned int		extra_size; /* Should be 0 */
+	__siginfo_rwin_t __user	*rwin_save;
+} __attribute__((aligned(8)));
+
+struct rt_signal_frame {
+	struct sparc_stackf	ss;
+	siginfo_t		info;
+	struct pt_regs		regs;
+	sigset_t		mask;
+	__siginfo_fpu_t __user	*fpu_save;
+	unsigned int		insns[2];
+	stack_t			stack;
+	unsigned int		extra_size; /* Should be 0 */
+	__siginfo_rwin_t __user	*rwin_save;
+} __attribute__((aligned(8)));
+
+/* Align macros: round the frame sizes up to the next 8-byte multiple. */
+#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
+#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame) + 7) & (~7)))
+
+/* Checks if the fp is valid.  We always build signal frames which are
+ * 16-byte aligned, therefore we can always enforce that the restore
+ * frame has that property as well.
+ */
+static inline bool invalid_frame_pointer(void __user *fp, int fplen)
+{
+	if ((((unsigned long) fp) & 15) || !__access_ok((unsigned long)fp, fplen))
+		return true;
+
+	return false;
+}
+
+asmlinkage void do_sigreturn(struct pt_regs *regs)
+{
+	unsigned long up_psr, pc, npc, ufp;
+	struct signal_frame __user *sf;
+	sigset_t set;
+	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
+	int err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	synchronize_user_stack();
+
+	sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
+
+	/* 1. Make sure we are not getting garbage from the user */
+	if (invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv_and_exit;
+
+	if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
+		goto segv_and_exit;
+
+	if (ufp & 0x7)
+		goto segv_and_exit;
+
+	err = __get_user(pc,  &sf->info.si_regs.pc);
+	err |= __get_user(npc, &sf->info.si_regs.npc);
+
+	if ((pc | npc) & 3)
+		goto segv_and_exit;
+
+	/* 2. Restore the state */
+	up_psr = regs->psr;
+	err |= __copy_from_user(regs, &sf->info.si_regs, sizeof(struct pt_regs));
+
+	/* User can only change condition codes and FPU enabling in %psr. */
+	regs->psr = (up_psr & ~(PSR_ICC | PSR_EF))
+		  | (regs->psr & (PSR_ICC | PSR_EF));
+
+	/* Prevent syscall restart.  */
+	pt_regs_clear_syscall(regs);
+
+	err |= __get_user(fpu_save, &sf->fpu_save);
+	if (fpu_save)
+		err |= restore_fpu_state(regs, fpu_save);
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (rwin_save)
+		err |= restore_rwin_state(rwin_save);
+
+	/* This is pretty much atomic; no amount of locking would prevent
+	 * the races which exist anyway.
+	 */
+	err |= __get_user(set.sig[0], &sf->info.si_mask);
+	err |= __copy_from_user(&set.sig[1], &sf->extramask,
+			        (_NSIG_WORDS-1) * sizeof(unsigned int));
+			   
+	if (err)
+		goto segv_and_exit;
+
+	set_current_blocked(&set);
+	return;
+
+segv_and_exit:
+	force_sig(SIGSEGV, current);
+}
+
+asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
+{
+	struct rt_signal_frame __user *sf;
+	unsigned int psr, pc, npc, ufp;
+	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
+	sigset_t set;
+	int err;
+
+	synchronize_user_stack();
+	sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
+	if (invalid_frame_pointer(sf, sizeof(*sf)))
+		goto segv;
+
+	if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if (ufp & 0x7)
+		goto segv;
+
+	err = __get_user(pc, &sf->regs.pc);
+	err |= __get_user(npc, &sf->regs.npc);
+	err |= ((pc | npc) & 0x03);
+
+	err |= __get_user(regs->y, &sf->regs.y);
+	err |= __get_user(psr, &sf->regs.psr);
+
+	err |= __copy_from_user(&regs->u_regs[UREG_G1],
+				&sf->regs.u_regs[UREG_G1], 15 * sizeof(u32));
+
+	regs->psr = (regs->psr & ~PSR_ICC) | (psr & PSR_ICC);
+
+	/* Prevent syscall restart.  */
+	pt_regs_clear_syscall(regs);
+
+	err |= __get_user(fpu_save, &sf->fpu_save);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, fpu_save);
+	err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
+	err |= restore_altstack(&sf->stack);
+	
+	if (err)
+		goto segv;
+		
+	regs->pc = pc;
+	regs->npc = npc;
+	
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(rwin_save))
+			goto segv;
+	}
+
+	set_current_blocked(&set);
+	return;
+segv:
+	force_sig(SIGSEGV, current);
+}
+
+static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+{
+	unsigned long sp = regs->u_regs[UREG_FP];
+
+	/*
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+		return (void __user *) -1L;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	sp = sigsp(sp, ksig) - framesize;
+
+	/* Always align the stack frame.  This handles two cases.  First,
+	 * sigaltstack need not be mindful of platform specific stack
+	 * alignment.  Second, if we took this signal because the stack
+	 * is not aligned properly, we'd like to take the signal cleanly
+	 * and report that.
+	 */
+	sp &= ~15UL;
+
+	return (void __user *) sp;
+}
+
+static int setup_frame(struct ksignal *ksig, struct pt_regs *regs,
+		       sigset_t *oldset)
+{
+	struct signal_frame __user *sf;
+	int sigframe_size, err, wsaved;
+	void __user *tail;
+
+	/* 1. Make sure everything is clean */
+	synchronize_user_stack();
+
+	wsaved = current_thread_info()->w_saved;
+
+	sigframe_size = sizeof(*sf);
+	if (used_math())
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
+
+	sf = (struct signal_frame __user *)
+		get_sigframe(ksig, regs, sigframe_size);
+
+	if (invalid_frame_pointer(sf, sigframe_size)) {
+		do_exit(SIGILL);
+		return -EINVAL;
+	}
+
+	tail = sf + 1;
+
+	/* 2. Save the current process state */
+	err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs));
+	
+	err |= __put_user(0, &sf->extra_size);
+
+	if (used_math()) {
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user(fp, &sf->fpu_save);
+	} else {
+		err |= __put_user(0, &sf->fpu_save);
+	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user(rwp, &sf->rwin_save);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
+
+	err |= __put_user(oldset->sig[0], &sf->info.si_mask);
+	err |= __copy_to_user(sf->extramask, &oldset->sig[1],
+			      (_NSIG_WORDS - 1) * sizeof(unsigned int));
+	if (!wsaved) {
+		err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+				      sizeof(struct reg_window32));
+	} else {
+		struct reg_window32 *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+	}
+	if (err)
+		return err;
+	
+	/* 3. signal handler back-trampoline and parameters */
+	regs->u_regs[UREG_FP] = (unsigned long) sf;
+	regs->u_regs[UREG_I0] = ksig->sig;
+	regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
+	regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
+
+	/* 4. signal handler */
+	regs->pc = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->npc = (regs->pc + 4);
+
+	/* 5. return to kernel instructions */
+	if (ksig->ka.ka_restorer)
+		regs->u_regs[UREG_I7] = (unsigned long)ksig->ka.ka_restorer;
+	else {
+		regs->u_regs[UREG_I7] = (unsigned long)(&(sf->insns[0]) - 2);
+
+		/* mov __NR_sigreturn, %g1 */
+		err |= __put_user(0x821020d8, &sf->insns[0]);
+
+		/* t 0x10 */
+		err |= __put_user(0x91d02010, &sf->insns[1]);
+		if (err)
+			return err;
+
+		/* Flush instruction space. */
+		flush_sig_insns(current->mm, (unsigned long) &(sf->insns[0]));
+	}
+	return 0;
+}
+
+static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs,
+			  sigset_t *oldset)
+{
+	struct rt_signal_frame __user *sf;
+	int sigframe_size, wsaved;
+	void __user *tail;
+	unsigned int psr;
+	int err;
+
+	synchronize_user_stack();
+	wsaved = current_thread_info()->w_saved;
+	sigframe_size = sizeof(*sf);
+	if (used_math())
+		sigframe_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sigframe_size += sizeof(__siginfo_rwin_t);
+	sf = (struct rt_signal_frame __user *)
+		get_sigframe(ksig, regs, sigframe_size);
+	if (invalid_frame_pointer(sf, sigframe_size)) {
+		do_exit(SIGILL);
+		return -EINVAL;
+	}
+
+	tail = sf + 1;
+	err  = __put_user(regs->pc, &sf->regs.pc);
+	err |= __put_user(regs->npc, &sf->regs.npc);
+	err |= __put_user(regs->y, &sf->regs.y);
+	psr = regs->psr;
+	if (used_math())
+		psr |= PSR_EF;
+	err |= __put_user(psr, &sf->regs.psr);
+	err |= __copy_to_user(&sf->regs.u_regs, regs->u_regs, sizeof(regs->u_regs));
+	err |= __put_user(0, &sf->extra_size);
+
+	if (psr & PSR_EF) {
+		__siginfo_fpu_t __user *fp = tail;
+		tail += sizeof(*fp);
+		err |= save_fpu_state(regs, fp);
+		err |= __put_user(fp, &sf->fpu_save);
+	} else {
+		err |= __put_user(0, &sf->fpu_save);
+	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwp = tail;
+		tail += sizeof(*rwp);
+		err |= save_rwin_state(wsaved, rwp);
+		err |= __put_user(rwp, &sf->rwin_save);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
+	err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t));
+	
+	/* Setup sigaltstack */
+	err |= __save_altstack(&sf->stack, regs->u_regs[UREG_FP]);
+	
+	if (!wsaved) {
+		err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP],
+				      sizeof(struct reg_window32));
+	} else {
+		struct reg_window32 *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= __copy_to_user(sf, rp, sizeof(struct reg_window32));
+	}
+
+	err |= copy_siginfo_to_user(&sf->info, &ksig->info);
+
+	if (err)
+		return err;
+
+	regs->u_regs[UREG_FP] = (unsigned long) sf;
+	regs->u_regs[UREG_I0] = ksig->sig;
+	regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
+	regs->u_regs[UREG_I2] = (unsigned long) &sf->regs;
+
+	regs->pc = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->npc = (regs->pc + 4);
+
+	if (ksig->ka.ka_restorer)
+		regs->u_regs[UREG_I7] = (unsigned long)ksig->ka.ka_restorer;
+	else {
+		regs->u_regs[UREG_I7] = (unsigned long)(&(sf->insns[0]) - 2);
+
+		/* mov __NR_rt_sigreturn, %g1 */
+		err |= __put_user(0x82102065, &sf->insns[0]);
+
+		/* t 0x10 */
+		err |= __put_user(0x91d02010, &sf->insns[1]);
+		if (err)
+			return err;
+
+		/* Flush instruction space. */
+		flush_sig_insns(current->mm, (unsigned long) &(sf->insns[0]));
+	}
+	return 0;
+}
+
+static inline void
+handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+{
+	sigset_t *oldset = sigmask_to_save();
+	int err;
+
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		err = setup_rt_frame(ksig, regs, oldset);
+	else
+		err = setup_frame(ksig, regs, oldset);
+	signal_setup_done(err, ksig, 0);
+}
+
+static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
+				   struct sigaction *sa)
+{
+	switch(regs->u_regs[UREG_I0]) {
+	case ERESTART_RESTARTBLOCK:
+	case ERESTARTNOHAND:
+	no_system_call_restart:
+		regs->u_regs[UREG_I0] = EINTR;
+		regs->psr |= PSR_C;
+		break;
+	case ERESTARTSYS:
+		if (!(sa->sa_flags & SA_RESTART))
+			goto no_system_call_restart;
+		/* fallthrough */
+	case ERESTARTNOINTR:
+		regs->u_regs[UREG_I0] = orig_i0;
+		regs->pc -= 4;
+		regs->npc -= 4;
+	}
+}
+
+/* Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
+{
+	struct ksignal ksig;
+	int restart_syscall;
+	bool has_handler;
+
+	/* It's a lot of work and synchronization to add a new ptrace
+	 * register for GDB to save and restore in order to get
+	 * orig_i0 correct for syscall restarts when debugging.
+	 *
+	 * Although it should be the case that most of the global
+	 * registers are volatile across a system call, glibc already
+	 * depends upon the fact that we preserve them.  So we can't
+	 * just use any global register to save away the orig_i0 value.
+	 *
+	 * In particular %g2, %g3, %g4, and %g5 are all assumed to be
+	 * preserved across a system call trap by various pieces of
+	 * code in glibc.
+	 *
+	 * %g7 is used as the "thread register".   %g6 is not used in
+	 * any fixed manner.  %g6 is used as a scratch register and
+	 * a compiler temporary, but its value is never used across
+	 * a system call.  Therefore %g6 is usable for orig_i0 storage.
+	 */
+	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C))
+		regs->u_regs[UREG_G6] = orig_i0;
+
+	has_handler = get_signal(&ksig);
+
+	/* If the debugger messes with the program counter, it clears
+	 * the software "in syscall" bit, directing us to not perform
+	 * a syscall restart.
+	 */
+	restart_syscall = 0;
+	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C)) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
+
+	if (has_handler) {
+		if (restart_syscall)
+			syscall_restart(orig_i0, regs, &ksig.ka.sa);
+		handle_signal(&ksig, regs);
+	} else {
+		if (restart_syscall) {
+			switch (regs->u_regs[UREG_I0]) {
+			case ERESTARTNOHAND:
+			case ERESTARTSYS:
+			case ERESTARTNOINTR:
+				/* replay the system call when we are done */
+				regs->u_regs[UREG_I0] = orig_i0;
+				regs->pc -= 4;
+				regs->npc -= 4;
+				pt_regs_clear_syscall(regs);
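+				/* fall through */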
+			case ERESTART_RESTARTBLOCK:
+				regs->u_regs[UREG_G1] = __NR_restart_syscall;
+				regs->pc -= 4;
+				regs->npc -= 4;
+				pt_regs_clear_syscall(regs);
+			}
+		}
+		restore_saved_sigmask();
+	}
+}
+
+void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0,
+		      unsigned long thread_info_flags)
+{
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs, orig_i0);
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+}
+
+asmlinkage int do_sys_sigstack(struct sigstack __user *ssptr,
+                               struct sigstack __user *ossptr,
+                               unsigned long sp)
+{
+	int ret = -EFAULT;
+
+	/* First see if old state is wanted. */
+	if (ossptr) {
+		if (put_user(current->sas_ss_sp + current->sas_ss_size,
+			     &ossptr->the_stack) ||
+		    __put_user(on_sig_stack(sp), &ossptr->cur_status))
+			goto out;
+	}
+
+	/* Now see if we want to update the new state. */
+	if (ssptr) {
+		char *ss_sp;
+
+		if (get_user(ss_sp, &ssptr->the_stack))
+			goto out;
+		/* If the current stack was set with sigaltstack, don't
+		   swap stacks while we are on it.  */
+		ret = -EPERM;
+		if (current->sas_ss_sp && on_sig_stack(sp))
+			goto out;
+
+		/* Since we don't know the extent of the stack, and we don't
+		   track onstack-ness, but rather calculate it, we must
+		   presume a size.  Ho hum this interface is lossy.  */
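+		/* e.g. the_stack == 0xf0000000 is recorded as
+		   sas_ss_sp == 0xf0000000 - SIGSTKSZ with size SIGSTKSZ,
+		   mirroring the_stack being reported above as
+		   sas_ss_sp + sas_ss_size.  */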
+		current->sas_ss_sp = (unsigned long)ss_sp - SIGSTKSZ;
+		current->sas_ss_size = SIGSTKSZ;
+	}
+	ret = 0;
+out:
+	return ret;
+}
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
new file mode 100644
index 0000000..e9de180
--- /dev/null
+++ b/arch/sparc/kernel/signal_64.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  arch/sparc64/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net)
+ *  Copyright (C) 1996 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ *  Copyright (C) 1997 Eddie C. Dost   (ecd@skynet.be)
+ *  Copyright (C) 1997,1998 Jakub Jelinek   (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/tracehook.h>
+#include <linux/unistd.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/bitops.h>
+#include <linux/context_tracking.h>
+
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/pgtable.h>
+#include <asm/fpumacro.h>
+#include <asm/uctx.h>
+#include <asm/siginfo.h>
+#include <asm/visasm.h>
+#include <asm/switch_to.h>
+#include <asm/cacheflush.h>
+
+#include "sigutil.h"
+#include "systbls.h"
+#include "kernel.h"
+#include "entry.h"
+
+/* {set, get}context() needed for 64-bit SparcLinux userland. */
+asmlinkage void sparc64_set_context(struct pt_regs *regs)
+{
+	struct ucontext __user *ucp = (struct ucontext __user *)
+		regs->u_regs[UREG_I0];
+	enum ctx_state prev_state = exception_enter();
+	mc_gregset_t __user *grp;
+	unsigned long pc, npc, tstate;
+	unsigned long fp, i7;
+	unsigned char fenab;
+	int err;
+
+	synchronize_user_stack();
+	if (get_thread_wsaved()					||
+	    (((unsigned long)ucp) & (sizeof(unsigned long)-1))	||
+	    (!__access_ok(ucp, sizeof(*ucp))))
+		goto do_sigsegv;
+	grp  = &ucp->uc_mcontext.mc_gregs;
+	err  = __get_user(pc, &((*grp)[MC_PC]));
+	err |= __get_user(npc, &((*grp)[MC_NPC]));
+	if (err || ((pc | npc) & 3))
+		goto do_sigsegv;
+	if (regs->u_regs[UREG_I1]) {
+		sigset_t set;
+
+		if (_NSIG_WORDS == 1) {
+			if (__get_user(set.sig[0], &ucp->uc_sigmask.sig[0]))
+				goto do_sigsegv;
+		} else {
+			if (__copy_from_user(&set, &ucp->uc_sigmask, sizeof(sigset_t)))
+				goto do_sigsegv;
+		}
+		set_current_blocked(&set);
+	}
+	if (test_thread_flag(TIF_32BIT)) {
+		pc &= 0xffffffff;
+		npc &= 0xffffffff;
+	}
+	regs->tpc = pc;
+	regs->tnpc = npc;
+	err |= __get_user(regs->y, &((*grp)[MC_Y]));
+	err |= __get_user(tstate, &((*grp)[MC_TSTATE]));
+	regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC);
+	regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
+	err |= __get_user(regs->u_regs[UREG_G1], (&(*grp)[MC_G1]));
+	err |= __get_user(regs->u_regs[UREG_G2], (&(*grp)[MC_G2]));
+	err |= __get_user(regs->u_regs[UREG_G3], (&(*grp)[MC_G3]));
+	err |= __get_user(regs->u_regs[UREG_G4], (&(*grp)[MC_G4]));
+	err |= __get_user(regs->u_regs[UREG_G5], (&(*grp)[MC_G5]));
+	err |= __get_user(regs->u_regs[UREG_G6], (&(*grp)[MC_G6]));
+
+	/* Skip %g7 as that's the thread register in userspace.  */
+
+	err |= __get_user(regs->u_regs[UREG_I0], (&(*grp)[MC_O0]));
+	err |= __get_user(regs->u_regs[UREG_I1], (&(*grp)[MC_O1]));
+	err |= __get_user(regs->u_regs[UREG_I2], (&(*grp)[MC_O2]));
+	err |= __get_user(regs->u_regs[UREG_I3], (&(*grp)[MC_O3]));
+	err |= __get_user(regs->u_regs[UREG_I4], (&(*grp)[MC_O4]));
+	err |= __get_user(regs->u_regs[UREG_I5], (&(*grp)[MC_O5]));
+	err |= __get_user(regs->u_regs[UREG_I6], (&(*grp)[MC_O6]));
+	err |= __get_user(regs->u_regs[UREG_I7], (&(*grp)[MC_O7]));
+
+	err |= __get_user(fp, &(ucp->uc_mcontext.mc_fp));
+	err |= __get_user(i7, &(ucp->uc_mcontext.mc_i7));
+	err |= __put_user(fp,
+	      (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[6])));
+	err |= __put_user(i7,
+	      (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[7])));
+
+	err |= __get_user(fenab, &(ucp->uc_mcontext.mc_fpregs.mcfpu_enab));
+	if (fenab) {
+		unsigned long *fpregs = current_thread_info()->fpregs;
+		unsigned long fprs;
+		
+		fprs_write(0);
+		err |= __get_user(fprs, &(ucp->uc_mcontext.mc_fpregs.mcfpu_fprs));
+		if (fprs & FPRS_DL)
+			err |= copy_from_user(fpregs,
+					      &(ucp->uc_mcontext.mc_fpregs.mcfpu_fregs),
+					      (sizeof(unsigned int) * 32));
+		if (fprs & FPRS_DU)
+			err |= copy_from_user(fpregs+16,
+			 ((unsigned long __user *)&(ucp->uc_mcontext.mc_fpregs.mcfpu_fregs))+16,
+			 (sizeof(unsigned int) * 32));
+		err |= __get_user(current_thread_info()->xfsr[0],
+				  &(ucp->uc_mcontext.mc_fpregs.mcfpu_fsr));
+		err |= __get_user(current_thread_info()->gsr[0],
+				  &(ucp->uc_mcontext.mc_fpregs.mcfpu_gsr));
+		regs->tstate &= ~TSTATE_PEF;
+	}
+	if (err)
+		goto do_sigsegv;
+out:
+	exception_exit(prev_state);
+	return;
+do_sigsegv:
+	force_sig(SIGSEGV, current);
+	goto out;
+}
+
+asmlinkage void sparc64_get_context(struct pt_regs *regs)
+{
+	struct ucontext __user *ucp = (struct ucontext __user *)
+		regs->u_regs[UREG_I0];
+	enum ctx_state prev_state = exception_enter();
+	mc_gregset_t __user *grp;
+	mcontext_t __user *mcp;
+	unsigned long fp, i7;
+	unsigned char fenab;
+	int err;
+
+	synchronize_user_stack();
+	if (get_thread_wsaved() || clear_user(ucp, sizeof(*ucp)))
+		goto do_sigsegv;
+
+#if 1
+	fenab = 0; /* IMO get_context is like any other system call, thus modifies FPU state -jj */
+#else
+	fenab = (current_thread_info()->fpsaved[0] & FPRS_FEF);
+#endif
+		
+	mcp = &ucp->uc_mcontext;
+	grp = &mcp->mc_gregs;
+
+	/* Skip over the trap instruction, first. */
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc   = (regs->tnpc & 0xffffffff);
+		regs->tnpc  = (regs->tnpc + 4) & 0xffffffff;
+	} else {
+		regs->tpc   = regs->tnpc;
+		regs->tnpc += 4;
+	}
+	err = 0;
+	if (_NSIG_WORDS == 1)
+		err |= __put_user(current->blocked.sig[0],
+				  (unsigned long __user *)&ucp->uc_sigmask);
+	else
+		err |= __copy_to_user(&ucp->uc_sigmask, &current->blocked,
+				      sizeof(sigset_t));
+
+	err |= __put_user(regs->tstate, &((*grp)[MC_TSTATE]));
+	err |= __put_user(regs->tpc, &((*grp)[MC_PC]));
+	err |= __put_user(regs->tnpc, &((*grp)[MC_NPC]));
+	err |= __put_user(regs->y, &((*grp)[MC_Y]));
+	err |= __put_user(regs->u_regs[UREG_G1], &((*grp)[MC_G1]));
+	err |= __put_user(regs->u_regs[UREG_G2], &((*grp)[MC_G2]));
+	err |= __put_user(regs->u_regs[UREG_G3], &((*grp)[MC_G3]));
+	err |= __put_user(regs->u_regs[UREG_G4], &((*grp)[MC_G4]));
+	err |= __put_user(regs->u_regs[UREG_G5], &((*grp)[MC_G5]));
+	err |= __put_user(regs->u_regs[UREG_G6], &((*grp)[MC_G6]));
+	err |= __put_user(regs->u_regs[UREG_G7], &((*grp)[MC_G7]));
+	err |= __put_user(regs->u_regs[UREG_I0], &((*grp)[MC_O0]));
+	err |= __put_user(regs->u_regs[UREG_I1], &((*grp)[MC_O1]));
+	err |= __put_user(regs->u_regs[UREG_I2], &((*grp)[MC_O2]));
+	err |= __put_user(regs->u_regs[UREG_I3], &((*grp)[MC_O3]));
+	err |= __put_user(regs->u_regs[UREG_I4], &((*grp)[MC_O4]));
+	err |= __put_user(regs->u_regs[UREG_I5], &((*grp)[MC_O5]));
+	err |= __put_user(regs->u_regs[UREG_I6], &((*grp)[MC_O6]));
+	err |= __put_user(regs->u_regs[UREG_I7], &((*grp)[MC_O7]));
+
+	err |= __get_user(fp,
+		 (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[6])));
+	err |= __get_user(i7,
+		 (&(((struct reg_window __user *)(STACK_BIAS+regs->u_regs[UREG_I6]))->ins[7])));
+	err |= __put_user(fp, &(mcp->mc_fp));
+	err |= __put_user(i7, &(mcp->mc_i7));
+
+	err |= __put_user(fenab, &(mcp->mc_fpregs.mcfpu_enab));
+	if (fenab) {
+		unsigned long *fpregs = current_thread_info()->fpregs;
+		unsigned long fprs;
+		
+		fprs = current_thread_info()->fpsaved[0];
+		if (fprs & FPRS_DL)
+			err |= copy_to_user(&(mcp->mc_fpregs.mcfpu_fregs), fpregs,
+					    (sizeof(unsigned int) * 32));
+		if (fprs & FPRS_DU)
+			err |= copy_to_user(
+                          ((unsigned long __user *)&(mcp->mc_fpregs.mcfpu_fregs))+16, fpregs+16,
+			  (sizeof(unsigned int) * 32));
+		err |= __put_user(current_thread_info()->xfsr[0], &(mcp->mc_fpregs.mcfpu_fsr));
+		err |= __put_user(current_thread_info()->gsr[0], &(mcp->mc_fpregs.mcfpu_gsr));
+		err |= __put_user(fprs, &(mcp->mc_fpregs.mcfpu_fprs));
+	}
+	if (err)
+		goto do_sigsegv;
+out:
+	exception_exit(prev_state);
+	return;
+do_sigsegv:
+	force_sig(SIGSEGV, current);
+	goto out;
+}
+
+/* Checks if the fp is valid.  We always build rt signal frames which
+ * are 16-byte aligned, therefore we can always enforce that the
+ * restore frame has that property as well.
+ */
+static bool invalid_frame_pointer(void __user *fp)
+{
+	if (((unsigned long) fp) & 15)
+		return true;
+	return false;
+}
+
+struct rt_signal_frame {
+	struct sparc_stackf	ss;
+	siginfo_t		info;
+	struct pt_regs		regs;
+	__siginfo_fpu_t __user	*fpu_save;
+	stack_t			stack;
+	sigset_t		mask;
+	__siginfo_rwin_t	*rwin_save;
+};
+
+void do_rt_sigreturn(struct pt_regs *regs)
+{
+	unsigned long tpc, tnpc, tstate, ufp;
+	struct rt_signal_frame __user *sf;
+	__siginfo_fpu_t __user *fpu_save;
+	__siginfo_rwin_t __user *rwin_save;
+	sigset_t set;
+	int err;
+
+	/* Always make any pending restarted system calls return -EINTR */
+	current->restart_block.fn = do_no_restart_syscall;
+
+	synchronize_user_stack ();
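+	/* The 64-bit ABI keeps %sp/%fp biased by STACK_BIAS (2047), so
+	 * add the bias back to recover the true frame address.
+	 */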
+	sf = (struct rt_signal_frame __user *)
+		(regs->u_regs [UREG_FP] + STACK_BIAS);
+
+	/* 1. Make sure we are not getting garbage from the user */
+	if (invalid_frame_pointer(sf))
+		goto segv;
+
+	if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
+		goto segv;
+
+	if ((ufp + STACK_BIAS) & 0x7)
+		goto segv;
+
+	err = __get_user(tpc, &sf->regs.tpc);
+	err |= __get_user(tnpc, &sf->regs.tnpc);
+	if (test_thread_flag(TIF_32BIT)) {
+		tpc &= 0xffffffff;
+		tnpc &= 0xffffffff;
+	}
+	err |= ((tpc | tnpc) & 3);
+
+	/* 2. Restore the state */
+	err |= __get_user(regs->y, &sf->regs.y);
+	err |= __get_user(tstate, &sf->regs.tstate);
+	err |= copy_from_user(regs->u_regs, sf->regs.u_regs, sizeof(regs->u_regs));
+
+	/* User can only change condition codes and %asi in %tstate. */
+	regs->tstate &= ~(TSTATE_ASI | TSTATE_ICC | TSTATE_XCC);
+	regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC));
+
+	err |= __get_user(fpu_save, &sf->fpu_save);
+	if (!err && fpu_save)
+		err |= restore_fpu_state(regs, fpu_save);
+
+	err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t));
+	err |= restore_altstack(&sf->stack);
+	if (err)
+		goto segv;
+
+	err |= __get_user(rwin_save, &sf->rwin_save);
+	if (!err && rwin_save) {
+		if (restore_rwin_state(rwin_save))
+			goto segv;
+	}
+
+	regs->tpc = tpc;
+	regs->tnpc = tnpc;
+
+	/* Prevent syscall restart.  */
+	pt_regs_clear_syscall(regs);
+
+	set_current_blocked(&set);
+	return;
+segv:
+	force_sig(SIGSEGV, current);
+}
+
+static inline void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, unsigned long framesize)
+{
+	unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+
+	/*
+	 * If we are on the alternate signal stack and would overflow it, don't.
+	 * Return an always-bogus address instead so we will die with SIGSEGV.
+	 */
+	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+		return (void __user *) -1L;
+
+	/* This is the X/Open sanctioned signal stack switching.  */
+	sp = sigsp(sp, ksig) - framesize;
+
+	/* Always align the stack frame.  This handles two cases.  First,
+	 * sigaltstack need not be mindful of platform specific stack
+	 * alignment.  Second, if we took this signal because the stack
+	 * is not aligned properly, we'd like to take the signal cleanly
+	 * and report that.
+	 */
+	sp &= ~15UL;
+
+	return (void __user *) sp;
+}
+
+static inline int
+setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
+{
+	struct rt_signal_frame __user *sf;
+	int wsaved, err, sf_size;
+	void __user *tail;
+
+	/* 1. Make sure everything is clean */
+	synchronize_user_stack();
+	save_and_clear_fpu();
+	
+	wsaved = get_thread_wsaved();
+
+	sf_size = sizeof(struct rt_signal_frame);
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
+		sf_size += sizeof(__siginfo_fpu_t);
+	if (wsaved)
+		sf_size += sizeof(__siginfo_rwin_t);
+	sf = (struct rt_signal_frame __user *)
+		get_sigframe(ksig, regs, sf_size);
+
+	if (invalid_frame_pointer (sf)) {
+		if (show_unhandled_signals)
+			pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n",
+				current->comm, current->pid, (unsigned long)sf,
+				regs->tpc, regs->u_regs[UREG_I7]);
+		force_sigsegv(ksig->sig, current);
+		return -EINVAL;
+	}
+
+	tail = (sf + 1);
+
+	/* 2. Save the current process state */
+	err = copy_to_user(&sf->regs, regs, sizeof (*regs));
+
+	if (current_thread_info()->fpsaved[0] & FPRS_FEF) {
+		__siginfo_fpu_t __user *fpu_save = tail;
+		tail += sizeof(__siginfo_fpu_t);
+		err |= save_fpu_state(regs, fpu_save);
+		err |= __put_user((u64)fpu_save, &sf->fpu_save);
+	} else {
+		err |= __put_user(0, &sf->fpu_save);
+	}
+	if (wsaved) {
+		__siginfo_rwin_t __user *rwin_save = tail;
+		tail += sizeof(__siginfo_rwin_t);
+		err |= save_rwin_state(wsaved, rwin_save);
+		err |= __put_user((u64)rwin_save, &sf->rwin_save);
+		set_thread_wsaved(0);
+	} else {
+		err |= __put_user(0, &sf->rwin_save);
+	}
+	
+	/* Setup sigaltstack */
+	err |= __save_altstack(&sf->stack, regs->u_regs[UREG_FP]);
+
+	err |= copy_to_user(&sf->mask, sigmask_to_save(), sizeof(sigset_t));
+
+	if (!wsaved) {
+		err |= copy_in_user((u64 __user *)sf,
+				    (u64 __user *)(regs->u_regs[UREG_FP] +
+						   STACK_BIAS),
+				    sizeof(struct reg_window));
+	} else {
+		struct reg_window *rp;
+
+		rp = &current_thread_info()->reg_window[wsaved - 1];
+		err |= copy_to_user(sf, rp, sizeof(struct reg_window));
+	}
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		err |= copy_siginfo_to_user(&sf->info, &ksig->info);
+	else {
+		err |= __put_user(ksig->sig, &sf->info.si_signo);
+		err |= __put_user(SI_NOINFO, &sf->info.si_code);
+	}
+	if (err)
+		return err;
+	
+	/* 3. signal handler back-trampoline and parameters */
+	regs->u_regs[UREG_FP] = ((unsigned long) sf) - STACK_BIAS;
+	regs->u_regs[UREG_I0] = ksig->sig;
+	regs->u_regs[UREG_I1] = (unsigned long) &sf->info;
+
+	/* The sigcontext is passed in this way because of how it
+	 * is defined in GLIBC's /usr/include/bits/sigcontext.h
+	 * for sparc64.  It includes the 128 bytes of siginfo_t.
+	 */
+	regs->u_regs[UREG_I2] = (unsigned long) &sf->info;
+
+	/* 4. signal handler */
+	regs->tpc = (unsigned long) ksig->ka.sa.sa_handler;
+	regs->tnpc = (regs->tpc + 4);
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	/* 5. return to kernel instructions */
+	regs->u_regs[UREG_I7] = (unsigned long)ksig->ka.ka_restorer;
+	return 0;
+}
+
+static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
+				   struct sigaction *sa)
+{
+	switch (regs->u_regs[UREG_I0]) {
+	case ERESTART_RESTARTBLOCK:
+	case ERESTARTNOHAND:
+	no_system_call_restart:
+		regs->u_regs[UREG_I0] = EINTR;
+		regs->tstate |= (TSTATE_ICARRY|TSTATE_XCARRY);
+		break;
+	case ERESTARTSYS:
+		if (!(sa->sa_flags & SA_RESTART))
+			goto no_system_call_restart;
+		/* fallthrough */
+	case ERESTARTNOINTR:
+		regs->u_regs[UREG_I0] = orig_i0;
+		regs->tpc -= 4;
+		regs->tnpc -= 4;
+	}
+}
+
+/* Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
+{
+	struct ksignal ksig;
+	int restart_syscall;
+	bool has_handler;
+	
+	/* It's a lot of work and synchronization to add a new ptrace
+	 * register for GDB to save and restore in order to get
+	 * orig_i0 correct for syscall restarts when debugging.
+	 *
+	 * Although it should be the case that most of the global
+	 * registers are volatile across a system call, glibc already
+	 * depends upon the fact that we preserve them.  So we can't
+	 * just use any global register to save away the orig_i0 value.
+	 *
+	 * In particular %g2, %g3, %g4, and %g5 are all assumed to be
+	 * preserved across a system call trap by various pieces of
+	 * code in glibc.
+	 *
+	 * %g7 is used as the "thread register".   %g6 is not used in
+	 * any fixed manner.  %g6 is used as a scratch register and
+	 * a compiler temporary, but its value is never used across
+	 * a system call.  Therefore %g6 is usable for orig_i0 storage.
+	 */
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
+		regs->u_regs[UREG_G6] = orig_i0;
+
+#ifdef CONFIG_COMPAT
+	if (test_thread_flag(TIF_32BIT)) {
+		do_signal32(regs);
+		return;
+	}
+#endif
+
+	has_handler = get_signal(&ksig);
+
+	restart_syscall = 0;
+	if (pt_regs_is_syscall(regs) &&
+	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY))) {
+		restart_syscall = 1;
+		orig_i0 = regs->u_regs[UREG_G6];
+	}
+
+	if (has_handler) {
+		if (restart_syscall)
+			syscall_restart(orig_i0, regs, &ksig.ka.sa);
+		signal_setup_done(setup_rt_frame(&ksig, regs), &ksig, 0);
+	} else {
+		if (restart_syscall) {
+			switch (regs->u_regs[UREG_I0]) {
+			case ERESTARTNOHAND:
+			case ERESTARTSYS:
+			case ERESTARTNOINTR:
+				/* replay the system call when we are done */
+				regs->u_regs[UREG_I0] = orig_i0;
+				regs->tpc -= 4;
+				regs->tnpc -= 4;
+				pt_regs_clear_syscall(regs);
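+				/* fallthrough */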
+			case ERESTART_RESTARTBLOCK:
+				regs->u_regs[UREG_G1] = __NR_restart_syscall;
+				regs->tpc -= 4;
+				regs->tnpc -= 4;
+				pt_regs_clear_syscall(regs);
+			}
+		}
+		restore_saved_sigmask();
+	}
+}
+
+void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
+{
+	user_exit();
+	if (thread_info_flags & _TIF_UPROBE)
+		uprobe_notify_resume(regs);
+	if (thread_info_flags & _TIF_SIGPENDING)
+		do_signal(regs, orig_i0);
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+	user_enter();
+}
+
diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h
new file mode 100644
index 0000000..21d332d
--- /dev/null
+++ b/arch/sparc/kernel/sigutil.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SIGUTIL_H
+#define _SIGUTIL_H
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu);
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin);
+int restore_rwin_state(__siginfo_rwin_t __user *rp);
+
+#endif /* _SIGUTIL_H */
diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c
new file mode 100644
index 0000000..1e9fae5
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_32.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+#include <asm/switch_to.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	int err = 0;
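+
+	/* If this task owns the live FPU state, enable the FPU (PSR_EF)
+	 * just long enough to dump that state into the thread struct,
+	 * so it can then be copied out to the signal frame.
+	 */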
+#ifdef CONFIG_SMP
+	if (test_tsk_thread_flag(current, TIF_USEDFPU)) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		regs->psr &= ~(PSR_EF);
+		clear_tsk_thread_flag(current, TIF_USEDFPU);
+	}
+#else
+	if (current == last_task_used_math) {
+		put_psr(get_psr() | PSR_EF);
+		fpsave(&current->thread.float_regs[0], &current->thread.fsr,
+		       &current->thread.fpqueue[0], &current->thread.fpqdepth);
+		last_task_used_math = NULL;
+		regs->psr &= ~(PSR_EF);
+	}
+#endif
+	err |= __copy_to_user(&fpu->si_float_regs[0],
+			      &current->thread.float_regs[0],
+			      (sizeof(unsigned long) * 32));
+	err |= __put_user(current->thread.fsr, &fpu->si_fsr);
+	err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+	if (current->thread.fpqdepth != 0)
+		err |= __copy_to_user(&fpu->si_fpqueue[0],
+				      &current->thread.fpqueue[0],
+				      ((sizeof(unsigned long) +
+				      (sizeof(unsigned long *)))*16));
+	clear_used_math();
+	return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	int err;
+
+	if (((unsigned long) fpu) & 3)
+		return -EFAULT;
+
+#ifdef CONFIG_SMP
+	if (test_tsk_thread_flag(current, TIF_USEDFPU))
+		regs->psr &= ~PSR_EF;
+#else
+	if (current == last_task_used_math) {
+		last_task_used_math = NULL;
+		regs->psr &= ~PSR_EF;
+	}
+#endif
+	set_used_math();
+	clear_tsk_thread_flag(current, TIF_USEDFPU);
+
+	if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu)))
+		return -EFAULT;
+
+	err = __copy_from_user(&current->thread.float_regs[0], &fpu->si_float_regs[0],
+			       (sizeof(unsigned long) * 32));
+	err |= __get_user(current->thread.fsr, &fpu->si_fsr);
+	err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth);
+	if (current->thread.fpqdepth != 0)
+		err |= __copy_from_user(&current->thread.fpqueue[0],
+					&fpu->si_fpqueue[0],
+					((sizeof(unsigned long) +
+					(sizeof(unsigned long *)))*16));
+	return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+	int i, err = __put_user(wsaved, &rwin->wsaved);
+
+	for (i = 0; i < wsaved; i++) {
+		struct reg_window32 *rp;
+		unsigned long fp;
+
+		rp = &current_thread_info()->reg_window[i];
+		fp = current_thread_info()->rwbuf_stkptrs[i];
+		err |= copy_to_user(&rwin->reg_window[i], rp,
+				    sizeof(struct reg_window32));
+		err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+	}
+	return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+	struct thread_info *t = current_thread_info();
+	int i, wsaved, err;
+
+	if (((unsigned long) rp) & 3)
+		return -EFAULT;
+
+	get_user(wsaved, &rp->wsaved);
+	if (wsaved > NSWINS)
+		return -EFAULT;
+
+	err = 0;
+	for (i = 0; i < wsaved; i++) {
+		err |= copy_from_user(&t->reg_window[i],
+				      &rp->reg_window[i],
+				      sizeof(struct reg_window32));
+		err |= __get_user(t->rwbuf_stkptrs[i],
+				  &rp->rwbuf_stkptrs[i]);
+	}
+	if (err)
+		return err;
+
+	t->w_saved = wsaved;
+	synchronize_user_stack();
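+	/* synchronize_user_stack() tries to push the saved windows back
+	 * onto the user stack; any window still pending afterwards means
+	 * the user stack is unwritable, so fail the restore.
+	 */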
+	if (t->w_saved)
+		return -EFAULT;
+	return 0;
+}
diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c
new file mode 100644
index 0000000..512e463
--- /dev/null
+++ b/arch/sparc/kernel/sigutil_64.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/thread_info.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+
+#include <asm/sigcontext.h>
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+#include <asm/switch_to.h>
+
+#include "sigutil.h"
+
+int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	unsigned long *fpregs = current_thread_info()->fpregs;
+	unsigned long fprs;
+	int err = 0;
+	
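+	/* fpsaved[0] records which halves of the FPU register file hold
+	 * live state: FPRS_DL covers %f0-%f31, FPRS_DU covers the upper
+	 * bank %f32-%f62, so copy out only the halves that are valid.
+	 */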
+	fprs = current_thread_info()->fpsaved[0];
+	if (fprs & FPRS_DL)
+		err |= copy_to_user(&fpu->si_float_regs[0], fpregs,
+				    (sizeof(unsigned int) * 32));
+	if (fprs & FPRS_DU)
+		err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16,
+				    (sizeof(unsigned int) * 32));
+	err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+	err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+	err |= __put_user(fprs, &fpu->si_fprs);
+
+	return err;
+}
+
+int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+{
+	unsigned long *fpregs = current_thread_info()->fpregs;
+	unsigned long fprs;
+	int err;
+
+	if (((unsigned long) fpu) & 7)
+		return -EFAULT;
+
+	err = get_user(fprs, &fpu->si_fprs);
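+	/* Drop the live FPU state (clear %fprs and PEF) so that the
+	 * first FPU access after sigreturn traps and reloads what we
+	 * restore below into the thread's register save area.
+	 */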
+	fprs_write(0);
+	regs->tstate &= ~TSTATE_PEF;
+	if (fprs & FPRS_DL)
+		err |= copy_from_user(fpregs, &fpu->si_float_regs[0],
+		       	       (sizeof(unsigned int) * 32));
+	if (fprs & FPRS_DU)
+		err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32],
+		       	       (sizeof(unsigned int) * 32));
+	err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr);
+	err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr);
+	current_thread_info()->fpsaved[0] |= fprs;
+	return err;
+}
+
+int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin)
+{
+	int i, err = __put_user(wsaved, &rwin->wsaved);
+
+	for (i = 0; i < wsaved; i++) {
+		struct reg_window *rp = &current_thread_info()->reg_window[i];
+		unsigned long fp = current_thread_info()->rwbuf_stkptrs[i];
+
+		err |= copy_to_user(&rwin->reg_window[i], rp,
+				    sizeof(struct reg_window));
+		err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]);
+	}
+	return err;
+}
+
+int restore_rwin_state(__siginfo_rwin_t __user *rp)
+{
+	struct thread_info *t = current_thread_info();
+	int i, wsaved, err;
+
+	if (((unsigned long) rp) & 7)
+		return -EFAULT;
+
+	get_user(wsaved, &rp->wsaved);
+	if (wsaved > NSWINS)
+		return -EFAULT;
+
+	err = 0;
+	for (i = 0; i < wsaved; i++) {
+		err |= copy_from_user(&t->reg_window[i],
+				      &rp->reg_window[i],
+				      sizeof(struct reg_window));
+		err |= __get_user(t->rwbuf_stkptrs[i],
+				  &rp->rwbuf_stkptrs[i]);
+	}
+	if (err)
+		return err;
+
+	set_thread_wsaved(wsaved);
+	synchronize_user_stack();
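+	/* synchronize_user_stack() tries to push the saved windows back
+	 * onto the user stack; any window still pending afterwards means
+	 * the user stack is unwritable, so fail the restore.
+	 */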
+	if (get_thread_wsaved())
+		return -EFAULT;
+	return 0;
+}
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
new file mode 100644
index 0000000..e078680
--- /dev/null
+++ b/arch/sparc/kernel/smp_32.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/* smp.c: Sparc SMP support.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 2004 Keith M Wesolowski (wesolows@foobazco.org)
+ */
+
+#include <asm/head.h>
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/profile.h>
+#include <linux/cpu.h>
+
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/oplib.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/cpudata.h>
+#include <asm/timer.h>
+#include <asm/leon.h>
+
+#include "kernel.h"
+#include "irq.h"
+
+volatile unsigned long cpu_callin_map[NR_CPUS] = {0,};
+
+cpumask_t smp_commenced_mask = CPU_MASK_NONE;
+
+const struct sparc32_ipi_ops *sparc32_ipi_ops;
+
+/* The only guaranteed locking primitive available on all Sparc
+ * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
+ * places the current byte at the effective address into dest_reg and
+ * places 0xff there afterwards.  Pretty lame locking primitive
+ * compared to the Alpha and the Intel, no?  Most Sparcs have a 'swap'
+ * instruction, which is much better...
+ */
+
+void smp_store_cpu_info(int id)
+{
+	int cpu_node;
+	int mid;
+
+	cpu_data(id).udelay_val = loops_per_jiffy;
+
+	cpu_find_by_mid(id, &cpu_node);
+	cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
+						     "clock-frequency", 0);
+	cpu_data(id).prom_node = cpu_node;
+	mid = cpu_get_hwmid(cpu_node);
+
+	if (mid < 0) {
+		printk(KERN_NOTICE "No MID found for CPU%d at node 0x%08x\n", id, cpu_node);
+		mid = 0;
+	}
+	cpu_data(id).mid = mid;
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	unsigned long bogosum = 0;
+	int cpu, num = 0;
+
+	for_each_online_cpu(cpu) {
+		num++;
+		bogosum += cpu_data(cpu).udelay_val;
+	}
+
+	printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+		num, bogosum/(500000/HZ),
+		(bogosum/(5000/HZ))%100);
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		smp4m_smp_done();
+		break;
+	case sun4d:
+		smp4d_smp_done();
+		break;
+	case sparc_leon:
+		leon_smp_done();
+		break;
+	case sun4e:
+		printk("SUN4E\n");
+		BUG();
+		break;
+	case sun4u:
+		printk("SUN4U\n");
+		BUG();
+		break;
+	default:
+		printk("UNKNOWN!\n");
+		BUG();
+		break;
+	}
+}
+
+void cpu_panic(void)
+{
+	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
+	panic("SMP bolixed\n");
+}
+
+struct linux_prom_registers smp_penguin_ctable = { 0 };
+
+void smp_send_reschedule(int cpu)
+{
+	/*
+	 * CPU model dependent way of implementing IPI generation targeting
+	 * a single CPU. The trap handler needs only to do trap entry/return
+	 * to call schedule.
+	 */
+	sparc32_ipi_ops->resched(cpu);
+}
+
+void smp_send_stop(void)
+{
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	/* trigger one IPI single call on one CPU */
+	sparc32_ipi_ops->single(cpu);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	int cpu;
+
+	/* trigger IPI mask call on each CPU */
+	for_each_cpu(cpu, mask)
+		sparc32_ipi_ops->mask_one(cpu);
+}
+
+void smp_resched_interrupt(void)
+{
+	irq_enter();
+	scheduler_ipi();
+	local_cpu_data().irq_resched_count++;
+	irq_exit();
+	/* re-schedule routine called by interrupt return code. */
+}
+
+void smp_call_function_single_interrupt(void)
+{
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	local_cpu_data().irq_call_count++;
+	irq_exit();
+}
+
+void smp_call_function_interrupt(void)
+{
+	irq_enter();
+	generic_smp_call_function_interrupt();
+	local_cpu_data().irq_call_count++;
+	irq_exit();
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	int i, cpuid, extra;
+
+	printk("Entering SMP Mode...\n");
+
+	extra = 0;
+	for (i = 0; !cpu_find_by_instance(i, NULL, &cpuid); i++) {
+		if (cpuid >= NR_CPUS)
+			extra++;
+	}
+	/* i = number of cpus */
+	if (extra && max_cpus > i - extra)
+		printk("Warning: NR_CPUS is too low to start all cpus\n");
+
+	smp_store_cpu_info(boot_cpu_id);
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		smp4m_boot_cpus();
+		break;
+	case sun4d:
+		smp4d_boot_cpus();
+		break;
+	case sparc_leon:
+		leon_boot_cpus();
+		break;
+	case sun4e:
+		printk("SUN4E\n");
+		BUG();
+		break;
+	case sun4u:
+		printk("SUN4U\n");
+		BUG();
+		break;
+	default:
+		printk("UNKNOWN!\n");
+		BUG();
+		break;
+	}
+}
+
+/* Set this up early so that things like the scheduler can init
+ * properly.  We use the same cpu mask for both the present and
+ * possible cpu map.
+ */
+void __init smp_setup_cpu_possible_map(void)
+{
+	int instance, mid;
+
+	instance = 0;
+	while (!cpu_find_by_instance(instance, NULL, &mid)) {
+		if (mid < NR_CPUS) {
+			set_cpu_possible(mid, true);
+			set_cpu_present(mid, true);
+		}
+		instance++;
+	}
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+	int cpuid = hard_smp_processor_id();
+
+	if (cpuid >= NR_CPUS) {
+		prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
+		prom_halt();
+	}
+	if (cpuid != 0)
+		printk("boot cpu id != 0, this could work but is untested\n");
+
+	current_thread_info()->cpu = cpuid;
+	set_cpu_online(cpuid, true);
+	set_cpu_possible(cpuid, true);
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	int ret = 0;
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		ret = smp4m_boot_one_cpu(cpu, tidle);
+		break;
+	case sun4d:
+		ret = smp4d_boot_one_cpu(cpu, tidle);
+		break;
+	case sparc_leon:
+		ret = leon_boot_one_cpu(cpu, tidle);
+		break;
+	case sun4e:
+		printk("SUN4E\n");
+		BUG();
+		break;
+	case sun4u:
+		printk("SUN4U\n");
+		BUG();
+		break;
+	default:
+		printk("UNKNOWN!\n");
+		BUG();
+		break;
+	}
+
+	if (!ret) {
+		cpumask_set_cpu(cpu, &smp_commenced_mask);
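+		/* The secondary spins in its startup path until it sees
+		 * itself in smp_commenced_mask; wait here until it has
+		 * marked itself online.
+		 */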
+		while (!cpu_online(cpu))
+			mb();
+	}
+	return ret;
+}
+
+static void arch_cpu_pre_starting(void *arg)
+{
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		sun4m_cpu_pre_starting(arg);
+		break;
+	case sun4d:
+		sun4d_cpu_pre_starting(arg);
+		break;
+	case sparc_leon:
+		leon_cpu_pre_starting(arg);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void arch_cpu_pre_online(void *arg)
+{
+	unsigned int cpuid = hard_smp_processor_id();
+
+	register_percpu_ce(cpuid);
+
+	calibrate_delay();
+	smp_store_cpu_info(cpuid);
+
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	switch(sparc_cpu_model) {
+	case sun4m:
+		sun4m_cpu_pre_online(arg);
+		break;
+	case sun4d:
+		sun4d_cpu_pre_online(arg);
+		break;
+	case sparc_leon:
+		leon_cpu_pre_online(arg);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void sparc_start_secondary(void *arg)
+{
+	unsigned int cpu;
+
+	/*
+	 * SMP booting is extremely fragile in some architectures. So run
+	 * the cpu initialization code first before anything else.
+	 */
+	arch_cpu_pre_starting(arg);
+
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	notify_cpu_starting(cpu);
+	arch_cpu_pre_online(arg);
+
+	/* Set the CPU in the cpu_online_mask */
+	set_cpu_online(cpu, true);
+
+	/* Enable local interrupts now */
+	local_irq_enable();
+
+	wmb();
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+
+	/* We should never reach here! */
+	BUG();
+}
+
+void smp_callin(void)
+{
+	sparc_start_secondary(NULL);
+}
+
+void smp_bogo(struct seq_file *m)
+{
+	int i;
+	
+	for_each_online_cpu(i) {
+		seq_printf(m,
+			   "Cpu%dBogo\t: %lu.%02lu\n",
+			   i,
+			   cpu_data(i).udelay_val/(500000/HZ),
+			   (cpu_data(i).udelay_val/(5000/HZ))%100);
+	}
+}
+
+void smp_info(struct seq_file *m)
+{
+	int i;
+
+	seq_printf(m, "State:\n");
+	for_each_online_cpu(i)
+		seq_printf(m, "CPU%d\t\t: online\n", i);
+}
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
new file mode 100644
index 0000000..d3ea1f3
--- /dev/null
+++ b/arch/sparc/kernel/smp_64.c
@@ -0,0 +1,1686 @@
+// SPDX-License-Identifier: GPL-2.0
+/* smp.c: Sparc64 SMP support.
+ *
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/hotplug.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/threads.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/cache.h>
+#include <linux/jiffies.h>
+#include <linux/profile.h>
+#include <linux/bootmem.h>
+#include <linux/vmalloc.h>
+#include <linux/ftrace.h>
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/kgdb.h>
+
+#include <asm/head.h>
+#include <asm/ptrace.h>
+#include <linux/atomic.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/cpudata.h>
+#include <asm/hvtramp.h>
+#include <asm/io.h>
+#include <asm/timer.h>
+#include <asm/setup.h>
+
+#include <asm/irq.h>
+#include <asm/irq_regs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/oplib.h>
+#include <linux/uaccess.h>
+#include <asm/starfire.h>
+#include <asm/tlb.h>
+#include <asm/sections.h>
+#include <asm/prom.h>
+#include <asm/mdesc.h>
+#include <asm/ldc.h>
+#include <asm/hypervisor.h>
+#include <asm/pcr.h>
+
+#include "cpumap.h"
+#include "kernel.h"
+
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
+	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS-1] = CPU_MASK_NONE };
+
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+	[0 ... NR_CPUS - 1] = CPU_MASK_NONE };
+
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
+
+static cpumask_t smp_commenced_mask;
+
+static DEFINE_PER_CPU(bool, poke);
+static bool cpu_poke;
+
+void smp_info(struct seq_file *m)
+{
+	int i;
+	
+	seq_printf(m, "State:\n");
+	for_each_online_cpu(i)
+		seq_printf(m, "CPU%d:\t\tonline\n", i);
+}
+
+void smp_bogo(struct seq_file *m)
+{
+	int i;
+	
+	for_each_online_cpu(i)
+		seq_printf(m,
+			   "Cpu%dClkTck\t: %016lx\n",
+			   i, cpu_data(i).clock_tick);
+}
+
+extern void setup_sparc64_timer(void);
+
+static volatile unsigned long callin_flag = 0;
+
+void smp_callin(void)
+{
+	int cpuid = hard_smp_processor_id();
+
+	__local_per_cpu_offset = __per_cpu_offset(cpuid);
+
+	if (tlb_type == hypervisor)
+		sun4v_ktsb_register();
+
+	__flush_tlb_all();
+
+	setup_sparc64_timer();
+
+	if (cheetah_pcache_forced_on)
+		cheetah_enable_pcache();
+
+	callin_flag = 1;
+	__asm__ __volatile__("membar #Sync\n\t"
+			     "flush  %%g6" : : : "memory");
+
+	/* Clear this or we will die instantly when we
+	 * schedule back to this idler...
+	 */
+	current_thread_info()->new_child = 0;
+
+	/* Attach to the address space of init_task. */
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+
+	/* inform the notifiers about the new cpu */
+	notify_cpu_starting(cpuid);
+
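+	/* Spin until the master adds us to smp_commenced_mask from
+	 * __cpu_up(); only then is it safe to mark ourselves online.
+	 */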
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
+		rmb();
+
+	set_cpu_online(cpuid, true);
+
+	/* idle thread is expected to have preempt disabled */
+	preempt_disable();
+
+	local_irq_enable();
+
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+void cpu_panic(void)
+{
+	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
+	panic("SMP bolixed\n");
+}
+
+/* This tick register synchronization scheme is taken entirely from
+ * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
+ *
+ * The only change I've made is to rework it so that the master
+ * initiates the synchronization instead of the slave. -DaveM
+ */
+
+#define MASTER	0
+#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))
+
+#define NUM_ROUNDS	64	/* magic value */
+#define NUM_ITERS	5	/* likewise */
+
+static DEFINE_RAW_SPINLOCK(itc_sync_lock);
+static unsigned long go[SLAVE + 1];
+
+#define DEBUG_TICK_SYNC	0
+
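+/* Estimate the offset between the slave's tick register and the
+ * master's: take the shortest observed round trip and assume the
+ * master's timestamp was captured at its midpoint.
+ */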
+static inline long get_delta (long *rt, long *master)
+{
+	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
+	unsigned long tcenter, t0, t1, tm;
+	unsigned long i;
+
+	for (i = 0; i < NUM_ITERS; i++) {
+		t0 = tick_ops->get_tick();
+		go[MASTER] = 1;
+		membar_safe("#StoreLoad");
+		while (!(tm = go[SLAVE]))
+			rmb();
+		go[SLAVE] = 0;
+		wmb();
+		t1 = tick_ops->get_tick();
+
+		if (t1 - t0 < best_t1 - best_t0)
+			best_t0 = t0, best_t1 = t1, best_tm = tm;
+	}
+
+	*rt = best_t1 - best_t0;
+	*master = best_tm - best_t0;
+
+	/* average best_t0 and best_t1 without overflow: */
+	tcenter = (best_t0/2 + best_t1/2);
+	if (best_t0 % 2 + best_t1 % 2 == 2)
+		tcenter++;
+	return tcenter - best_tm;
+}
+
+void smp_synchronize_tick_client(void)
+{
+	long i, delta, adj, adjust_latency = 0, done = 0;
+	unsigned long flags, rt, master_time_stamp;
+#if DEBUG_TICK_SYNC
+	struct {
+		long rt;	/* roundtrip time */
+		long master;	/* master's timestamp */
+		long diff;	/* difference between midpoint and master's timestamp */
+		long lat;	/* estimate of itc adjustment latency */
+	} t[NUM_ROUNDS];
+#endif
+
+	go[MASTER] = 1;
+
+	while (go[MASTER])
+		rmb();
+
+	local_irq_save(flags);
+	{
+		for (i = 0; i < NUM_ROUNDS; i++) {
+			delta = get_delta(&rt, &master_time_stamp);
+			if (delta == 0)
+				done = 1;	/* let's lock on to this... */
+
+			if (!done) {
+				if (i > 0) {
+					adjust_latency += -delta;
+					adj = -delta + adjust_latency/4;
+				} else
+					adj = -delta;
+
+				tick_ops->add_tick(adj);
+			}
+#if DEBUG_TICK_SYNC
+			t[i].rt = rt;
+			t[i].master = master_time_stamp;
+			t[i].diff = delta;
+			t[i].lat = adjust_latency/4;
+#endif
+		}
+	}
+	local_irq_restore(flags);
+
+#if DEBUG_TICK_SYNC
+	for (i = 0; i < NUM_ROUNDS; i++)
+		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
+		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
+#endif
+
+	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
+	       "(last diff %ld cycles, maxerr %lu cycles)\n",
+	       smp_processor_id(), delta, rt);
+}
+
+static void smp_start_sync_tick_client(int cpu);
+
+static void smp_synchronize_one_tick(int cpu)
+{
+	unsigned long flags, i;
+
+	go[MASTER] = 0;
+
+	smp_start_sync_tick_client(cpu);
+
+	/* wait for client to be ready */
+	while (!go[MASTER])
+		rmb();
+
+	/* now let the client proceed into his loop */
+	go[MASTER] = 0;
+	membar_safe("#StoreLoad");
+
+	raw_spin_lock_irqsave(&itc_sync_lock, flags);
+	{
+		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
+			while (!go[MASTER])
+				rmb();
+			go[MASTER] = 0;
+			wmb();
+			go[SLAVE] = tick_ops->get_tick();
+			membar_safe("#StoreLoad");
+		}
+	}
+	raw_spin_unlock_irqrestore(&itc_sync_lock, flags);
+}
+
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
+				void **descrp)
+{
+	extern unsigned long sparc64_ttable_tl0;
+	extern unsigned long kern_locked_tte_data;
+	struct hvtramp_descr *hdesc;
+	unsigned long trampoline_ra;
+	struct trap_per_cpu *tb;
+	u64 tte_vaddr, tte_data;
+	unsigned long hv_err;
+	int i;
+
+	hdesc = kzalloc(sizeof(*hdesc) +
+			(sizeof(struct hvtramp_mapping) *
+			 num_kernel_image_mappings - 1),
+			GFP_KERNEL);
+	if (!hdesc) {
+		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
+		       "hvtramp_descr.\n");
+		return;
+	}
+	*descrp = hdesc;
+
+	hdesc->cpu = cpu;
+	hdesc->num_mappings = num_kernel_image_mappings;
+
+	tb = &trap_block[cpu];
+
+	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
+	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
+
+	hdesc->thread_reg = thread_reg;
+
+	tte_vaddr = (unsigned long) KERNBASE;
+	tte_data = kern_locked_tte_data;
+
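+	/* Describe the kernel image to the trampoline: one locked 4MB
+	 * TTE per mapping, starting at KERNBASE.
+	 */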
+	for (i = 0; i < hdesc->num_mappings; i++) {
+		hdesc->maps[i].vaddr = tte_vaddr;
+		hdesc->maps[i].tte   = tte_data;
+		tte_vaddr += 0x400000;
+		tte_data  += 0x400000;
+	}
+
+	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
+
+	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
+				 kimage_addr_to_ra(&sparc64_ttable_tl0),
+				 __pa(hdesc));
+	if (hv_err)
+		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
+		       "gives error %lu\n", hv_err);
+}
+#endif
+
+extern unsigned long sparc64_cpu_startup;
+
+/* The OBP cpu startup callback truncates the 3rd arg cookie to
+ * 32-bits (I think) so to be safe we have it read the pointer
+ * contained here so we work on >4GB machines. -DaveM
+ */
+static struct thread_info *cpu_new_thread = NULL;
+
+static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
+{
+	unsigned long entry =
+		(unsigned long)(&sparc64_cpu_startup);
+	unsigned long cookie =
+		(unsigned long)(&cpu_new_thread);
+	void *descr = NULL;
+	int timeout, ret;
+
+	callin_flag = 0;
+	cpu_new_thread = task_thread_info(idle);
+
+	if (tlb_type == hypervisor) {
+#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
+		if (ldom_domaining_enabled)
+			ldom_startcpu_cpuid(cpu,
+					    (unsigned long) cpu_new_thread,
+					    &descr);
+		else
+#endif
+			prom_startcpu_cpuid(cpu, entry, cookie);
+	} else {
+		struct device_node *dp = of_find_node_by_cpuid(cpu);
+
+		prom_startcpu(dp->phandle, entry, cookie);
+	}
+
+	for (timeout = 0; timeout < 50000; timeout++) {
+		if (callin_flag)
+			break;
+		udelay(100);
+	}
+
+	if (callin_flag) {
+		ret = 0;
+	} else {
+		printk("Processor %d is stuck.\n", cpu);
+		ret = -ENODEV;
+	}
+	cpu_new_thread = NULL;
+
+	kfree(descr);
+
+	return ret;
+}
+
+static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
+{
+	u64 result, target;
+	int stuck, tmp;
+
+	if (this_is_starfire) {
+		/* map to real upaid */
+		cpu = (((cpu & 0x3c) << 1) |
+			((cpu & 0x40) >> 4) |
+			(cpu & 0x3));
+	}
+
+	target = (cpu << 14) | 0x70;
+again:
+	/* Ok, this is the real Spitfire Errata #54.
+	 * One must read back from a UDB internal register
+	 * after writes to the UDB interrupt dispatch, but
+	 * before the membar Sync for that write.
+	 * So we use the high UDB control register (ASI 0x7f,
+	 * ADDR 0x20) for the dummy read. -DaveM
+	 */
+	tmp = 0x40;
+	__asm__ __volatile__(
+	"wrpr	%1, %2, %%pstate\n\t"
+	"stxa	%4, [%0] %3\n\t"
+	"stxa	%5, [%0+%8] %3\n\t"
+	"add	%0, %8, %0\n\t"
+	"stxa	%6, [%0+%8] %3\n\t"
+	"membar	#Sync\n\t"
+	"stxa	%%g0, [%7] %3\n\t"
+	"membar	#Sync\n\t"
+	"mov	0x20, %%g1\n\t"
+	"ldxa	[%%g1] 0x7f, %%g0\n\t"
+	"membar	#Sync"
+	: "=r" (tmp)
+	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
+	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
+	  "r" (0x10), "0" (tmp)
+        : "g1");
+
+	/* NOTE: PSTATE_IE is still clear. */
+	stuck = 100000;
+	do {
+		__asm__ __volatile__("ldxa [%%g0] %1, %0"
+			: "=r" (result)
+			: "i" (ASI_INTR_DISPATCH_STAT));
+		if (result == 0) {
+			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
+					     : : "r" (pstate));
+			return;
+		}
+		stuck -= 1;
+		if (stuck == 0)
+			break;
+	} while (result & 0x1);
+	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
+			     : : "r" (pstate));
+	if (stuck == 0) {
+		printk("CPU[%d]: mondo stuckage result[%016llx]\n",
+		       smp_processor_id(), result);
+	} else {
+		udelay(2);
+		goto again;
+	}
+}
+
+static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
+{
+	u64 *mondo, data0, data1, data2;
+	u16 *cpu_list;
+	u64 pstate;
+	int i;
+
+	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+	data0 = mondo[0];
+	data1 = mondo[1];
+	data2 = mondo[2];
+	for (i = 0; i < cnt; i++)
+		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
+}
+
+/* Cheetah now allows sending the whole 64 bytes of data in the interrupt
+ * packet, but we have no use for that.  However, we do take advantage of
+ * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
+ */
+static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
+{
+	int nack_busy_id, is_jbus, need_more;
+	u64 *mondo, pstate, ver, busy_mask;
+	u16 *cpu_list;
+
+	cpu_list = __va(tb->cpu_list_pa);
+	mondo = __va(tb->cpu_mondo_block_pa);
+
+	/* Unfortunately, someone at Sun had the brilliant idea to make the
+	 * busy/nack fields hard-coded by ITID number for this Ultra-III
+	 * derivative processor.
+	 */
+	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
+		   (ver >> 32) == __SERRANO_ID);
+
+	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
+
+retry:
+	need_more = 0;
+	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
+			     : : "r" (pstate), "i" (PSTATE_IE));
+
+	/* Setup the dispatch data registers. */
+	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
+			     "stxa	%1, [%4] %6\n\t"
+			     "stxa	%2, [%5] %6\n\t"
+			     "membar	#Sync\n\t"
+			     : /* no outputs */
+			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
+			       "r" (0x40), "r" (0x50), "r" (0x60),
+			       "i" (ASI_INTR_W));
+
+	nack_busy_id = 0;
+	busy_mask = 0;
+	{
+		int i;
+
+		for (i = 0; i < cnt; i++) {
+			u64 target, nr;
+
+			nr = cpu_list[i];
+			if (nr == 0xffff)
+				continue;
+
+			target = (nr << 14) | 0x70;
+			if (is_jbus) {
+				busy_mask |= (0x1UL << (nr * 2));
+			} else {
+				target |= (nack_busy_id << 24);
+				busy_mask |= (0x1UL <<
+					      (nack_busy_id * 2));
+			}
+			__asm__ __volatile__(
+				"stxa	%%g0, [%0] %1\n\t"
+				"membar	#Sync\n\t"
+				: /* no outputs */
+				: "r" (target), "i" (ASI_INTR_W));
+			nack_busy_id++;
+			if (nack_busy_id == 32) {
+				need_more = 1;
+				break;
+			}
+		}
+	}
+
+	/* Now, poll for completion. */
+	{
+		u64 dispatch_stat, nack_mask;
+		long stuck;
+
+		stuck = 100000 * nack_busy_id;
+		nack_mask = busy_mask << 1;
+		do {
+			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
+					     : "=r" (dispatch_stat)
+					     : "i" (ASI_INTR_DISPATCH_STAT));
+			if (!(dispatch_stat & (busy_mask | nack_mask))) {
+				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
+						     : : "r" (pstate));
+				if (unlikely(need_more)) {
+					int i, this_cnt = 0;
+					for (i = 0; i < cnt; i++) {
+						if (cpu_list[i] == 0xffff)
+							continue;
+						cpu_list[i] = 0xffff;
+						this_cnt++;
+						if (this_cnt == 32)
+							break;
+					}
+					goto retry;
+				}
+				return;
+			}
+			if (!--stuck)
+				break;
+		} while (dispatch_stat & busy_mask);
+
+		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
+				     : : "r" (pstate));
+
+		if (dispatch_stat & busy_mask) {
+			/* Busy bits will not clear, continue instead
+			 * of freezing up on this cpu.
+			 */
+			printk("CPU[%d]: mondo stuckage result[%016llx]\n",
+			       smp_processor_id(), dispatch_stat);
+		} else {
+			int i, this_busy_nack = 0;
+
+			/* Delay some random time with interrupts enabled
+			 * to prevent deadlock.
+			 */
+			udelay(2 * nack_busy_id);
+
+			/* Clear out the mask bits for cpus which did not
+			 * NACK us.
+			 */
+			for (i = 0; i < cnt; i++) {
+				u64 check_mask, nr;
+
+				nr = cpu_list[i];
+				if (nr == 0xffff)
+					continue;
+
+				if (is_jbus)
+					check_mask = (0x2UL << (2*nr));
+				else
+					check_mask = (0x2UL <<
+						      this_busy_nack);
+				if ((dispatch_stat & check_mask) == 0)
+					cpu_list[i] = 0xffff;
+				this_busy_nack += 2;
+				if (this_busy_nack == 64)
+					break;
+			}
+
+			goto retry;
+		}
+	}
+}
+
+#define	CPU_MONDO_COUNTER(cpuid)	(cpu_mondo_counter[cpuid])
+#define	MONDO_USEC_WAIT_MIN		2
+#define	MONDO_USEC_WAIT_MAX		100
+#define	MONDO_RETRY_LIMIT		500000
+
+/* Multi-cpu list version.
+ *
+ * Deliver xcalls to 'cnt' cpus in 'cpu_list'.
+ * Sometimes not all cpus receive the mondo, requiring us to re-send
+ * it until every cpu has received it, or until the cpus are truly
+ * stuck, unable to receive the mondo at all, and we time out.
+ * Occasionally a target cpu strand is borrowed briefly by the
+ * hypervisor to perform a guest service, such as PCIe error handling.
+ * Given that service time, an overall wait of 1 second is reasonable
+ * for 1 cpu.
+ * Two in-between mondo check wait times are defined: 2 usec for a
+ * quick turnaround with a single cpu, and up to 100 usec for a large
+ * cpu count.  Delivering a mondo to many cpus can take longer, so we
+ * adjust the retry count as long as the target cpus are making
+ * forward progress.
+ */
+static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
+{
+	int this_cpu, tot_cpus, prev_sent, i, rem;
+	int usec_wait, retries, tot_retries;
+	u16 first_cpu = 0xffff;
+	unsigned long xc_rcvd = 0;
+	unsigned long status;
+	int ecpuerror_id = 0;
+	int enocpu_id = 0;
+	u16 *cpu_list;
+	u16 cpu;
+
+	this_cpu = smp_processor_id();
+	cpu_list = __va(tb->cpu_list_pa);
+	usec_wait = cnt * MONDO_USEC_WAIT_MIN;
+	if (usec_wait > MONDO_USEC_WAIT_MAX)
+		usec_wait = MONDO_USEC_WAIT_MAX;
+	retries = tot_retries = 0;
+	tot_cpus = cnt;
+	prev_sent = 0;
+
+	do {
+		int n_sent, mondo_delivered, target_cpu_busy;
+
+		status = sun4v_cpu_mondo_send(cnt,
+					      tb->cpu_list_pa,
+					      tb->cpu_mondo_block_pa);
+
+		/* HV_EOK means all cpus received the xcall, we're done.  */
+		if (likely(status == HV_EOK))
+			goto xcall_done;
+
+		/* If not these non-fatal errors, panic */
+		if (unlikely((status != HV_EWOULDBLOCK) &&
+			(status != HV_ECPUERROR) &&
+			(status != HV_ENOCPU)))
+			goto fatal_errors;
+
+		/* First, see if we made any forward progress.
+		 *
+		 * Go through the cpu_list, count the target cpus that have
+		 * received our mondo (n_sent), and those that did not (rem).
+		 * Re-pack cpu_list with the cpus that remain to be retried
+		 * at the front - this simplifies tracking the truly stalled
+		 * cpus.
+		 *
+		 * The hypervisor indicates successful sends by setting
+		 * cpu list entries to the value 0xffff.
+		 *
+		 * EWOULDBLOCK means some target cpus did not receive the
+		 * mondo and retrying usually helps.
+		 *
+		 * ECPUERROR means at least one target cpu is in an error
+		 * state; it's usually safe to skip the faulty cpu and retry.
+		 *
+		 * ENOCPU means one of the target cpus doesn't belong to the
+		 * domain, perhaps because it was offlined, which is
+		 * unexpected but not fatal; it's okay to skip the offlined
+		 * cpu.
+		 */
+		rem = 0;
+		n_sent = 0;
+		for (i = 0; i < cnt; i++) {
+			cpu = cpu_list[i];
+			if (likely(cpu == 0xffff)) {
+				n_sent++;
+			} else if ((status == HV_ECPUERROR) &&
+				(sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
+				ecpuerror_id = cpu + 1;
+			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
+				enocpu_id = cpu + 1;
+			} else {
+				cpu_list[rem++] = cpu;
+			}
+		}
+
+		/* No cpu remained, we're done. */
+		if (rem == 0)
+			break;
+
+		/* Otherwise, update the cpu count for retry. */
+		cnt = rem;
+
+		/* Record the overall number of mondos received by the
+		 * first of the remaining cpus.
+		 */
+		if (first_cpu != cpu_list[0]) {
+			first_cpu = cpu_list[0];
+			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+		}
+
+		/* Was any mondo delivered successfully? */
+		mondo_delivered = (n_sent > prev_sent);
+		prev_sent = n_sent;
+
+		/* or, was any target cpu busy processing other mondos? */
+		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
+		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
+
+		/* Retry count is for no progress. If we're making progress,
+		 * reset the retry count.
+		 */
+		if (likely(mondo_delivered || target_cpu_busy)) {
+			tot_retries += retries;
+			retries = 0;
+		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
+			goto fatal_mondo_timeout;
+		}
+
+		/* Delay a little bit to let other cpus catch up on
+		 * their cpu mondo queue work.
+		 */
+		if (!mondo_delivered)
+			udelay(usec_wait);
+
+		retries++;
+	} while (1);
+
+xcall_done:
+	if (unlikely(ecpuerror_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
+		       this_cpu, ecpuerror_id - 1);
+	} else if (unlikely(enocpu_id > 0)) {
+		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
+		       this_cpu, enocpu_id - 1);
+	}
+	return;
+
+fatal_errors:
+	/* fatal errors include bad alignment, etc */
+	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
+	       this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
+	panic("Unexpected SUN4V mondo error %lu\n", status);
+
+fatal_mondo_timeout:
+	/* some cpus being non-responsive to the cpu mondo */
+	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
+	       this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
+	panic("SUN4V mondo timeout panic\n");
+}
+
+static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
+
+static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
+{
+	struct trap_per_cpu *tb;
+	int this_cpu, i, cnt;
+	unsigned long flags;
+	u16 *cpu_list;
+	u64 *mondo;
+
+	/* We have to do this whole thing with interrupts fully disabled.
+	 * Otherwise if we send an xcall from interrupt context it will
+	 * corrupt both our mondo block and cpu list state.
+	 *
+	 * One consequence of this is that we cannot use timeout mechanisms
+	 * that depend upon interrupts being delivered locally.  So, for
+	 * example, we cannot sample jiffies and expect it to advance.
+	 *
+	 * Fortunately, udelay() uses %stick/%tick so we can use that.
+	 */
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+	tb = &trap_block[this_cpu];
+
+	mondo = __va(tb->cpu_mondo_block_pa);
+	mondo[0] = data0;
+	mondo[1] = data1;
+	mondo[2] = data2;
+	wmb();
+
+	cpu_list = __va(tb->cpu_list_pa);
+
+	/* Setup the initial cpu list.  */
+	cnt = 0;
+	for_each_cpu(i, mask) {
+		if (i == this_cpu || !cpu_online(i))
+			continue;
+		cpu_list[cnt++] = i;
+	}
+
+	if (cnt)
+		xcall_deliver_impl(tb, cnt);
+
+	local_irq_restore(flags);
+}
+
+/* Send cross call to all processors mentioned in MASK_P
+ * except self.  Really, there are only two cases currently,
+ * "cpu_online_mask" and "mm_cpumask(mm)".
+ */
+static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
+{
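+	/* Pack the first mondo word: the MMU context in the upper 32
+	 * bits, the low 32 bits of the handler's kernel text address in
+	 * the lower half.
+	 */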
+	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
+
+	xcall_deliver(data0, data1, data2, mask);
+}
+
+/* Send cross call to all processors except self. */
+static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
+{
+	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
+}
+
+extern unsigned long xcall_sync_tick;
+
+static void smp_start_sync_tick_client(int cpu)
+{
+	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
+		      cpumask_of(cpu));
+}
+
+extern unsigned long xcall_call_function;
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
+}
+
+extern unsigned long xcall_call_function_single;
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
+		      cpumask_of(cpu));
+}
+
+void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
+{
+	clear_softint(1 << irq);
+	irq_enter();
+	generic_smp_call_function_interrupt();
+	irq_exit();
+}
+
+void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
+{
+	clear_softint(1 << irq);
+	irq_enter();
+	generic_smp_call_function_single_interrupt();
+	irq_exit();
+}
+
+static void tsb_sync(void *info)
+{
+	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
+	struct mm_struct *mm = info;
+
+	/* It is not valid to test "current->active_mm == mm" here.
+	 *
+	 * The value of "current" is not changed atomically with
+	 * switch_mm().  But that's OK, we just need to check the
+	 * current cpu's trap block PGD physical address.
+	 */
+	if (tp->pgd_paddr == __pa(mm->pgd))
+		tsb_context_switch(mm);
+}
+
+void smp_tsb_sync(struct mm_struct *mm)
+{
+	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
+}
+
+extern unsigned long xcall_flush_tlb_mm;
+extern unsigned long xcall_flush_tlb_page;
+extern unsigned long xcall_flush_tlb_kernel_range;
+extern unsigned long xcall_fetch_glob_regs;
+extern unsigned long xcall_fetch_glob_pmu;
+extern unsigned long xcall_fetch_glob_pmu_n4;
+extern unsigned long xcall_receive_signal;
+extern unsigned long xcall_new_mmu_context_version;
+#ifdef CONFIG_KGDB
+extern unsigned long xcall_kgdb_capture;
+#endif
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+extern unsigned long xcall_flush_dcache_page_cheetah;
+#endif
+extern unsigned long xcall_flush_dcache_page_spitfire;
+
+static inline void __local_flush_dcache_page(struct page *page)
+{
+#ifdef DCACHE_ALIASING_POSSIBLE
+	__flush_dcache_page(page_address(page),
+			    ((tlb_type == spitfire) &&
+			     page_mapping_file(page) != NULL));
+#else
+	if (page_mapping_file(page) != NULL &&
+	    tlb_type == spitfire)
+		__flush_icache_page(__pa(page_address(page)));
+#endif
+}
+
+void smp_flush_dcache_page_impl(struct page *page, int cpu)
+{
+	int this_cpu;
+
+	if (tlb_type == hypervisor)
+		return;
+
+#ifdef CONFIG_DEBUG_DCFLUSH
+	atomic_inc(&dcpage_flushes);
+#endif
+
+	this_cpu = get_cpu();
+
+	if (cpu == this_cpu) {
+		__local_flush_dcache_page(page);
+	} else if (cpu_online(cpu)) {
+		void *pg_addr = page_address(page);
+		u64 data0 = 0;
+
+		if (tlb_type == spitfire) {
+			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
+			if (page_mapping_file(page) != NULL)
+				data0 |= ((u64)1 << 32);
+		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+#ifdef DCACHE_ALIASING_POSSIBLE
+			data0 =	((u64)&xcall_flush_dcache_page_cheetah);
+#endif
+		}
+		if (data0) {
+			xcall_deliver(data0, __pa(pg_addr),
+				      (u64) pg_addr, cpumask_of(cpu));
+#ifdef CONFIG_DEBUG_DCFLUSH
+			atomic_inc(&dcpage_flushes_xcall);
+#endif
+		}
+	}
+
+	put_cpu();
+}
+
+void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
+{
+	void *pg_addr;
+	u64 data0;
+
+	if (tlb_type == hypervisor)
+		return;
+
+	preempt_disable();
+
+#ifdef CONFIG_DEBUG_DCFLUSH
+	atomic_inc(&dcpage_flushes);
+#endif
+	data0 = 0;
+	pg_addr = page_address(page);
+	if (tlb_type == spitfire) {
+		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
+		if (page_mapping_file(page) != NULL)
+			data0 |= ((u64)1 << 32);
+	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+#ifdef DCACHE_ALIASING_POSSIBLE
+		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
+#endif
+	}
+	if (data0) {
+		xcall_deliver(data0, __pa(pg_addr),
+			      (u64) pg_addr, cpu_online_mask);
+#ifdef CONFIG_DEBUG_DCFLUSH
+		atomic_inc(&dcpage_flushes_xcall);
+#endif
+	}
+	__local_flush_dcache_page(page);
+
+	preempt_enable();
+}
+
+#ifdef CONFIG_KGDB
+void kgdb_roundup_cpus(unsigned long flags)
+{
+	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
+}
+#endif
+
+void smp_fetch_global_regs(void)
+{
+	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
+}
+
+void smp_fetch_global_pmu(void)
+{
+	if (tlb_type == hypervisor &&
+	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
+		smp_cross_call(&xcall_fetch_glob_pmu_n4, 0, 0, 0);
+	else
+		smp_cross_call(&xcall_fetch_glob_pmu, 0, 0, 0);
+}
+
+/* We know that the window frames of the user have been flushed
+ * to the stack before we get here because all callers of us
+ * are flush_tlb_*() routines, and these run after flush_cache_*()
+ * which performs the flushw.
+ *
+ * The SMP TLB coherency scheme we use works as follows:
+ *
+ * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
+ *    space has (potentially) executed on, this is the heuristic
+ *    we use to avoid doing cross calls.
+ *
+ *    Also, for flushing from kswapd and also for clones, we
+ *    use cpu_vm_mask as the list of cpus on which to run the TLB flush.
+ *
+ * 2) TLB context numbers are shared globally across all processors
+ *    in the system, this allows us to play several games to avoid
+ *    cross calls.
+ *
+ *    One invariant is that when a cpu switches to a process, and
+ *    that process's tsk->active_mm->cpu_vm_mask does not have the
+ *    current cpu's bit set, that tlb context is flushed locally.
+ *
+ *    If the address space is non-shared (ie. mm->count == 1) we avoid
+ *    cross calls when we want to flush the currently running process's
+ *    tlb state.  This is done by clearing all cpu bits except the current
+ *    processor's in current->mm->cpu_vm_mask and performing the
+ *    flush locally only.  This will force any subsequent cpus which run
+ *    this task to flush the context from the local tlb if the process
+ *    migrates to another cpu (again).
+ *
+ * 3) For shared address spaces (threads) and swapping we bite the
+ *    bullet for most cases and perform the cross call (but only to
+ *    the cpus listed in cpu_vm_mask).
+ *
+ *    The performance gain from "optimizing" away the cross call for threads is
+ *    questionable (in theory the big win for threads is the massive sharing of
+ *    address space state across processors).
+ */
+
+/* This currently is only used by the hugetlb arch pre-fault
+ * hook on UltraSPARC-III+ and later when changing the pagesize
+ * bits of the context register for an address space.
+ */
+void smp_flush_tlb_mm(struct mm_struct *mm)
+{
+	u32 ctx = CTX_HWBITS(mm->context);
+	int cpu = get_cpu();
+
+	if (atomic_read(&mm->mm_users) == 1) {
+		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
+		goto local_flush_and_out;
+	}
+
+	smp_cross_call_masked(&xcall_flush_tlb_mm,
+			      ctx, 0, 0,
+			      mm_cpumask(mm));
+
+local_flush_and_out:
+	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);
+
+	put_cpu();
+}
+
+struct tlb_pending_info {
+	unsigned long ctx;
+	unsigned long nr;
+	unsigned long *vaddrs;
+};
+
+static void tlb_pending_func(void *info)
+{
+	struct tlb_pending_info *t = info;
+
+	__flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
+}
+
+void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
+{
+	u32 ctx = CTX_HWBITS(mm->context);
+	struct tlb_pending_info info;
+	int cpu = get_cpu();
+
+	info.ctx = ctx;
+	info.nr = nr;
+	info.vaddrs = vaddrs;
+
+	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
+		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
+	else
+		smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
+				       &info, 1);
+
+	__flush_tlb_pending(ctx, nr, vaddrs);
+
+	put_cpu();
+}
+
+void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
+{
+	unsigned long context = CTX_HWBITS(mm->context);
+	int cpu = get_cpu();
+
+	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
+		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
+	else
+		smp_cross_call_masked(&xcall_flush_tlb_page,
+				      context, vaddr, 0,
+				      mm_cpumask(mm));
+	__flush_tlb_page(context, vaddr);
+
+	put_cpu();
+}
+
+void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	start &= PAGE_MASK;
+	end    = PAGE_ALIGN(end);
+	if (start != end) {
+		smp_cross_call(&xcall_flush_tlb_kernel_range,
+			       0, start, end);
+
+		__flush_tlb_kernel_range(start, end);
+	}
+}
+
+/* CPU capture. */
+/* #define CAPTURE_DEBUG */
+extern unsigned long xcall_capture;
+
+static atomic_t smp_capture_depth = ATOMIC_INIT(0);
+static atomic_t smp_capture_registry = ATOMIC_INIT(0);
+static unsigned long penguins_are_doing_time;
+
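+/* smp_capture() corrals all other cpus into a spin loop inside
+ * smp_penguin_jailcell() so the caller can run with the machine
+ * quiesced; smp_release() lets them go again.  Calls may nest via
+ * smp_capture_depth.
+ */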
+void smp_capture(void)
+{
+	int result = atomic_add_return(1, &smp_capture_depth);
+
+	if (result == 1) {
+		int ncpus = num_online_cpus();
+
+#ifdef CAPTURE_DEBUG
+		printk("CPU[%d]: Sending penguins to jail...",
+		       smp_processor_id());
+#endif
+		penguins_are_doing_time = 1;
+		atomic_inc(&smp_capture_registry);
+		smp_cross_call(&xcall_capture, 0, 0, 0);
+		while (atomic_read(&smp_capture_registry) != ncpus)
+			rmb();
+#ifdef CAPTURE_DEBUG
+		printk("done\n");
+#endif
+	}
+}
+
+void smp_release(void)
+{
+	if (atomic_dec_and_test(&smp_capture_depth)) {
+#ifdef CAPTURE_DEBUG
+		printk("CPU[%d]: Giving pardon to "
+		       "imprisoned penguins\n",
+		       smp_processor_id());
+#endif
+		penguins_are_doing_time = 0;
+		membar_safe("#StoreLoad");
+		atomic_dec(&smp_capture_registry);
+	}
+}
+
+/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
+ * set, so they can service tlb flush xcalls...
+ */
+extern void prom_world(int);
+
+void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
+{
+	clear_softint(1 << irq);
+
+	preempt_disable();
+
+	__asm__ __volatile__("flushw");
+	prom_world(1);
+	atomic_inc(&smp_capture_registry);
+	membar_safe("#StoreLoad");
+	while (penguins_are_doing_time)
+		rmb();
+	atomic_dec(&smp_capture_registry);
+	prom_world(0);
+
+	preempt_enable();
+}
+
+/* /proc/profile writes can call this, don't __init it please. */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+}
+
+void smp_prepare_boot_cpu(void)
+{
+}
+
+void __init smp_setup_processor_id(void)
+{
+	if (tlb_type == spitfire)
+		xcall_deliver_impl = spitfire_xcall_deliver;
+	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		xcall_deliver_impl = cheetah_xcall_deliver;
+	else
+		xcall_deliver_impl = hypervisor_xcall_deliver;
+}
+
+void __init smp_fill_in_cpu_possible_map(void)
+{
+	int possible_cpus = num_possible_cpus();
+	int i;
+
+	if (possible_cpus > nr_cpu_ids)
+		possible_cpus = nr_cpu_ids;
+
+	for (i = 0; i < possible_cpus; i++)
+		set_cpu_possible(i, true);
+	for (; i < NR_CPUS; i++)
+		set_cpu_possible(i, false);
+}
+
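+/* Build the core/socket/cache/thread sibling masks from the per-cpu
+ * identifiers (core_id, sock_id, max_cache_id, proc_id) filled in
+ * earlier during boot.
+ */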
+void smp_fill_in_sib_core_maps(void)
+{
+	unsigned int i;
+
+	for_each_present_cpu(i) {
+		unsigned int j;
+
+		cpumask_clear(&cpu_core_map[i]);
+		if (cpu_data(i).core_id == 0) {
+			cpumask_set_cpu(i, &cpu_core_map[i]);
+			continue;
+		}
+
+		for_each_present_cpu(j) {
+			if (cpu_data(i).core_id ==
+			    cpu_data(j).core_id)
+				cpumask_set_cpu(j, &cpu_core_map[i]);
+		}
+	}
+
+	for_each_present_cpu(i)  {
+		unsigned int j;
+
+		for_each_present_cpu(j)  {
+			if (cpu_data(i).max_cache_id ==
+			    cpu_data(j).max_cache_id)
+				cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+
+			if (cpu_data(i).sock_id == cpu_data(j).sock_id)
+				cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+		}
+	}
+
+	for_each_present_cpu(i) {
+		unsigned int j;
+
+		cpumask_clear(&per_cpu(cpu_sibling_map, i));
+		if (cpu_data(i).proc_id == -1) {
+			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
+			continue;
+		}
+
+		for_each_present_cpu(j) {
+			if (cpu_data(i).proc_id ==
+			    cpu_data(j).proc_id)
+				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
+		}
+	}
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+	int ret = smp_boot_one_cpu(cpu, tidle);
+
+	if (!ret) {
+		cpumask_set_cpu(cpu, &smp_commenced_mask);
+		while (!cpu_online(cpu))
+			mb();
+		if (!cpu_online(cpu)) {
+			ret = -ENODEV;
+		} else {
+			/* On SUN4V, writes to %tick and %stick are
+			 * not allowed.
+			 */
+			if (tlb_type != hypervisor)
+				smp_synchronize_one_tick(cpu);
+		}
+	}
+	return ret;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void cpu_play_dead(void)
+{
+	int cpu = smp_processor_id();
+	unsigned long pstate;
+
+	idle_task_exit();
+
+	if (tlb_type == hypervisor) {
+		struct trap_per_cpu *tb = &trap_block[cpu];
+
+		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
+				tb->cpu_mondo_pa, 0);
+		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
+				tb->dev_mondo_pa, 0);
+		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
+				tb->resum_mondo_pa, 0);
+		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
+				tb->nonresum_mondo_pa, 0);
+	}
+
+	cpumask_clear_cpu(cpu, &smp_commenced_mask);
+	membar_safe("#Sync");
+
+	local_irq_disable();
+
+	__asm__ __volatile__(
+		"rdpr	%%pstate, %0\n\t"
+		"wrpr	%0, %1, %%pstate"
+		: "=r" (pstate)
+		: "i" (PSTATE_IE));
+
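+	/* Interrupts are now disabled at the %pstate (IE) level; spin
+	 * until the master stops this cpu (see __cpu_die()).
+	 */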
+	while (1)
+		barrier();
+}
+
+int __cpu_disable(void)
+{
+	int cpu = smp_processor_id();
+	cpuinfo_sparc *c;
+	int i;
+
+	for_each_cpu(i, &cpu_core_map[cpu])
+		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
+	cpumask_clear(&cpu_core_map[cpu]);
+
+	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
+		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
+	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
+
+	c = &cpu_data(cpu);
+
+	c->core_id = 0;
+	c->proc_id = -1;
+
+	smp_wmb();
+
+	/* Make sure no interrupts point to this cpu.  */
+	fixup_irqs();
+
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
+	set_cpu_online(cpu, false);
+
+	cpu_map_rebuild();
+
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	int i;
+
+	for (i = 0; i < 100; i++) {
+		smp_rmb();
+		if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
+			break;
+		msleep(100);
+	}
+	if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
+		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+	} else {
+#if defined(CONFIG_SUN_LDOMS)
+		unsigned long hv_err;
+		int limit = 100;
+
+		do {
+			hv_err = sun4v_cpu_stop(cpu);
+			if (hv_err == HV_EOK) {
+				set_cpu_present(cpu, false);
+				break;
+			}
+		} while (--limit > 0);
+		if (limit <= 0) {
+			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
+			       hv_err);
+		}
+#endif
+	}
+}
+#endif
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+static void send_cpu_ipi(int cpu)
+{
+	xcall_deliver((u64) &xcall_receive_signal,
+			0, 0, cpumask_of(cpu));
+}
+
+void scheduler_poke(void)
+{
+	if (!cpu_poke)
+		return;
+
+	if (!__this_cpu_read(poke))
+		return;
+
+	__this_cpu_write(poke, false);
+	set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+}
+
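+/* Ask the hypervisor to wake an idle cpu without a full IPI: set the
+ * target's per-cpu "poke" flag and have sun4v_cpu_poke() nudge the cpu
+ * out of idle; the target then raises the resched softint from
+ * scheduler_poke() above.
+ */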
+static unsigned long send_cpu_poke(int cpu)
+{
+	unsigned long hv_err;
+
+	per_cpu(poke, cpu) = true;
+	hv_err = sun4v_cpu_poke(cpu);
+	if (hv_err != HV_EOK) {
+		per_cpu(poke, cpu) = false;
+		pr_err_ratelimited("%s: sun4v_cpu_poke() fails err=%lu\n",
+				    __func__, hv_err);
+	}
+
+	return hv_err;
+}
+
+void smp_send_reschedule(int cpu)
+{
+	if (cpu == smp_processor_id()) {
+		WARN_ON_ONCE(preemptible());
+		set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
+		return;
+	}
+
+	/* Use cpu poke to resume idle cpu if supported. */
+	if (cpu_poke && idle_cpu(cpu)) {
+		unsigned long ret;
+
+		ret = send_cpu_poke(cpu);
+		if (ret == HV_EOK)
+			return;
+	}
+
+	/* Use IPI in following cases:
+	 * - cpu poke not supported
+	 * - cpu not idle
+	 * - send_cpu_poke() returns with error
+	 */
+	send_cpu_ipi(cpu);
+}
+
+void smp_init_cpu_poke(void)
+{
+	unsigned long major;
+	unsigned long minor;
+	int ret;
+
+	if (tlb_type != hypervisor)
+		return;
+
+	ret = sun4v_hvapi_get(HV_GRP_CORE, &major, &minor);
+	if (ret) {
+		pr_debug("HV_GRP_CORE is not registered\n");
+		return;
+	}
+
+	if (major == 1 && minor >= 6) {
+		/* CPU POKE is registered. */
+		cpu_poke = true;
+		return;
+	}
+
+	pr_debug("CPU_POKE not supported\n");
+}
+
+void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
+{
+	clear_softint(1 << irq);
+	scheduler_ipi();
+}
+
+static void stop_this_cpu(void *dummy)
+{
+	set_cpu_online(smp_processor_id(), false);
+	prom_stopself();
+}
+
+void smp_send_stop(void)
+{
+	int cpu;
+
+	if (tlb_type == hypervisor) {
+		int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+		sunhv_migrate_hvcons_irq(this_cpu);
+#endif
+		for_each_online_cpu(cpu) {
+			if (cpu == this_cpu)
+				continue;
+
+			set_cpu_online(cpu, false);
+#ifdef CONFIG_SUN_LDOMS
+			if (ldom_domaining_enabled) {
+				unsigned long hv_err;
+				hv_err = sun4v_cpu_stop(cpu);
+				if (hv_err)
+					printk(KERN_ERR "sun4v_cpu_stop() "
+					       "failed err=%lu\n", hv_err);
+			} else
+#endif
+				prom_stopcpu_cpuid(cpu);
+		}
+	} else
+		smp_call_function(stop_this_cpu, NULL, 0);
+}
+
+/**
+ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
+ * @cpu: cpu to allocate for
+ * @size: size allocation in bytes
+ * @align: alignment
+ *
+ * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
+ * does the right thing for NUMA regardless of the current
+ * configuration.
+ *
+ * RETURNS:
+ * Pointer to the allocated area on success, NULL on failure.
+ */
+static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
+					size_t align)
+{
+	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int node = cpu_to_node(cpu);
+	void *ptr;
+
+	if (!node_online(node) || !NODE_DATA(node)) {
+		ptr = __alloc_bootmem(size, align, goal);
+		pr_info("cpu %d has no node %d or node-local memory\n",
+			cpu, node);
+		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
+			 cpu, size, __pa(ptr));
+	} else {
+		ptr = __alloc_bootmem_node(NODE_DATA(node),
+					   size, align, goal);
+		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
+			 "%016lx\n", cpu, size, node, __pa(ptr));
+	}
+	return ptr;
+#else
+	return __alloc_bootmem(size, align, goal);
+#endif
+}
+
+static void __init pcpu_free_bootmem(void *ptr, size_t size)
+{
+	free_bootmem(__pa(ptr), size);
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (cpu_to_node(from) == cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
+}
+
+static void __init pcpu_populate_pte(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (pgd_none(*pgd)) {
+		pud_t *new;
+
+		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		pgd_populate(&init_mm, pgd, new);
+	}
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud)) {
+		pmd_t *new;
+
+		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		pud_populate(&init_mm, pud, new);
+	}
+
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		pte_t *new;
+
+		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		pmd_populate_kernel(&init_mm, pmd, new);
+	}
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc = -EINVAL;
+
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
+					    pcpu_cpu_distance,
+					    pcpu_alloc_bootmem,
+					    pcpu_free_bootmem);
+		if (rc)
+			pr_warning("PERCPU: %s allocator failed (%d), "
+				   "falling back to page size\n",
+				   pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+					   pcpu_alloc_bootmem,
+					   pcpu_free_bootmem,
+					   pcpu_populate_pte);
+	if (rc < 0)
+		panic("cannot initialize percpu area (err=%d)", rc);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
+
+	/* Setup %g5 for the boot cpu.  */
+	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
+
+	of_fill_in_cpu_data();
+	if (tlb_type == hypervisor)
+		mdesc_fill_in_cpu_data(cpu_all_mask);
+}
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
new file mode 100644
index 0000000..09aa69e
--- /dev/null
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -0,0 +1,12 @@
+/*
+ * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+
+/* This is needed only for drivers/sbus/char/openprom.c */
+EXPORT_SYMBOL(saved_command_line);
diff --git a/arch/sparc/kernel/spiterrs.S b/arch/sparc/kernel/spiterrs.S
new file mode 100644
index 0000000..5427af4
--- /dev/null
+++ b/arch/sparc/kernel/spiterrs.S
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	/* We need to carefully read the error status, ACK the errors,
+	 * prevent recursive traps, and pass the information on to C
+	 * code for logging.
+	 *
+	 * We pass the AFAR in as-is, and we encode the status
+	 * information as described in asm-sparc64/sfafsr.h
+	 */
+	.type		__spitfire_access_error,#function
+__spitfire_access_error:
+	/* Disable ESTATE error reporting so that we do not take
+	 * recursive traps and RED state the processor.
+	 */
+	stxa		%g0, [%g0] ASI_ESTATE_ERROR_EN
+	membar		#Sync
+
+	mov		UDBE_UE, %g1
+	ldxa		[%g0] ASI_AFSR, %g4	! Get AFSR
+
+	/* __spitfire_cee_trap branches here with AFSR in %g4 and
+	 * UDBE_CE in %g1.  It only clears ESTATE_ERR_CE in the ESTATE
+	 * Error Enable register.
+	 */
+__spitfire_cee_trap_continue:
+	ldxa		[%g0] ASI_AFAR, %g5	! Get AFAR
+
+	rdpr		%tt, %g3
+	and		%g3, 0x1ff, %g3		! Paranoia
+	sllx		%g3, SFSTAT_TRAP_TYPE_SHIFT, %g3
+	or		%g4, %g3, %g4
+	rdpr		%tl, %g3
+	cmp		%g3, 1
+	mov		1, %g3
+	bleu		%xcc, 1f
+	 sllx		%g3, SFSTAT_TL_GT_ONE_SHIFT, %g3
+
+	or		%g4, %g3, %g4
+
+	/* Read in the UDB error register state, clearing the sticky
+	 * error bits as-needed.  We only clear them if the UE bit is
+	 * set.  Likewise, __spitfire_cee_trap below will only do so
+	 * if the CE bit is set.
+	 *
+	 * NOTE: UltraSparc-I/II have high and low UDB error
+	 *       registers, corresponding to the two UDB units
+	 *       present on those chips.  UltraSparc-IIi only
+	 *       has a single UDB, called "SDB" in the manual.
+	 *       For IIi the upper UDB register always reads
+	 *       as zero so for our purposes things will just
+	 *       work with the checks below.
+	 */
+1:	ldxa		[%g0] ASI_UDBH_ERROR_R, %g3
+	and		%g3, 0x3ff, %g7		! Paranoia
+	sllx		%g7, SFSTAT_UDBH_SHIFT, %g7
+	or		%g4, %g7, %g4
+	andcc		%g3, %g1, %g3		! UDBE_UE or UDBE_CE
+	be,pn		%xcc, 1f
+	 nop
+	stxa		%g3, [%g0] ASI_UDB_ERROR_W
+	membar		#Sync
+
+1:	mov		0x18, %g3
+	ldxa		[%g3] ASI_UDBL_ERROR_R, %g3
+	and		%g3, 0x3ff, %g7		! Paranoia
+	sllx		%g7, SFSTAT_UDBL_SHIFT, %g7
+	or		%g4, %g7, %g4
+	andcc		%g3, %g1, %g3		! UDBE_UE or UDBE_CE
+	be,pn		%xcc, 1f
+	 nop
+	mov		0x18, %g7
+	stxa		%g3, [%g7] ASI_UDB_ERROR_W
+	membar		#Sync
+
+1:	/* Ok, now that we've latched the error state, clear the
+	 * sticky bits in the AFSR.
+	 */
+	stxa		%g4, [%g0] ASI_AFSR
+	membar		#Sync
+
+	rdpr		%tl, %g2
+	cmp		%g2, 1
+	rdpr		%pil, %g2
+	bleu,pt		%xcc, 1f
+	 wrpr		%g0, PIL_NORMAL_MAX, %pil
+
+	ba,pt		%xcc, etraptl1
+	 rd		%pc, %g7
+
+	ba,a,pt		%xcc, 2f
+	 nop
+
+1:	ba,pt		%xcc, etrap_irq
+	 rd		%pc, %g7
+
+2:
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call	trace_hardirqs_off
+	 nop
+#endif
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_access_error
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		__spitfire_access_error,.-__spitfire_access_error
+
+	/* This is the trap handler entry point for ECC correctable
+	 * errors.  They are corrected, but we listen for the trap so
+	 * that the event can be logged.
+	 *
+	 * Disrupting errors are either:
+	 * 1) single-bit ECC errors during UDB reads to system
+	 *    memory
+	 * 2) data parity errors during write-back events
+	 *
+	 * As far as I can make out from the manual, the CEE trap is
+	 * only for correctable errors during memory read accesses by
+	 * the front-end of the processor.
+	 *
+	 * The code below is only for trap level 1 CEE events, as it
+	 * is the only situation where we can safely record and log.
+	 * For trap level >1 we just clear the CE bit in the AFSR and
+	 * return.
+	 *
+	 * This is just like __spitfire_access_error above, but it
+	 * specifically handles correctable errors.  If an
+	 * uncorrectable error is indicated in the AFSR we will branch
+	 * directly above to __spitfire_access_error to handle it
+	 * instead.  Uncorrectable therefore takes priority over
+	 * correctable, and the error logging C code will notice this
+	 * case by inspecting the trap type.
+	 */
+	.type		__spitfire_cee_trap,#function
+__spitfire_cee_trap:
+	ldxa		[%g0] ASI_AFSR, %g4	! Get AFSR
+	mov		1, %g3
+	sllx		%g3, SFAFSR_UE_SHIFT, %g3
+	andcc		%g4, %g3, %g0		! Check for UE
+	bne,pn		%xcc, __spitfire_access_error
+	 nop
+
+	/* Ok, in this case we only have a correctable error.
+	 * Indicate we only wish to capture that state in register
+	 * %g1, and we only disable CE error reporting unlike UE
+	 * handling which disables all errors.
+	 */
+	ldxa		[%g0] ASI_ESTATE_ERROR_EN, %g3
+	andn		%g3, ESTATE_ERR_CE, %g3
+	stxa		%g3, [%g0] ASI_ESTATE_ERROR_EN
+	membar		#Sync
+
+	/* Preserve AFSR in %g4, indicate UDB state to capture in %g1 */
+	ba,pt		%xcc, __spitfire_cee_trap_continue
+	 mov		UDBE_CE, %g1
+	.size		__spitfire_cee_trap,.-__spitfire_cee_trap
+
+	.type		__spitfire_data_access_exception_tl1,#function
+__spitfire_data_access_exception_tl1:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	mov		DMMU_SFAR, %g5
+	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
+	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
+	stxa		%g0, [%g3] ASI_DMMU	! Clear SFSR.FaultValid bit
+	membar		#Sync
+	rdpr		%tt, %g3
+	cmp		%g3, 0x80		! first win spill/fill trap
+	blu,pn		%xcc, 1f
+	 cmp		%g3, 0xff		! last win spill/fill trap
+	bgu,pn		%xcc, 1f
+	 nop
+	ba,pt		%xcc, winfix_dax
+	 rdpr		%tpc, %g3
+1:	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etraptl1
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_data_access_exception_tl1
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		__spitfire_data_access_exception_tl1,.-__spitfire_data_access_exception_tl1
+
+	.type		__spitfire_data_access_exception,#function
+__spitfire_data_access_exception:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	mov		DMMU_SFAR, %g5
+	ldxa		[%g3] ASI_DMMU, %g4	! Get SFSR
+	ldxa		[%g5] ASI_DMMU, %g5	! Get SFAR
+	stxa		%g0, [%g3] ASI_DMMU	! Clear SFSR.FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_data_access_exception
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		__spitfire_data_access_exception,.-__spitfire_data_access_exception
+
+	.type		__spitfire_insn_access_exception_tl1,#function
+__spitfire_insn_access_exception_tl1:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	ldxa		[%g3] ASI_IMMU, %g4	! Get SFSR
+	rdpr		%tpc, %g5		! IMMU has no SFAR, use TPC
+	stxa		%g0, [%g3] ASI_IMMU	! Clear FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etraptl1
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_insn_access_exception_tl1
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		__spitfire_insn_access_exception_tl1,.-__spitfire_insn_access_exception_tl1
+
+	.type		__spitfire_insn_access_exception,#function
+__spitfire_insn_access_exception:
+	rdpr		%pstate, %g4
+	wrpr		%g4, PSTATE_MG|PSTATE_AG, %pstate
+	mov		TLB_SFSR, %g3
+	ldxa		[%g3] ASI_IMMU, %g4	! Get SFSR
+	rdpr		%tpc, %g5		! IMMU has no SFAR, use TPC
+	stxa		%g0, [%g3] ASI_IMMU	! Clear FaultValid bit
+	membar		#Sync
+	sethi		%hi(109f), %g7
+	ba,pt		%xcc, etrap
+109:	 or		%g7, %lo(109b), %g7
+	mov		%l4, %o1
+	mov		%l5, %o2
+	call		spitfire_insn_access_exception
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+	.size		__spitfire_insn_access_exception,.-__spitfire_insn_access_exception
diff --git a/arch/sparc/kernel/sstate.c b/arch/sparc/kernel/sstate.c
new file mode 100644
index 0000000..ac8677c
--- /dev/null
+++ b/arch/sparc/kernel/sstate.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* sstate.c: System soft state support.
+ *
+ * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/notifier.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+
+#include <asm/hypervisor.h>
+#include <asm/spitfire.h>
+#include <asm/oplib.h>
+#include <asm/head.h>
+#include <asm/io.h>
+
+#include "kernel.h"
+
+static int hv_supports_soft_state;
+
+static void do_set_sstate(unsigned long state, const char *msg)
+{
+	unsigned long err;
+
+	if (!hv_supports_soft_state)
+		return;
+
+	err = sun4v_mach_set_soft_state(state, kimage_addr_to_ra(msg));
+	if (err) {
+		printk(KERN_WARNING "SSTATE: Failed to set soft-state to "
+		       "state[%lx] msg[%s], err=%lu\n",
+		       state, msg, err);
+	}
+}
+
+static const char booting_msg[32] __attribute__((aligned(32))) =
+	"Linux booting";
+static const char running_msg[32] __attribute__((aligned(32))) =
+	"Linux running";
+static const char halting_msg[32] __attribute__((aligned(32))) =
+	"Linux halting";
+static const char poweroff_msg[32] __attribute__((aligned(32))) =
+	"Linux powering off";
+static const char rebooting_msg[32] __attribute__((aligned(32))) =
+	"Linux rebooting";
+static const char panicking_msg[32] __attribute__((aligned(32))) =
+	"Linux panicking";
+
+static int sstate_reboot_call(struct notifier_block *np, unsigned long type, void *_unused)
+{
+	const char *msg;
+
+	switch (type) {
+	case SYS_DOWN:
+	default:
+		msg = rebooting_msg;
+		break;
+
+	case SYS_HALT:
+		msg = halting_msg;
+		break;
+
+	case SYS_POWER_OFF:
+		msg = poweroff_msg;
+		break;
+	}
+
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, msg);
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block sstate_reboot_notifier = {
+	.notifier_call = sstate_reboot_call,
+};
+
+static int sstate_panic_event(struct notifier_block *n, unsigned long event, void *ptr)
+{
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, panicking_msg);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block sstate_panic_block = {
+	.notifier_call	=	sstate_panic_event,
+	.priority	=	INT_MAX,
+};
+
+static int __init sstate_init(void)
+{
+	unsigned long major, minor;
+
+	if (tlb_type != hypervisor)
+		return 0;
+
+	major = 1;
+	minor = 0;
+	if (sun4v_hvapi_register(HV_GRP_SOFT_STATE, major, &minor))
+		return 0;
+
+	hv_supports_soft_state = 1;
+
+	prom_sun4v_guest_soft_state();
+
+	do_set_sstate(HV_SOFT_STATE_TRANSITION, booting_msg);
+
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &sstate_panic_block);
+	register_reboot_notifier(&sstate_reboot_notifier);
+
+	return 0;
+}
+
+core_initcall(sstate_init);
+
+static int __init sstate_running(void)
+{
+	do_set_sstate(HV_SOFT_STATE_NORMAL, running_msg);
+	return 0;
+}
+
+late_initcall(sstate_running);
diff --git a/arch/sparc/kernel/stacktrace.c b/arch/sparc/kernel/stacktrace.c
new file mode 100644
index 0000000..be4c14c
--- /dev/null
+++ b/arch/sparc/kernel/stacktrace.c
@@ -0,0 +1,86 @@
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/stacktrace.h>
+#include <linux/thread_info.h>
+#include <linux/ftrace.h>
+#include <linux/export.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+#include "kstack.h"
+
+static void __save_stack_trace(struct thread_info *tp,
+			       struct stack_trace *trace,
+			       bool skip_sched)
+{
+	unsigned long ksp, fp;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	struct task_struct *t;
+	int graph = 0;
+#endif
+
+	if (tp == current_thread_info()) {
+		stack_trace_flush();
+		__asm__ __volatile__("mov %%fp, %0" : "=r" (ksp));
+	} else {
+		ksp = tp->ksp;
+	}
+
+	fp = ksp + STACK_BIAS;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	t = tp->task;
+#endif
+	do {
+		struct sparc_stackf *sf;
+		struct pt_regs *regs;
+		unsigned long pc;
+
+		if (!kstack_valid(tp, fp))
+			break;
+
+		sf = (struct sparc_stackf *) fp;
+		regs = (struct pt_regs *) (sf + 1);
+
+		if (kstack_is_trap_frame(tp, regs)) {
+			if (!(regs->tstate & TSTATE_PRIV))
+				break;
+			pc = regs->tpc;
+			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+		} else {
+			pc = sf->callers_pc;
+			fp = (unsigned long)sf->fp + STACK_BIAS;
+		}
+
+		if (trace->skip > 0)
+			trace->skip--;
+		else if (!skip_sched || !in_sched_functions(pc)) {
+			trace->entries[trace->nr_entries++] = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
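+			/* On sparc, a call's real return address is the
+			 * saved return-address register plus 8.  If that
+			 * target is return_to_handler, the frame was
+			 * hijacked by the function graph tracer, so the
+			 * original return address is recovered from the
+			 * task's ret_stack.
+			 */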
+			if ((pc + 8UL) == (unsigned long) &return_to_handler) {
+				int index = t->curr_ret_stack;
+				if (t->ret_stack && index >= graph) {
+					pc = t->ret_stack[index - graph].ret;
+					if (trace->nr_entries <
+					    trace->max_entries)
+						trace->entries[trace->nr_entries++] = pc;
+					graph++;
+				}
+			}
+#endif
+		}
+	} while (trace->nr_entries < trace->max_entries);
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	__save_stack_trace(current_thread_info(), trace, false);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	struct thread_info *tp = task_thread_info(tsk);
+
+	__save_stack_trace(tp, trace, true);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/sparc/kernel/starfire.c b/arch/sparc/kernel/starfire.c
new file mode 100644
index 0000000..b8cd57d
--- /dev/null
+++ b/arch/sparc/kernel/starfire.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * starfire.c: Starfire/E10000 support.
+ *
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2000 Anton Blanchard (anton@samba.org)
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include <asm/page.h>
+#include <asm/oplib.h>
+#include <asm/smp.h>
+#include <asm/upa.h>
+#include <asm/starfire.h>
+
+/*
+ * A few places around the kernel check this to see if
+ * they need to call us to do things in a Starfire-specific
+ * way.
+ */
+int this_is_starfire = 0;
+
+void check_if_starfire(void)
+{
+	phandle ssnode = prom_finddevice("/ssp-serial");
+	if (ssnode != 0 && (s32)ssnode != -1)
+		this_is_starfire = 1;
+}
+
+/*
+ * Each Starfire board has 32 registers which perform translation
+ * and delivery of traditional interrupt packets into the extended
+ * Starfire hardware format.  Essentially, UPAIDs now have 2 more
+ * bits than in all previous Sun5 systems.
+ */
+struct starfire_irqinfo {
+	unsigned long imap_slots[32];
+	unsigned long tregs[32];
+	struct starfire_irqinfo *next;
+	int upaid, hwmid;
+};
+
+static struct starfire_irqinfo *sflist = NULL;
+
+/* Beam me up Scott(McNeil)y... */
+void starfire_hookup(int upaid)
+{
+	struct starfire_irqinfo *p;
+	unsigned long treg_base, hwmid, i;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		prom_printf("starfire_hookup: No memory, this is insane.\n");
+		prom_halt();
+	}
+	treg_base = 0x100fc000000UL;
+	hwmid = ((upaid & 0x3c) << 1) |
+		((upaid & 0x40) >> 4) |
+		(upaid & 0x3);
+	p->hwmid = hwmid;
+	treg_base += (hwmid << 33UL);
+	treg_base += 0x200UL;
+	for (i = 0; i < 32; i++) {
+		p->imap_slots[i] = 0UL;
+		p->tregs[i] = treg_base + (i * 0x10UL);
+		/* Let's play it safe and not overwrite existing mappings */
+		if (upa_readl(p->tregs[i]) != 0)
+			p->imap_slots[i] = 0xdeadbeaf;
+	}
+	p->upaid = upaid;
+	p->next = sflist;
+	sflist = p;
+}
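+
+/* The upaid -> hwmid shuffle above moves bit 6 down to bit 2 and
+ * shifts bits 5:2 up to bits 6:3, leaving bits 1:0 in place.  For
+ * example, upaid 0x45 gives ((0x45 & 0x3c) << 1) | ((0x45 & 0x40) >> 4)
+ * | (0x45 & 0x3) = 0x08 | 0x04 | 0x01 = 0x0d.  starfire_translate()
+ * below applies the identical mapping before writing the translation
+ * register.
+ */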
+
+unsigned int starfire_translate(unsigned long imap,
+				unsigned int upaid)
+{
+	struct starfire_irqinfo *p;
+	unsigned int bus_hwmid;
+	unsigned int i;
+
+	bus_hwmid = (((unsigned long)imap) >> 33) & 0x7f;
+	for (p = sflist; p != NULL; p = p->next)
+		if (p->hwmid == bus_hwmid)
+			break;
+	if (p == NULL) {
+		prom_printf("XFIRE: Cannot find irqinfo for imap %016lx\n",
+			    ((unsigned long)imap));
+		prom_halt();
+	}
+	for (i = 0; i < 32; i++) {
+		if (p->imap_slots[i] == imap ||
+		    p->imap_slots[i] == 0UL)
+			break;
+	}
+	if (i == 32) {
+		printk("starfire_translate: Are you kidding me?\n");
+		panic("Lucy in the sky....");
+	}
+	p->imap_slots[i] = imap;
+
+	/* map to real upaid */
+	upaid = (((upaid & 0x3c) << 1) |
+		 ((upaid & 0x40) >> 4) |
+		 (upaid & 0x3));
+
+	upa_writel(upaid, p->tregs[i]);
+
+	return i;
+}
diff --git a/arch/sparc/kernel/sun4d_irq.c b/arch/sparc/kernel/sun4d_irq.c
new file mode 100644
index 0000000..d869d40
--- /dev/null
+++ b/arch/sparc/kernel/sun4d_irq.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SS1000/SC2000 interrupt handling.
+ *
+ *  Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *  Heavily based on arch/sparc/kernel/irq.c.
+ */
+
+#include <linux/kernel_stat.h>
+#include <linux/slab.h>
+#include <linux/seq_file.h>
+
+#include <asm/timer.h>
+#include <asm/traps.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/sbi.h>
+#include <asm/cacheflush.h>
+#include <asm/setup.h>
+#include <asm/oplib.h>
+
+#include "kernel.h"
+#include "irq.h"
+
+/* Sun4d interrupts fall roughly into two categories: SBUS and
+ * cpu-local.  CPU-local interrupts cover the timer interrupts
+ * and whatnot, and we encode those as normal PILs between
+ * 0 and 15.
+ * SBUS interrupts are encoded as a combination of board, level and slot.
+ */
+
+struct sun4d_handler_data {
+	unsigned int cpuid;    /* target cpu */
+	unsigned int real_irq; /* interrupt level */
+};
+
+
+static unsigned int sun4d_encode_irq(int board, int lvl, int slot)
+{
+	return (board + 1) << 5 | (lvl << 2) | slot;
+}
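+
+/* Worked example of the encoding above: board 2, SBUS level 3, slot 1
+ * encodes as ((2 + 1) << 5) | (3 << 2) | 1 == 0x6d.
+ * sun4d_sbus_handler_irq() below recomputes the same value when it
+ * dispatches a pending slot interrupt.
+ */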
+
+struct sun4d_timer_regs {
+	u32	l10_timer_limit;
+	u32	l10_cur_countx;
+	u32	l10_limit_noclear;
+	u32	ctrl;
+	u32	l10_cur_count;
+};
+
+static struct sun4d_timer_regs __iomem *sun4d_timers;
+
+#define SUN4D_TIMER_IRQ        10
+
+/* Specifies which cpu handles interrupts from which board.
+ * Index is board - value is cpu.
+ */
+static unsigned char board_to_cpu[32];
+
+static int pil_to_sbus[] = {
+	0,
+	0,
+	1,
+	2,
+	0,
+	3,
+	0,
+	4,
+	0,
+	5,
+	0,
+	6,
+	0,
+	7,
+	0,
+	0,
+};
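+
+/* pil_to_sbus[pil] gives the SBUS interrupt level (1-7) serviced at
+ * that PIL, or 0 for cpu-local PILs.  For example, PIL 7 maps to SBUS
+ * level 4; sun4d_sbus_handler_irq() then forms sbil = (4 << 2) and
+ * grabs the per-level nibble (0xf << sbil) from each SBI.
+ */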
+
+/* Exported for sun4d_smp.c */
+DEFINE_SPINLOCK(sun4d_imsk_lock);
+
+/* SBUS interrupts are encoded integers including the board number
+ * (plus one), the SBUS level, and the SBUS slot number.  Sun4D
+ * IRQ dispatch is done by:
+ *
+ * 1) Read the BW local interrupt table in order to get the bus
+ *    interrupt mask.
+ *
+ *    This table is indexed by SBUS interrupt level which can be
+ *    derived from the PIL we got interrupted on.
+ *
+ * 2) For each bus showing interrupt pending from #1, read the
+ *    SBI interrupt state register.  This will indicate which slots
+ *    have interrupts pending for that SBUS interrupt level.
+ *
+ * 3) Call the generic IRQ support.
+ */
+static void sun4d_sbus_handler_irq(int sbusl)
+{
+	unsigned int bus_mask;
+	unsigned int sbino, slot;
+	unsigned int sbil;
+
+	bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
+	bw_clear_intr_mask(sbusl, bus_mask);
+
+	sbil = (sbusl << 2);
+	/* Loop for each pending SBI */
+	for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1) {
+		unsigned int idx, mask;
+
+		if (!(bus_mask & 1))
+			continue;
+		/* XXX This seems to ACK the irq twice.  acquire_sbi()
+		 * XXX uses swap, therefore this writes 0xf << sbil,
+		 * XXX then later release_sbi() will write the individual
+		 * XXX bits which were set again.
+		 */
+		mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
+		mask &= (0xf << sbil);
+
+		/* Loop for each pending SBI slot */
+		slot = (1 << sbil);
+		for (idx = 0; mask != 0; idx++, slot <<= 1) {
+			unsigned int pil;
+			struct irq_bucket *p;
+
+			if (!(mask & slot))
+				continue;
+
+			mask &= ~slot;
+			pil = sun4d_encode_irq(sbino, sbusl, idx);
+
+			p = irq_map[pil];
+			while (p) {
+				struct irq_bucket *next;
+
+				next = p->next;
+				generic_handle_irq(p->irq);
+				p = next;
+			}
+			release_sbi(SBI2DEVID(sbino), slot);
+		}
+	}
+}
+
+void sun4d_handler_irq(unsigned int pil, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+	/* SBUS IRQ level (1 - 7) */
+	int sbusl = pil_to_sbus[pil];
+
+	/* FIXME: Is this necessary?? */
+	cc_get_ipen();
+
+	cc_set_iclr(1 << pil);
+
+#ifdef CONFIG_SMP
+	/*
+	 * Check IPI data structures after IRQ has been cleared. Hard and Soft
+	 * IRQ can happen at the same time, so both cases are always handled.
+	 */
+	if (pil == SUN4D_IPI_IRQ)
+		sun4d_ipi_interrupt();
+#endif
+
+	old_regs = set_irq_regs(regs);
+	irq_enter();
+	if (sbusl == 0) {
+		/* cpu interrupt */
+		struct irq_bucket *p;
+
+		p = irq_map[pil];
+		while (p) {
+			struct irq_bucket *next;
+
+			next = p->next;
+			generic_handle_irq(p->irq);
+			p = next;
+		}
+	} else {
+		/* SBUS interrupt */
+		sun4d_sbus_handler_irq(sbusl);
+	}
+	irq_exit();
+	set_irq_regs(old_regs);
+}
+
+
+static void sun4d_mask_irq(struct irq_data *data)
+{
+	struct sun4d_handler_data *handler_data = irq_data_get_irq_handler_data(data);
+	unsigned int real_irq;
+#ifdef CONFIG_SMP
+	int cpuid = handler_data->cpuid;
+	unsigned long flags;
+#endif
+	real_irq = handler_data->real_irq;
+#ifdef CONFIG_SMP
+	spin_lock_irqsave(&sun4d_imsk_lock, flags);
+	cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq));
+	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+	cc_set_imsk(cc_get_imsk() | (1 << real_irq));
+#endif
+}
+
+static void sun4d_unmask_irq(struct irq_data *data)
+{
+	struct sun4d_handler_data *handler_data = irq_data_get_irq_handler_data(data);
+	unsigned int real_irq;
+#ifdef CONFIG_SMP
+	int cpuid = handler_data->cpuid;
+	unsigned long flags;
+#endif
+	real_irq = handler_data->real_irq;
+
+#ifdef CONFIG_SMP
+	spin_lock_irqsave(&sun4d_imsk_lock, flags);
+	cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) & ~(1 << real_irq));
+	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+	cc_set_imsk(cc_get_imsk() & ~(1 << real_irq));
+#endif
+}
+
+static unsigned int sun4d_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	sun4d_unmask_irq(data);
+	return 0;
+}
+
+static void sun4d_shutdown_irq(struct irq_data *data)
+{
+	sun4d_mask_irq(data);
+	irq_unlink(data->irq);
+}
+
+static struct irq_chip sun4d_irq = {
+	.name		= "sun4d",
+	.irq_startup	= sun4d_startup_irq,
+	.irq_shutdown	= sun4d_shutdown_irq,
+	.irq_unmask	= sun4d_unmask_irq,
+	.irq_mask	= sun4d_mask_irq,
+};
+
+#ifdef CONFIG_SMP
+/* Setup IRQ distribution scheme. */
+void __init sun4d_distribute_irqs(void)
+{
+	struct device_node *dp;
+
+	int cpuid = cpu_logical_map(1);
+
+	if (cpuid == -1)
+		cpuid = cpu_logical_map(0);
+	for_each_node_by_name(dp, "sbi") {
+		int devid = of_getintprop_default(dp, "device-id", 0);
+		int board = of_getintprop_default(dp, "board#", 0);
+		board_to_cpu[board] = cpuid;
+		set_sbi_tid(devid, cpuid << 3);
+	}
+	printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid);
+}
+#endif
+
+static void sun4d_clear_clock_irq(void)
+{
+	sbus_readl(&sun4d_timers->l10_timer_limit);
+}
+
+static void sun4d_load_profile_irq(int cpu, unsigned int limit)
+{
+	unsigned int value = limit ? timer_value(limit) : 0;
+	bw_set_prof_limit(cpu, value);
+}
+
+static void __init sun4d_load_profile_irqs(void)
+{
+	int cpu = 0, mid;
+
+	while (!cpu_find_by_instance(cpu, NULL, &mid)) {
+		sun4d_load_profile_irq(mid >> 3, 0);
+		cpu++;
+	}
+}
+
+static unsigned int _sun4d_build_device_irq(unsigned int real_irq,
+                                            unsigned int pil,
+                                            unsigned int board)
+{
+	struct sun4d_handler_data *handler_data;
+	unsigned int irq;
+
+	irq = irq_alloc(real_irq, pil);
+	if (irq == 0) {
+		prom_printf("IRQ: allocate for %d %d %d failed\n",
+			real_irq, pil, board);
+		goto err_out;
+	}
+
+	handler_data = irq_get_handler_data(irq);
+	if (unlikely(handler_data))
+		goto err_out;
+
+	handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC);
+	if (unlikely(!handler_data)) {
+		prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n");
+		prom_halt();
+	}
+	handler_data->cpuid    = board_to_cpu[board];
+	handler_data->real_irq = real_irq;
+	irq_set_chip_and_handler_name(irq, &sun4d_irq,
+	                              handle_level_irq, "level");
+	irq_set_handler_data(irq, handler_data);
+
+err_out:
+	return irq;
+}
+
+
+
+static unsigned int sun4d_build_device_irq(struct platform_device *op,
+                                           unsigned int real_irq)
+{
+	struct device_node *dp = op->dev.of_node;
+	struct device_node *board_parent, *bus = dp->parent;
+	char *bus_connection;
+	const struct linux_prom_registers *regs;
+	unsigned int pil;
+	unsigned int irq;
+	int board, slot;
+	int sbusl;
+
+	irq = real_irq;
+	while (bus) {
+		if (!strcmp(bus->name, "sbi")) {
+			bus_connection = "io-unit";
+			break;
+		}
+
+		if (!strcmp(bus->name, "bootbus")) {
+			bus_connection = "cpu-unit";
+			break;
+		}
+
+		bus = bus->parent;
+	}
+	if (!bus)
+		goto err_out;
+
+	regs = of_get_property(dp, "reg", NULL);
+	if (!regs)
+		goto err_out;
+
+	slot = regs->which_io;
+
+	/*
+	 * If the bus node's parent is not io-unit/cpu-unit, or the io-unit/cpu-unit
+	 * lacks a "board#" property, something is very wrong.
+	 */
+	if (!bus->parent || strcmp(bus->parent->name, bus_connection)) {
+		printk(KERN_ERR "%s: Error, parent is not %s.\n",
+			bus->full_name, bus_connection);
+		goto err_out;
+	}
+	board_parent = bus->parent;
+	board = of_getintprop_default(board_parent, "board#", -1);
+	if (board == -1) {
+		printk(KERN_ERR "%s: Error, lacks board# property.\n",
+			board_parent->full_name);
+		goto err_out;
+	}
+
+	sbusl = pil_to_sbus[real_irq];
+	if (sbusl)
+		pil = sun4d_encode_irq(board, sbusl, slot);
+	else
+		pil = real_irq;
+
+	irq = _sun4d_build_device_irq(real_irq, pil, board);
+err_out:
+	return irq;
+}
+
+static unsigned int sun4d_build_timer_irq(unsigned int board,
+                                          unsigned int real_irq)
+{
+	return _sun4d_build_device_irq(real_irq, real_irq, board);
+}
+
+
+static void __init sun4d_fixup_trap_table(void)
+{
+#ifdef CONFIG_SMP
+	unsigned long flags;
+	struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (14 - 1)];
+
+	/* Adjust so that we jump directly to smp4d_ticker */
+	lvl14_save[2] += smp4d_ticker - real_irq_entry;
+
+	/* For SMP we use the level 14 ticker; however, the bootup code
+	 * has copied the firmware's level 14 vector into the boot cpu's
+	 * trap table, so we must fix this now or we get squashed.
+	 */
+	local_irq_save(flags);
+	patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
+	trap_table->inst_one = lvl14_save[0];
+	trap_table->inst_two = lvl14_save[1];
+	trap_table->inst_three = lvl14_save[2];
+	trap_table->inst_four = lvl14_save[3];
+	local_ops->cache_all();
+	local_irq_restore(flags);
+#endif
+}
+
+static void __init sun4d_init_timers(void)
+{
+	struct device_node *dp;
+	struct resource res;
+	unsigned int irq;
+	const u32 *reg;
+	int err;
+	int board;
+
+	dp = of_find_node_by_name(NULL, "cpu-unit");
+	if (!dp) {
+		prom_printf("sun4d_init_timers: Unable to find cpu-unit\n");
+		prom_halt();
+	}
+
+	/* Which cpu-unit we use is arbitrary; we can view the bootbus timer
+	 * registers via any cpu's mapping.  The first 'reg' property is the
+	 * bootbus.
+	 */
+	reg = of_get_property(dp, "reg", NULL);
+	if (!reg) {
+		prom_printf("sun4d_init_timers: No reg property\n");
+		prom_halt();
+	}
+
+	board = of_getintprop_default(dp, "board#", -1);
+	if (board == -1) {
+		prom_printf("sun4d_init_timers: No board# property on cpu-unit\n");
+		prom_halt();
+	}
+
+	of_node_put(dp);
+
+	res.start = reg[1];
+	res.end = reg[2] - 1;
+	res.flags = reg[0] & 0xff;
+	sun4d_timers = of_ioremap(&res, BW_TIMER_LIMIT,
+				  sizeof(struct sun4d_timer_regs), "user timer");
+	if (!sun4d_timers) {
+		prom_printf("sun4d_init_timers: Can't map timer regs\n");
+		prom_halt();
+	}
+
+#ifdef CONFIG_SMP
+	sparc_config.cs_period = SBUS_CLOCK_RATE * 2;  /* 2 seconds */
+#else
+	sparc_config.cs_period = SBUS_CLOCK_RATE / HZ; /* 1/HZ sec  */
+	sparc_config.features |= FEAT_L10_CLOCKEVENT;
+#endif
+	sparc_config.features |= FEAT_L10_CLOCKSOURCE;
+	sbus_writel(timer_value(sparc_config.cs_period),
+		    &sun4d_timers->l10_timer_limit);
+
+	master_l10_counter = &sun4d_timers->l10_cur_count;
+
+	irq = sun4d_build_timer_irq(board, SUN4D_TIMER_IRQ);
+	err = request_irq(irq, timer_interrupt, IRQF_TIMER, "timer", NULL);
+	if (err) {
+		prom_printf("sun4d_init_timers: request_irq() failed with %d\n",
+		             err);
+		prom_halt();
+	}
+	sun4d_load_profile_irqs();
+	sun4d_fixup_trap_table();
+}
+
+void __init sun4d_init_sbi_irq(void)
+{
+	struct device_node *dp;
+	int target_cpu;
+
+	target_cpu = boot_cpu_id;
+	for_each_node_by_name(dp, "sbi") {
+		int devid = of_getintprop_default(dp, "device-id", 0);
+		int board = of_getintprop_default(dp, "board#", 0);
+		unsigned int mask;
+
+		set_sbi_tid(devid, target_cpu << 3);
+		board_to_cpu[board] = target_cpu;
+
+		/* Get rid of pending irqs from PROM */
+		mask = acquire_sbi(devid, 0xffffffff);
+		if (mask) {
+			printk(KERN_ERR "Clearing pending IRQs %08x on SBI %d\n",
+			       mask, board);
+			release_sbi(devid, mask);
+		}
+	}
+}
+
+void __init sun4d_init_IRQ(void)
+{
+	local_irq_disable();
+
+	sparc_config.init_timers      = sun4d_init_timers;
+	sparc_config.build_device_irq = sun4d_build_device_irq;
+	sparc_config.clock_rate       = SBUS_CLOCK_RATE;
+	sparc_config.clear_clock_irq  = sun4d_clear_clock_irq;
+	sparc_config.load_profile_irq = sun4d_load_profile_irq;
+
+	/* Cannot enable interrupts until OBP ticker is disabled. */
+}
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
new file mode 100644
index 0000000..ff30f03
--- /dev/null
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Sparc SS1000/SC2000 SMP support.
+ *
+ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * Based on sun4m's smp.c, which is:
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/profile.h>
+#include <linux/delay.h>
+#include <linux/sched/mm.h>
+#include <linux/cpu.h>
+
+#include <asm/cacheflush.h>
+#include <asm/switch_to.h>
+#include <asm/tlbflush.h>
+#include <asm/timer.h>
+#include <asm/oplib.h>
+#include <asm/sbi.h>
+#include <asm/mmu.h>
+
+#include "kernel.h"
+#include "irq.h"
+
+#define IRQ_CROSS_CALL		15
+
+static volatile int smp_processors_ready;
+static int smp_highest_cpu;
+
+static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned long val)
+{
+	__asm__ __volatile__("swap [%1], %0\n\t" :
+			     "=&r" (val), "=&r" (ptr) :
+			     "0" (val), "1" (ptr));
+	return val;
+}
+
+static void smp4d_ipi_init(void);
+
+static unsigned char cpu_leds[32];
+
+static inline void show_leds(int cpuid)
+{
+	cpuid &= 0x1e;
+	__asm__ __volatile__ ("stba %0, [%1] %2" : :
+			      "r" ((cpu_leds[cpuid] << 4) | cpu_leds[cpuid+1]),
+			      "r" (ECSR_BASE(cpuid) | BB_LEDS),
+			      "i" (ASI_M_CTL));
+}
+
+void sun4d_cpu_pre_starting(void *arg)
+{
+	int cpuid = hard_smp_processor_id();
+
+	/* Show we are alive */
+	cpu_leds[cpuid] = 0x6;
+	show_leds(cpuid);
+
+	/* Enable level15 interrupt, disable level14 interrupt for now */
+	cc_set_imsk((cc_get_imsk() & ~0x8000) | 0x4000);
+}
+
+void sun4d_cpu_pre_online(void *arg)
+{
+	unsigned long flags;
+	int cpuid;
+
+	cpuid = hard_smp_processor_id();
+
+	/* Unblock the master CPU _only_ once the scheduler state
+	 * of all secondary CPUs is up-to-date; only after SMP
+	 * initialization is the master allowed to enter the
+	 * scheduler code.
+	 */
+	sun4d_swap((unsigned long *)&cpu_callin_map[cpuid], 1);
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	while ((unsigned long)current_set[cpuid] < PAGE_OFFSET)
+		barrier();
+
+	while (current_set[cpuid]->cpu != cpuid)
+		barrier();
+
+	/* Fix idle thread fields. */
+	__asm__ __volatile__("ld [%0], %%g6\n\t"
+			     : : "r" (&current_set[cpuid])
+			     : "memory" /* paranoid */);
+
+	cpu_leds[cpuid] = 0x9;
+	show_leds(cpuid);
+
+	/* Attach to the address space of init_task. */
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
+		barrier();
+
+	spin_lock_irqsave(&sun4d_imsk_lock, flags);
+	cc_set_imsk(cc_get_imsk() & ~0x4000); /* Allow PIL 14 as well */
+	spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+}
+
+/*
+ *	Cycle through the processors asking the PROM to start each one.
+ */
+void __init smp4d_boot_cpus(void)
+{
+	smp4d_ipi_init();
+	if (boot_cpu_id)
+		current_set[0] = NULL;
+	local_ops->cache_all();
+}
+
+int smp4d_boot_one_cpu(int i, struct task_struct *idle)
+{
+	unsigned long *entry = &sun4d_cpu_startup;
+	int timeout;
+	int cpu_node;
+
+	cpu_find_by_instance(i, &cpu_node, NULL);
+	current_set[i] = task_thread_info(idle);
+	/*
+	 * Initialize the contexts table
+	 * Since the call to prom_startcpu() trashes the structure,
+	 * we need to re-initialize it for each cpu
+	 */
+	smp_penguin_ctable.which_io = 0;
+	smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys;
+	smp_penguin_ctable.reg_size = 0;
+
+	/* whirrr, whirrr, whirrrrrrrrr... */
+	printk(KERN_INFO "Starting CPU %d at %p\n", i, entry);
+	local_ops->cache_all();
+	prom_startcpu(cpu_node,
+		      &smp_penguin_ctable, 0, (char *)entry);
+
+	printk(KERN_INFO "prom_startcpu returned :)\n");
+
+	/* wheee... it's going... */
+	for (timeout = 0; timeout < 10000; timeout++) {
+		if (cpu_callin_map[i])
+			break;
+		udelay(200);
+	}
+
+	if (!(cpu_callin_map[i])) {
+		printk(KERN_ERR "Processor %d is stuck.\n", i);
+		return -ENODEV;
+
+	}
+	local_ops->cache_all();
+	return 0;
+}
+
+void __init smp4d_smp_done(void)
+{
+	int i, first;
+	int *prev;
+
+	/* setup cpu list for irq rotation */
+	first = 0;
+	prev = &first;
+	for_each_online_cpu(i) {
+		*prev = i;
+		prev = &cpu_data(i).next;
+	}
+	*prev = first;
+	local_ops->cache_all();
+
+	/* Ok, they are spinning and ready to go. */
+	smp_processors_ready = 1;
+	sun4d_distribute_irqs();
+}
+
+/* Per-cpu structure telling the IPI interrupt handler which work was requested */
+struct sun4d_ipi_work {
+	int single;
+	int msk;
+	int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work);
+
+/* Initialize IPIs on the SUN4D SMP machine */
+static void __init smp4d_ipi_init(void)
+{
+	int cpu;
+	struct sun4d_ipi_work *work;
+
+	printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ);
+
+	for_each_possible_cpu(cpu) {
+		work = &per_cpu(sun4d_ipi_work, cpu);
+		work->single = work->msk = work->resched = 0;
+	}
+}
+
+void sun4d_ipi_interrupt(void)
+{
+	struct sun4d_ipi_work *work = this_cpu_ptr(&sun4d_ipi_work);
+
+	if (work->single) {
+		work->single = 0;
+		smp_call_function_single_interrupt();
+	}
+	if (work->msk) {
+		work->msk = 0;
+		smp_call_function_interrupt();
+	}
+	if (work->resched) {
+		work->resched = 0;
+		smp_resched_interrupt();
+	}
+}
+
+/* +-------+-------------+-----------+------------------------------------+
+ * | bcast |  devid      |   sid     |              levels mask           |
+ * +-------+-------------+-----------+------------------------------------+
+ *  31      30         23 22       15 14                                 0
+ */
+#define IGEN_MESSAGE(bcast, devid, sid, levels) \
+	(((bcast) << 31) | ((devid) << 23) | ((sid) << 15) | (levels))
+
+static void sun4d_send_ipi(int cpu, int level)
+{
+	cc_set_igen(IGEN_MESSAGE(0, cpu << 3, 6 + ((level >> 1) & 7), 1 << (level - 1)));
+}
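+
+/* Worked example: sun4d_send_ipi(2, IRQ_CROSS_CALL) yields
+ * IGEN_MESSAGE(0, 0x10, 6 + ((15 >> 1) & 7), 1 << 14), i.e. bcast=0,
+ * devid=0x10, sid=13, levels=0x4000, packing to 0x0806c000.
+ */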
+
+static void sun4d_ipi_single(int cpu)
+{
+	struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+	/* Mark work */
+	work->single = 1;
+
+	/* Generate IRQ on the CPU */
+	sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void sun4d_ipi_mask_one(int cpu)
+{
+	struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+	/* Mark work */
+	work->msk = 1;
+
+	/* Generate IRQ on the CPU */
+	sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void sun4d_ipi_resched(int cpu)
+{
+	struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+	/* Mark work */
+	work->resched = 1;
+
+	/* Generate IRQ on the CPU (any IRQ will cause resched) */
+	sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static struct smp_funcall {
+	smpfunc_t func;
+	unsigned long arg1;
+	unsigned long arg2;
+	unsigned long arg3;
+	unsigned long arg4;
+	unsigned long arg5;
+	unsigned char processors_in[NR_CPUS];  /* Set when ipi entered. */
+	unsigned char processors_out[NR_CPUS]; /* Set when ipi exited. */
+} ccall_info __attribute__((aligned(8)));
+
+static DEFINE_SPINLOCK(cross_call_lock);
+
+/* Cross calls must be serialized, at least currently. */
+static void sun4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
+			     unsigned long arg2, unsigned long arg3,
+			     unsigned long arg4)
+{
+	if (smp_processors_ready) {
+		register int high = smp_highest_cpu;
+		unsigned long flags;
+
+		spin_lock_irqsave(&cross_call_lock, flags);
+
+		{
+			/*
+			 * If you make changes here, make sure
+			 * gcc generates proper code...
+			 */
+			register smpfunc_t f asm("i0") = func;
+			register unsigned long a1 asm("i1") = arg1;
+			register unsigned long a2 asm("i2") = arg2;
+			register unsigned long a3 asm("i3") = arg3;
+			register unsigned long a4 asm("i4") = arg4;
+			register unsigned long a5 asm("i5") = 0;
+
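+			/* Each "std" stores an even/odd register pair
+			 * (8 bytes), so the three stores below copy func
+			 * and arg1-arg5 into ccall_info in one go.
+			 */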
+			__asm__ __volatile__(
+				"std %0, [%6]\n\t"
+				"std %2, [%6 + 8]\n\t"
+				"std %4, [%6 + 16]\n\t" : :
+				"r"(f), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5),
+				"r" (&ccall_info.func));
+		}
+
+		/* Init receive/complete mapping, plus fire the IPI's off. */
+		{
+			register int i;
+
+			cpumask_clear_cpu(smp_processor_id(), &mask);
+			cpumask_and(&mask, cpu_online_mask, &mask);
+			for (i = 0; i <= high; i++) {
+				if (cpumask_test_cpu(i, &mask)) {
+					ccall_info.processors_in[i] = 0;
+					ccall_info.processors_out[i] = 0;
+					sun4d_send_ipi(i, IRQ_CROSS_CALL);
+				}
+			}
+		}
+
+		{
+			register int i;
+
+			i = 0;
+			do {
+				if (!cpumask_test_cpu(i, &mask))
+					continue;
+				while (!ccall_info.processors_in[i])
+					barrier();
+			} while (++i <= high);
+
+			i = 0;
+			do {
+				if (!cpumask_test_cpu(i, &mask))
+					continue;
+				while (!ccall_info.processors_out[i])
+					barrier();
+			} while (++i <= high);
+		}
+
+		spin_unlock_irqrestore(&cross_call_lock, flags);
+	}
+}
+
+/* Running cross calls. */
+void smp4d_cross_call_irq(void)
+{
+	int i = hard_smp_processor_id();
+
+	ccall_info.processors_in[i] = 1;
+	ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3,
+			ccall_info.arg4, ccall_info.arg5);
+	ccall_info.processors_out[i] = 1;
+}
+
+void smp4d_percpu_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+	int cpu = hard_smp_processor_id();
+	struct clock_event_device *ce;
+	static int cpu_tick[NR_CPUS];
+	static char led_mask[] = { 0xe, 0xd, 0xb, 0x7, 0xb, 0xd };
+
+	old_regs = set_irq_regs(regs);
+	bw_get_prof_limit(cpu);
+	bw_clear_intr_mask(0, 1);	/* INTR_TABLE[0] & 1 is Profile IRQ */
+
+	cpu_tick[cpu]++;
+	if (!(cpu_tick[cpu] & 15)) {
+		if (cpu_tick[cpu] == 0x60)
+			cpu_tick[cpu] = 0;
+		cpu_leds[cpu] = led_mask[cpu_tick[cpu] >> 4];
+		show_leds(cpu);
+	}
+
+	ce = &per_cpu(sparc32_clockevent, cpu);
+
+	irq_enter();
+	ce->event_handler(ce);
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+static const struct sparc32_ipi_ops sun4d_ipi_ops = {
+	.cross_call = sun4d_cross_call,
+	.resched    = sun4d_ipi_resched,
+	.single     = sun4d_ipi_single,
+	.mask_one   = sun4d_ipi_mask_one,
+};
+
+void __init sun4d_init_smp(void)
+{
+	int i;
+
+	/* Patch ipi15 trap table */
+	t_nmi[1] = t_nmi[1] + (linux_trap_ipi15_sun4d - linux_trap_ipi15_sun4m);
+
+	sparc32_ipi_ops = &sun4d_ipi_ops;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		ccall_info.processors_in[i] = 1;
+		ccall_info.processors_out[i] = 1;
+	}
+}
diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c
new file mode 100644
index 0000000..c01767a
--- /dev/null
+++ b/arch/sparc/kernel/sun4m_irq.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sun4m irq support
+ *
+ *  djhr: Hacked out of irq.c into a CPU dependent version.
+ *
+ *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1995 Miguel de Icaza (miguel@nuclecu.unam.mx)
+ *  Copyright (C) 1995 Pete A. Zaitcev (zaitcev@yahoo.com)
+ *  Copyright (C) 1996 Dave Redman (djhr@tadpole.co.uk)
+ */
+
+#include <linux/slab.h>
+#include <linux/sched/debug.h>
+
+#include <asm/timer.h>
+#include <asm/traps.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/cacheflush.h>
+
+#include "irq.h"
+#include "kernel.h"
+
+/* Sample sun4m IRQ layout:
+ *
+ * 0x22 - Power
+ * 0x24 - ESP SCSI
+ * 0x26 - Lance ethernet
+ * 0x2b - Floppy
+ * 0x2c - Zilog uart
+ * 0x32 - SBUS level 0
+ * 0x33 - Parallel port, SBUS level 1
+ * 0x35 - SBUS level 2
+ * 0x37 - SBUS level 3
+ * 0x39 - Audio, Graphics card, SBUS level 4
+ * 0x3b - SBUS level 5
+ * 0x3d - SBUS level 6
+ *
+ * Each interrupt source has a mask bit in the interrupt registers.
+ * When the mask bit is set, it blocks interrupt delivery.  So you
+ * clear the bit to enable the interrupt.
+ *
+ * Interrupts numbered less than 0x10 are software-triggered interrupts
+ * and are unused by Linux.
+ *
+ * Interrupt level assignment on sun4m:
+ *
+ *	level		source
+ * ------------------------------------------------------------
+ *	  1		softint-1
+ *	  2		softint-2, VME/SBUS level 1
+ *	  3		softint-3, VME/SBUS level 2
+ *	  4		softint-4, onboard SCSI
+ *	  5		softint-5, VME/SBUS level 3
+ *	  6		softint-6, onboard ETHERNET
+ *	  7		softint-7, VME/SBUS level 4
+ *	  8		softint-8, onboard VIDEO
+ *	  9		softint-9, VME/SBUS level 5, Module Interrupt
+ *	 10		softint-10, system counter/timer
+ *	 11		softint-11, VME/SBUS level 6, Floppy
+ *	 12		softint-12, Keyboard/Mouse, Serial
+ *	 13		softint-13, VME/SBUS level 7, ISDN Audio
+ *	 14		softint-14, per-processor counter/timer
+ *	 15		softint-15, Asynchronous Errors (broadcast)
+ *
+ * Each interrupt source is masked distinctly in the sun4m interrupt
+ * registers.  The PIL level alone is therefore ambiguous, since multiple
+ * interrupt sources map to a single PIL.
+ *
+ * This ambiguity is resolved in the 'intr' property for device nodes
+ * in the OF device tree.  Each 'intr' property entry is composed of
+ * two 32-bit words.  The first word is the IRQ priority value, which
+ * is what we're interested in.  The second word is the IRQ vector, which
+ * is unused.
+ *
+ * The low 4 bits of the IRQ priority indicate the PIL, and the upper
+ * 4 bits indicate onboard vs. SBUS leveled vs. VME leveled.  0x20
+ * means onboard, 0x30 means SBUS leveled, and 0x40 means VME leveled.
+ *
+ * For example, an 'intr' IRQ priority value of 0x24 is onboard SCSI
+ * whereas a value of 0x33 is SBUS level 2.  Here are some sample
+ * 'intr' property IRQ priority values from ss4, ss5, ss10, ss20, and
+ * Tadpole S3 GX systems.
+ *
+ * esp:		0x24	onboard ESP SCSI
+ * le:		0x26	onboard Lance ETHERNET
+ * p9100:	0x32	SBUS level 1 P9100 video
+ * bpp:		0x33	SBUS level 2 BPP parallel port device
+ * DBRI:	0x39	SBUS level 5 DBRI ISDN audio
+ * SUNW,leo:	0x39	SBUS level 5 LEO video
+ * pcmcia:	0x3b	SBUS level 6 PCMCIA controller
+ * uctrl:	0x3b	SBUS level 6 UCTRL device
+ * modem:	0x3d	SBUS level 7 MODEM
+ * zs:		0x2c	onboard keyboard/mouse/serial
+ * floppy:	0x2b	onboard Floppy
+ * power:	0x22	onboard power device (XXX unknown mask bit XXX)
+ */
+
+
+/* Code in entry.S needs to get at these register mappings.  */
+struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
+struct sun4m_irq_global __iomem *sun4m_irq_global;
+
+struct sun4m_handler_data {
+	bool    percpu;
+	long    mask;
+};
+
+/* Dave Redman (djhr@tadpole.co.uk)
+ * The sun4m interrupt registers.
+ */
+#define SUN4M_INT_ENABLE	0x80000000
+#define SUN4M_INT_E14		0x00000080
+#define SUN4M_INT_E10		0x00080000
+
+#define	SUN4M_INT_MASKALL	0x80000000	  /* mask all interrupts */
+#define	SUN4M_INT_MODULE_ERR	0x40000000	  /* module error */
+#define	SUN4M_INT_M2S_WRITE_ERR	0x20000000	  /* write buffer error */
+#define	SUN4M_INT_ECC_ERR	0x10000000	  /* ecc memory error */
+#define	SUN4M_INT_VME_ERR	0x08000000	  /* vme async error */
+#define	SUN4M_INT_FLOPPY	0x00400000	  /* floppy disk */
+#define	SUN4M_INT_MODULE	0x00200000	  /* module interrupt */
+#define	SUN4M_INT_VIDEO		0x00100000	  /* onboard video */
+#define	SUN4M_INT_REALTIME	0x00080000	  /* system timer */
+#define	SUN4M_INT_SCSI		0x00040000	  /* onboard scsi */
+#define	SUN4M_INT_AUDIO		0x00020000	  /* audio/isdn */
+#define	SUN4M_INT_ETHERNET	0x00010000	  /* onboard ethernet */
+#define	SUN4M_INT_SERIAL	0x00008000	  /* serial ports */
+#define	SUN4M_INT_KBDMS		0x00004000	  /* keyboard/mouse */
+#define	SUN4M_INT_SBUSBITS	0x00003F80	  /* sbus int bits */
+#define	SUN4M_INT_VMEBITS	0x0000007F	  /* vme int bits */
+
+#define	SUN4M_INT_ERROR		(SUN4M_INT_MODULE_ERR |    \
+				 SUN4M_INT_M2S_WRITE_ERR | \
+				 SUN4M_INT_ECC_ERR |       \
+				 SUN4M_INT_VME_ERR)
+
+#define SUN4M_INT_SBUS(x)	(1 << (x+7))
+#define SUN4M_INT_VME(x)	(1 << (x))
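+
+/* E.g. SUN4M_INT_SBUS(3) == 1 << 10 == 0x00000400, inside
+ * SUN4M_INT_SBUSBITS (0x00003F80, bits 7-13), and SUN4M_INT_VME(3)
+ * == 0x00000008, inside SUN4M_INT_VMEBITS (0x0000007F, bits 0-6).
+ */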
+
+/* Interrupt levels used by OBP */
+#define	OBP_INT_LEVEL_SOFT	0x10
+#define	OBP_INT_LEVEL_ONBOARD	0x20
+#define	OBP_INT_LEVEL_SBUS	0x30
+#define	OBP_INT_LEVEL_VME	0x40
+
+#define SUN4M_TIMER_IRQ         (OBP_INT_LEVEL_ONBOARD | 10)
+#define SUN4M_PROFILE_IRQ       (OBP_INT_LEVEL_ONBOARD | 14)
+
+static unsigned long sun4m_imask[0x50] = {
+	/* 0x00 - SMP */
+	0,  SUN4M_SOFT_INT(1),
+	SUN4M_SOFT_INT(2),  SUN4M_SOFT_INT(3),
+	SUN4M_SOFT_INT(4),  SUN4M_SOFT_INT(5),
+	SUN4M_SOFT_INT(6),  SUN4M_SOFT_INT(7),
+	SUN4M_SOFT_INT(8),  SUN4M_SOFT_INT(9),
+	SUN4M_SOFT_INT(10), SUN4M_SOFT_INT(11),
+	SUN4M_SOFT_INT(12), SUN4M_SOFT_INT(13),
+	SUN4M_SOFT_INT(14), SUN4M_SOFT_INT(15),
+	/* 0x10 - soft */
+	0,  SUN4M_SOFT_INT(1),
+	SUN4M_SOFT_INT(2),  SUN4M_SOFT_INT(3),
+	SUN4M_SOFT_INT(4),  SUN4M_SOFT_INT(5),
+	SUN4M_SOFT_INT(6),  SUN4M_SOFT_INT(7),
+	SUN4M_SOFT_INT(8),  SUN4M_SOFT_INT(9),
+	SUN4M_SOFT_INT(10), SUN4M_SOFT_INT(11),
+	SUN4M_SOFT_INT(12), SUN4M_SOFT_INT(13),
+	SUN4M_SOFT_INT(14), SUN4M_SOFT_INT(15),
+	/* 0x20 - onboard */
+	0, 0, 0, 0,
+	SUN4M_INT_SCSI,  0, SUN4M_INT_ETHERNET, 0,
+	SUN4M_INT_VIDEO, SUN4M_INT_MODULE,
+	SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY,
+	(SUN4M_INT_SERIAL | SUN4M_INT_KBDMS),
+	SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR,
+	/* 0x30 - sbus */
+	0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1),
+	0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3),
+	0, SUN4M_INT_SBUS(4), 0, SUN4M_INT_SBUS(5),
+	0, SUN4M_INT_SBUS(6), 0, 0,
+	/* 0x40 - vme */
+	0, 0, SUN4M_INT_VME(0), SUN4M_INT_VME(1),
+	0, SUN4M_INT_VME(2), 0, SUN4M_INT_VME(3),
+	0, SUN4M_INT_VME(4), 0, SUN4M_INT_VME(5),
+	0, SUN4M_INT_VME(6), 0, 0
+};
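+
+/* Cross-checking the encoding described above: 'intr' value 0x2b has
+ * upper nibble 0x20 (onboard) and PIL 11, and sun4m_imask[0x2b] is
+ * indeed SUN4M_INT_FLOPPY -- matching both the sample layout above
+ * ("0x2b - Floppy") and the level table (softint-11, Floppy).
+ */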
+
+static void sun4m_mask_irq(struct irq_data *data)
+{
+	struct sun4m_handler_data *handler_data;
+	int cpu = smp_processor_id();
+
+	handler_data = irq_data_get_irq_handler_data(data);
+	if (handler_data->mask) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		if (handler_data->percpu) {
+			sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set);
+		} else {
+			sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+static void sun4m_unmask_irq(struct irq_data *data)
+{
+	struct sun4m_handler_data *handler_data;
+	int cpu = smp_processor_id();
+
+	handler_data = irq_data_get_irq_handler_data(data);
+	if (handler_data->mask) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		if (handler_data->percpu) {
+			sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear);
+		} else {
+			sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear);
+		}
+		local_irq_restore(flags);
+	}
+}
+
+static unsigned int sun4m_startup_irq(struct irq_data *data)
+{
+	irq_link(data->irq);
+	sun4m_unmask_irq(data);
+	return 0;
+}
+
+static void sun4m_shutdown_irq(struct irq_data *data)
+{
+	sun4m_mask_irq(data);
+	irq_unlink(data->irq);
+}
+
+static struct irq_chip sun4m_irq = {
+	.name		= "sun4m",
+	.irq_startup	= sun4m_startup_irq,
+	.irq_shutdown	= sun4m_shutdown_irq,
+	.irq_mask	= sun4m_mask_irq,
+	.irq_unmask	= sun4m_unmask_irq,
+};
+
+
+static unsigned int sun4m_build_device_irq(struct platform_device *op,
+					   unsigned int real_irq)
+{
+	struct sun4m_handler_data *handler_data;
+	unsigned int irq;
+	unsigned int pil;
+
+	if (real_irq >= OBP_INT_LEVEL_VME) {
+		prom_printf("Bogus sun4m IRQ %u\n", real_irq);
+		prom_halt();
+	}
+	pil = (real_irq & 0xf);
+	irq = irq_alloc(real_irq, pil);
+
+	if (irq == 0)
+		goto out;
+
+	handler_data = irq_get_handler_data(irq);
+	if (unlikely(handler_data))
+		goto out;
+
+	handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC);
+	if (unlikely(!handler_data)) {
+		prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n");
+		prom_halt();
+	}
+
+	handler_data->mask = sun4m_imask[real_irq];
+	handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD;
+	irq_set_chip_and_handler_name(irq, &sun4m_irq,
+	                              handle_level_irq, "level");
+	irq_set_handler_data(irq, handler_data);
+
+out:
+	return irq;
+}
+
+struct sun4m_timer_percpu {
+	u32		l14_limit;
+	u32		l14_count;
+	u32		l14_limit_noclear;
+	u32		user_timer_start_stop;
+};
+
+static struct sun4m_timer_percpu __iomem *timers_percpu[SUN4M_NCPUS];
+
+struct sun4m_timer_global {
+	u32		l10_limit;
+	u32		l10_count;
+	u32		l10_limit_noclear;
+	u32		reserved;
+	u32		timer_config;
+};
+
+static struct sun4m_timer_global __iomem *timers_global;
+
+static void sun4m_clear_clock_irq(void)
+{
+	sbus_readl(&timers_global->l10_limit);
+}
+
+void sun4m_nmi(struct pt_regs *regs)
+{
+	unsigned long afsr, afar, si;
+
+	printk(KERN_ERR "Aieee: sun4m NMI received!\n");
+	/* XXX HyperSparc hack XXX */
+	__asm__ __volatile__("mov 0x500, %%g1\n\t"
+			     "lda [%%g1] 0x4, %0\n\t"
+			     "mov 0x600, %%g1\n\t"
+			     "lda [%%g1] 0x4, %1\n\t" :
+			     "=r" (afsr), "=r" (afar));
+	printk(KERN_ERR "afsr=%08lx afar=%08lx\n", afsr, afar);
+	si = sbus_readl(&sun4m_irq_global->pending);
+	printk(KERN_ERR "si=%08lx\n", si);
+	if (si & SUN4M_INT_MODULE_ERR)
+		printk(KERN_ERR "Module async error\n");
+	if (si & SUN4M_INT_M2S_WRITE_ERR)
+		printk(KERN_ERR "MBus/SBus async error\n");
+	if (si & SUN4M_INT_ECC_ERR)
+		printk(KERN_ERR "ECC memory error\n");
+	if (si & SUN4M_INT_VME_ERR)
+		printk(KERN_ERR "VME async error\n");
+	printk(KERN_ERR "you lose buddy boy...\n");
+	show_regs(regs);
+	prom_halt();
+}
+
+void sun4m_unmask_profile_irq(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear);
+	local_irq_restore(flags);
+}
+
+void sun4m_clear_profile_irq(int cpu)
+{
+	sbus_readl(&timers_percpu[cpu]->l14_limit);
+}
+
+static void sun4m_load_profile_irq(int cpu, unsigned int limit)
+{
+	unsigned int value = limit ? timer_value(limit) : 0;
+	sbus_writel(value, &timers_percpu[cpu]->l14_limit);
+}
+
+static void __init sun4m_init_timers(void)
+{
+	struct device_node *dp = of_find_node_by_name(NULL, "counter");
+	int i, err, len, num_cpu_timers;
+	unsigned int irq;
+	const u32 *addr;
+
+	if (!dp) {
+		printk(KERN_ERR "sun4m_init_timers: No 'counter' node.\n");
+		return;
+	}
+
+	addr = of_get_property(dp, "address", &len);
+	of_node_put(dp);
+	if (!addr) {
+		printk(KERN_ERR "sun4m_init_timers: No 'address' prop.\n");
+		return;
+	}
+
+	num_cpu_timers = (len / sizeof(u32)) - 1;
+	for (i = 0; i < num_cpu_timers; i++) {
+		timers_percpu[i] = (void __iomem *)
+			(unsigned long) addr[i];
+	}
+	timers_global = (void __iomem *)
+		(unsigned long) addr[num_cpu_timers];
+
+	/* Every per-cpu timer works in timer mode */
+	sbus_writel(0x00000000, &timers_global->timer_config);
+
+#ifdef CONFIG_SMP
+	sparc_config.cs_period = SBUS_CLOCK_RATE * 2;  /* 2 seconds */
+	sparc_config.features |= FEAT_L14_ONESHOT;
+#else
+	sparc_config.cs_period = SBUS_CLOCK_RATE / HZ; /* 1/HZ sec  */
+	sparc_config.features |= FEAT_L10_CLOCKEVENT;
+#endif
+	sparc_config.features |= FEAT_L10_CLOCKSOURCE;
+	sbus_writel(timer_value(sparc_config.cs_period),
+	            &timers_global->l10_limit);
+
+	master_l10_counter = &timers_global->l10_count;
+
+	irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ);
+
+	err = request_irq(irq, timer_interrupt, IRQF_TIMER, "timer", NULL);
+	if (err) {
+		printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n",
+			err);
+		return;
+	}
+
+	for (i = 0; i < num_cpu_timers; i++)
+		sbus_writel(0, &timers_percpu[i]->l14_limit);
+	if (num_cpu_timers == 4)
+		sbus_writel(SUN4M_INT_E14, &sun4m_irq_global->mask_set);
+
+#ifdef CONFIG_SMP
+	{
+		unsigned long flags;
+		struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (14 - 1)];
+
+		/* For SMP we use the level 14 ticker; however, the bootup code
+		 * has copied the firmware's level 14 vector into the boot cpu's
+		 * trap table, so we must fix this now or we get squashed.
+		 */
+		local_irq_save(flags);
+		trap_table->inst_one = lvl14_save[0];
+		trap_table->inst_two = lvl14_save[1];
+		trap_table->inst_three = lvl14_save[2];
+		trap_table->inst_four = lvl14_save[3];
+		local_ops->cache_all();
+		local_irq_restore(flags);
+	}
+#endif
+}
+
+void __init sun4m_init_IRQ(void)
+{
+	struct device_node *dp = of_find_node_by_name(NULL, "interrupt");
+	int len, i, mid, num_cpu_iregs;
+	const u32 *addr;
+
+	if (!dp) {
+		printk(KERN_ERR "sun4m_init_IRQ: No 'interrupt' node.\n");
+		return;
+	}
+
+	addr = of_get_property(dp, "address", &len);
+	of_node_put(dp);
+	if (!addr) {
+		printk(KERN_ERR "sun4m_init_IRQ: No 'address' prop.\n");
+		return;
+	}
+
+	num_cpu_iregs = (len / sizeof(u32)) - 1;
+	for (i = 0; i < num_cpu_iregs; i++) {
+		sun4m_irq_percpu[i] = (void __iomem *)
+			(unsigned long) addr[i];
+	}
+	sun4m_irq_global = (void __iomem *)
+		(unsigned long) addr[num_cpu_iregs];
+
+	local_irq_disable();
+
+	sbus_writel(~SUN4M_INT_MASKALL, &sun4m_irq_global->mask_set);
+	for (i = 0; !cpu_find_by_instance(i, NULL, &mid); i++)
+		sbus_writel(~0x17fff, &sun4m_irq_percpu[mid]->clear);
+
+	if (num_cpu_iregs == 4)
+		sbus_writel(0, &sun4m_irq_global->interrupt_target);
+
+	sparc_config.init_timers      = sun4m_init_timers;
+	sparc_config.build_device_irq = sun4m_build_device_irq;
+	sparc_config.clock_rate       = SBUS_CLOCK_RATE;
+	sparc_config.clear_clock_irq  = sun4m_clear_clock_irq;
+	sparc_config.load_profile_irq = sun4m_load_profile_irq;
+
+
+	/* Cannot enable interrupts until OBP ticker is disabled. */
+}
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
new file mode 100644
index 0000000..228a652
--- /dev/null
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  sun4m SMP support.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/profile.h>
+#include <linux/delay.h>
+#include <linux/sched/mm.h>
+#include <linux/cpu.h>
+
+#include <asm/cacheflush.h>
+#include <asm/switch_to.h>
+#include <asm/tlbflush.h>
+#include <asm/timer.h>
+#include <asm/oplib.h>
+
+#include "irq.h"
+#include "kernel.h"
+
+#define IRQ_IPI_SINGLE		12
+#define IRQ_IPI_MASK		13
+#define IRQ_IPI_RESCHED		14
+#define IRQ_CROSS_CALL		15
+
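+/* The SPARC "swap" instruction atomically exchanges a register with a
+ * word in memory; sun4m_cpu_pre_online() uses it below to publish this
+ * cpu's slot in cpu_callin_map.
+ */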
+static inline unsigned long
+swap_ulong(volatile unsigned long *ptr, unsigned long val)
+{
+	__asm__ __volatile__("swap [%1], %0\n\t" :
+			     "=&r" (val), "=&r" (ptr) :
+			     "0" (val), "1" (ptr));
+	return val;
+}
+
+void sun4m_cpu_pre_starting(void *arg)
+{
+}
+
+void sun4m_cpu_pre_online(void *arg)
+{
+	int cpuid = hard_smp_processor_id();
+
+	/* Allow master to continue. The master will then give us the
+	 * go-ahead by setting the smp_commenced_mask and will wait without
+	 * timeouts until our setup is completed fully (signified by
+	 * our bit being set in the cpu_online_mask).
+	 */
+	swap_ulong(&cpu_callin_map[cpuid], 1);
+
+	/* XXX: What's up with all the flushes? */
+	local_ops->cache_all();
+	local_ops->tlb_all();
+
+	/* Fix idle thread fields. */
+	__asm__ __volatile__("ld [%0], %%g6\n\t"
+			     : : "r" (&current_set[cpuid])
+			     : "memory" /* paranoid */);
+
+	/* Attach to the address space of init_task. */
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+
+	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
+		mb();
+}
+
+/*
+ *	Cycle through the processors asking the PROM to start each one.
+ */
+void __init smp4m_boot_cpus(void)
+{
+	sun4m_unmask_profile_irq();
+	local_ops->cache_all();
+}
+
+int smp4m_boot_one_cpu(int i, struct task_struct *idle)
+{
+	unsigned long *entry = &sun4m_cpu_startup;
+	int timeout;
+	int cpu_node;
+
+	cpu_find_by_mid(i, &cpu_node);
+	current_set[i] = task_thread_info(idle);
+
+	/* See trampoline.S for details... */
+	entry += ((i - 1) * 3);
+
+	/*
+	 * Initialize the contexts table
+	 * Since the call to prom_startcpu() trashes the structure,
+	 * we need to re-initialize it for each cpu
+	 */
+	smp_penguin_ctable.which_io = 0;
+	smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys;
+	smp_penguin_ctable.reg_size = 0;
+
+	/* whirrr, whirrr, whirrrrrrrrr... */
+	printk(KERN_INFO "Starting CPU %d at %p\n", i, entry);
+	local_ops->cache_all();
+	prom_startcpu(cpu_node, &smp_penguin_ctable, 0, (char *)entry);
+
+	/* wheee... it's going... */
+	for (timeout = 0; timeout < 10000; timeout++) {
+		if (cpu_callin_map[i])
+			break;
+		udelay(200);
+	}
+
+	if (!(cpu_callin_map[i])) {
+		printk(KERN_ERR "Processor %d is stuck.\n", i);
+		return -ENODEV;
+	}
+
+	local_ops->cache_all();
+	return 0;
+}
+
+void __init smp4m_smp_done(void)
+{
+	int i, first;
+	int *prev;
+
+	/* setup cpu list for irq rotation */
+	first = 0;
+	prev = &first;
+	for_each_online_cpu(i) {
+		*prev = i;
+		prev = &cpu_data(i).next;
+	}
+	*prev = first;
+	local_ops->cache_all();
+
+	/* Ok, they are spinning and ready to go. */
+}
+
+static void sun4m_send_ipi(int cpu, int level)
+{
+	sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set);
+}
+
+static void sun4m_ipi_resched(int cpu)
+{
+	sun4m_send_ipi(cpu, IRQ_IPI_RESCHED);
+}
+
+static void sun4m_ipi_single(int cpu)
+{
+	sun4m_send_ipi(cpu, IRQ_IPI_SINGLE);
+}
+
+static void sun4m_ipi_mask_one(int cpu)
+{
+	sun4m_send_ipi(cpu, IRQ_IPI_MASK);
+}
+
+static struct smp_funcall {
+	smpfunc_t func;
+	unsigned long arg1;
+	unsigned long arg2;
+	unsigned long arg3;
+	unsigned long arg4;
+	unsigned long arg5;
+	unsigned long processors_in[SUN4M_NCPUS];  /* Set when ipi entered. */
+	unsigned long processors_out[SUN4M_NCPUS]; /* Set when ipi exited. */
+} ccall_info;
+
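+/* Handshake used by sun4m_cross_call(): the caller fills in ccall_info
+ * under cross_call_lock and sends IRQ_CROSS_CALL to every target cpu,
+ * then spins first on processors_in[] (set by each target on entry to
+ * smp4m_cross_call_irq()) and then on processors_out[] (set once the
+ * target has finished running func).
+ */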
+static DEFINE_SPINLOCK(cross_call_lock);
+
+/* Cross calls must be serialized, at least currently. */
+static void sun4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
+			     unsigned long arg2, unsigned long arg3,
+			     unsigned long arg4)
+{
+		register int ncpus = SUN4M_NCPUS;
+		unsigned long flags;
+
+		spin_lock_irqsave(&cross_call_lock, flags);
+
+		/* Init function glue. */
+		ccall_info.func = func;
+		ccall_info.arg1 = arg1;
+		ccall_info.arg2 = arg2;
+		ccall_info.arg3 = arg3;
+		ccall_info.arg4 = arg4;
+		ccall_info.arg5 = 0;
+
+		/* Init receive/complete mapping, plus fire the IPI's off. */
+		{
+			register int i;
+
+			cpumask_clear_cpu(smp_processor_id(), &mask);
+			cpumask_and(&mask, cpu_online_mask, &mask);
+			for (i = 0; i < ncpus; i++) {
+				if (cpumask_test_cpu(i, &mask)) {
+					ccall_info.processors_in[i] = 0;
+					ccall_info.processors_out[i] = 0;
+					sun4m_send_ipi(i, IRQ_CROSS_CALL);
+				} else {
+					ccall_info.processors_in[i] = 1;
+					ccall_info.processors_out[i] = 1;
+				}
+			}
+		}
+
+		{
+			register int i;
+
+			i = 0;
+			do {
+				if (!cpumask_test_cpu(i, &mask))
+					continue;
+				while (!ccall_info.processors_in[i])
+					barrier();
+			} while (++i < ncpus);
+
+			i = 0;
+			do {
+				if (!cpumask_test_cpu(i, &mask))
+					continue;
+				while (!ccall_info.processors_out[i])
+					barrier();
+			} while (++i < ncpus);
+		}
+		spin_unlock_irqrestore(&cross_call_lock, flags);
+}
+
+/* Running cross calls. */
+void smp4m_cross_call_irq(void)
+{
+	int i = smp_processor_id();
+
+	ccall_info.processors_in[i] = 1;
+	ccall_info.func(ccall_info.arg1, ccall_info.arg2, ccall_info.arg3,
+			ccall_info.arg4, ccall_info.arg5);
+	ccall_info.processors_out[i] = 1;
+}
+
+void smp4m_percpu_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+	struct clock_event_device *ce;
+	int cpu = smp_processor_id();
+
+	old_regs = set_irq_regs(regs);
+
+	ce = &per_cpu(sparc32_clockevent, cpu);
+
+	if (clockevent_state_periodic(ce))
+		sun4m_clear_profile_irq(cpu);
+	else
+		sparc_config.load_profile_irq(cpu, 0); /* Is this needless? */
+
+	irq_enter();
+	ce->event_handler(ce);
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+static const struct sparc32_ipi_ops sun4m_ipi_ops = {
+	.cross_call = sun4m_cross_call,
+	.resched    = sun4m_ipi_resched,
+	.single     = sun4m_ipi_single,
+	.mask_one   = sun4m_ipi_mask_one,
+};
+
+void __init sun4m_init_smp(void)
+{
+	sparc32_ipi_ops = &sun4m_ipi_ops;
+}
diff --git a/arch/sparc/kernel/sun4v_ivec.S b/arch/sparc/kernel/sun4v_ivec.S
new file mode 100644
index 0000000..6478ef4
--- /dev/null
+++ b/arch/sparc/kernel/sun4v_ivec.S
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* sun4v_ivec.S: Sun4v interrupt vector handling.
+ *
+ * Copyright (C) 2006 <davem@davemloft.net>
+ */
+
+#include <asm/cpudata.h>
+#include <asm/intr_queue.h>
+#include <asm/pil.h>
+
+	.text
+	.align	32
+
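+/* Each sun4v interrupt queue is a ring of 64-byte entries whose head
+ * and tail byte offsets live in ASI_QUEUE registers.  The handlers
+ * below compare head against tail, consume entries, and advance the
+ * head, wrapping it with the per-queue mask, i.e.
+ * head = (head + 0x40) & qmask.
+ */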
+sun4v_cpu_mondo:
+	/* Head offset in %g2, tail offset in %g4.
+	 * If they are the same, no work.
+	 */
+	mov	INTRQ_CPU_MONDO_HEAD, %g2
+	ldxa	[%g2] ASI_QUEUE, %g2
+	mov	INTRQ_CPU_MONDO_TAIL, %g4
+	ldxa	[%g4] ASI_QUEUE, %g4
+	cmp	%g2, %g4
+	be,pn	%xcc, sun4v_cpu_mondo_queue_empty
+	 nop
+
+	/* Get &trap_block[smp_processor_id()] into %g4.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g4
+	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
+
+	/* Get smp_processor_id() into %g3 */
+	sethi	%hi(trap_block), %g5
+	or	%g5, %lo(trap_block), %g5
+	sub	%g4, %g5, %g3
+	srlx	%g3, TRAP_BLOCK_SZ_SHIFT, %g3
+
+	/* Increment cpu_mondo_counter[smp_processor_id()] */
+	sethi	%hi(cpu_mondo_counter), %g5
+	or	%g5, %lo(cpu_mondo_counter), %g5
+	sllx	%g3, 3, %g3
+	add	%g5, %g3, %g5
+	ldx	[%g5], %g3
+	add	%g3, 1, %g3
+	stx	%g3, [%g5]
+
+	/* Get CPU mondo queue base phys address into %g7.  */
+	ldx	[%g4 + TRAP_PER_CPU_CPU_MONDO_PA], %g7
+
+	/* Now get the cross-call arguments and handler PC, same
+	 * layout as sun4u:
+	 *
+	 * 1st 64-bit word: low half is 32-bit PC, put into %g3 and jmpl to it
+	 *                  high half is context arg to MMU flushes, into %g5
+	 * 2nd 64-bit word: 64-bit arg, load into %g1
+	 * 3rd 64-bit word: 64-bit arg, load into %g7
+	 */
+	ldxa	[%g7 + %g2] ASI_PHYS_USE_EC, %g3
+	add	%g2, 0x8, %g2
+	srlx	%g3, 32, %g5
+	ldxa	[%g7 + %g2] ASI_PHYS_USE_EC, %g1
+	add	%g2, 0x8, %g2
+	srl	%g3, 0, %g3
+	ldxa	[%g7 + %g2] ASI_PHYS_USE_EC, %g7
+	add	%g2, 0x40 - 0x8 - 0x8, %g2
+
+	/* Update queue head pointer.  */
+	lduw	[%g4 + TRAP_PER_CPU_CPU_MONDO_QMASK], %g4
+	and	%g2, %g4, %g2
+
+	mov	INTRQ_CPU_MONDO_HEAD, %g4
+	stxa	%g2, [%g4] ASI_QUEUE
+	membar	#Sync
+
+	jmpl	%g3, %g0
+	 nop
+
+sun4v_cpu_mondo_queue_empty:
+	retry
+
+sun4v_dev_mondo:
+	/* Head offset in %g2, tail offset in %g4.  */
+	mov	INTRQ_DEVICE_MONDO_HEAD, %g2
+	ldxa	[%g2] ASI_QUEUE, %g2
+	mov	INTRQ_DEVICE_MONDO_TAIL, %g4
+	ldxa	[%g4] ASI_QUEUE, %g4
+	cmp	%g2, %g4
+	be,pn	%xcc, sun4v_dev_mondo_queue_empty
+	 nop
+
+	/* Get &trap_block[smp_processor_id()] into %g4.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g4
+	sub	%g4, TRAP_PER_CPU_FAULT_INFO, %g4
+
+	/* Get DEV mondo queue base phys address into %g5.  */
+	ldx	[%g4 + TRAP_PER_CPU_DEV_MONDO_PA], %g5
+
+	/* Load IVEC into %g3.  */
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	add	%g2, 0x40, %g2
+
+	/* XXX There can be a full 64-byte block of data here.
+	 * XXX This is how we can get at MSI vector data.
+	 * XXX Currently we do not capture this, but when we do we'll
+	 * XXX need to add a 64-byte storage area in the struct ino_bucket
+	 * XXX or the struct irq_desc.
+	 */
+
+	/* Update queue head pointer, this frees up some registers.  */
+	lduw	[%g4 + TRAP_PER_CPU_DEV_MONDO_QMASK], %g4
+	and	%g2, %g4, %g2
+
+	mov	INTRQ_DEVICE_MONDO_HEAD, %g4
+	stxa	%g2, [%g4] ASI_QUEUE
+	membar	#Sync
+
+	TRAP_LOAD_IRQ_WORK_PA(%g1, %g4)
+
+	/* For VIRQs, cookie is encoded as ~bucket_phys_addr  */
+	brlz,pt %g3, 1f
+	 xnor	%g3, %g0, %g4
+
+	/* Get __pa(&ivector_table[IVEC]) into %g4.  */
+	sethi	%hi(ivector_table_pa), %g4
+	ldx	[%g4 + %lo(ivector_table_pa)], %g4
+	sllx	%g3, 4, %g3
+	add	%g4, %g3, %g4
+
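+	/* Push the bucket onto the per-cpu irq work list: save the old
+	 * list head into the bucket's first word, then make the bucket
+	 * the new head.
+	 */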
+1:	ldx	[%g1], %g2
+	stxa	%g2, [%g4] ASI_PHYS_USE_EC
+	stx	%g4, [%g1]
+
+	/* Signal the interrupt by setting (1 << pil) in %softint.  */
+	wr	%g0, 1 << PIL_DEVICE_IRQ, %set_softint
+
+sun4v_dev_mondo_queue_empty:
+	retry
+
+sun4v_res_mondo:
+	/* Head offset in %g2, tail offset in %g4.  */
+	mov	INTRQ_RESUM_MONDO_HEAD, %g2
+	ldxa	[%g2] ASI_QUEUE, %g2
+	mov	INTRQ_RESUM_MONDO_TAIL, %g4
+	ldxa	[%g4] ASI_QUEUE, %g4
+	cmp	%g2, %g4
+	be,pn	%xcc, sun4v_res_mondo_queue_empty
+	 nop
+
+	/* Get &trap_block[smp_processor_id()] into %g3.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g3
+	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
+
+	/* Get RES mondo queue base phys address into %g5.  */
+	ldx	[%g3 + TRAP_PER_CPU_RESUM_MONDO_PA], %g5
+
+	/* Get RES kernel buffer base phys address into %g7.  */
+	ldx	[%g3 + TRAP_PER_CPU_RESUM_KBUF_PA], %g7
+
+	/* If the first word is non-zero, queue is full.  */
+	ldxa	[%g7 + %g2] ASI_PHYS_USE_EC, %g1
+	brnz,pn	%g1, sun4v_res_mondo_queue_full
+	 nop
+
+	lduw	[%g3 + TRAP_PER_CPU_RESUM_QMASK], %g4
+
+	/* Remember this entry's offset in %g1.  */
+	mov	%g2, %g1
+
+	/* Copy 64-byte queue entry into kernel buffer.  */
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+
+	/* Update queue head pointer.  */
+	and	%g2, %g4, %g2
+
+	mov	INTRQ_RESUM_MONDO_HEAD, %g4
+	stxa	%g2, [%g4] ASI_QUEUE
+	membar	#Sync
+
+	/* Disable interrupts and save register state so we can call
+	 * C code.  The etrap handling will leave %g4 in %l4 for us
+	 * when it's done.
+	 */
+	rdpr	%pil, %g2
+	wrpr	%g0, PIL_NORMAL_MAX, %pil
+	mov	%g1, %g4
+	ba,pt	%xcc, etrap_irq
+	 rd	%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	/* Log the event.  */
+	add	%sp, PTREGS_OFF, %o0
+	call	sun4v_resum_error
+	 mov	%l4, %o1
+
+	/* Return from trap.  */
+	ba,pt	%xcc, rtrap_irq
+	 nop
+
+sun4v_res_mondo_queue_empty:
+	retry
+
+sun4v_res_mondo_queue_full:
+	/* The queue is full, consolidate our damage by setting
+	 * the head equal to the tail.  We'll just trap again otherwise.
+	 * Call C code to log the event.
+	 */
+	mov	INTRQ_RESUM_MONDO_HEAD, %g2
+	stxa	%g4, [%g2] ASI_QUEUE
+	membar	#Sync
+
+	rdpr	%pil, %g2
+	wrpr	%g0, PIL_NORMAL_MAX, %pil
+	ba,pt	%xcc, etrap_irq
+	 rd	%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	call	sun4v_resum_overflow
+	 add	%sp, PTREGS_OFF, %o0
+
+	ba,pt	%xcc, rtrap_irq
+	 nop
+
+sun4v_nonres_mondo:
+	/* Head offset in %g2, tail offset in %g4.  */
+	mov	INTRQ_NONRESUM_MONDO_HEAD, %g2
+	ldxa	[%g2] ASI_QUEUE, %g2
+	mov	INTRQ_NONRESUM_MONDO_TAIL, %g4
+	ldxa	[%g4] ASI_QUEUE, %g4
+	cmp	%g2, %g4
+	be,pn	%xcc, sun4v_nonres_mondo_queue_empty
+	 nop
+
+	/* Get &trap_block[smp_processor_id()] into %g3.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g3
+	sub	%g3, TRAP_PER_CPU_FAULT_INFO, %g3
+
+	/* Get RES mondo queue base phys address into %g5.  */
+	ldx	[%g3 + TRAP_PER_CPU_NONRESUM_MONDO_PA], %g5
+
+	/* Get RES kernel buffer base phys address into %g7.  */
+	ldx	[%g3 + TRAP_PER_CPU_NONRESUM_KBUF_PA], %g7
+
+	/* If the first word is non-zero, queue is full.  */
+	ldxa	[%g7 + %g2] ASI_PHYS_USE_EC, %g1
+	brnz,pn	%g1, sun4v_nonres_mondo_queue_full
+	 nop
+
+	lduw	[%g3 + TRAP_PER_CPU_NONRESUM_QMASK], %g4
+
+	/* Remember this entry's offset in %g1.  */
+	mov	%g2, %g1
+
+	/* Copy 64-byte queue entry into kernel buffer.  */
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+	ldxa	[%g5 + %g2] ASI_PHYS_USE_EC, %g3
+	stxa	%g3, [%g7 + %g2] ASI_PHYS_USE_EC
+	add	%g2, 0x08, %g2
+
+	/* Update queue head pointer.  */
+	and	%g2, %g4, %g2
+
+	mov	INTRQ_NONRESUM_MONDO_HEAD, %g4
+	stxa	%g2, [%g4] ASI_QUEUE
+	membar	#Sync
+
+	/* Disable interrupts and save register state so we can call
+	 * C code.  The etrap handling will leave %g4 in %l4 for us
+	 * when it's done.
+	 */
+	rdpr	%pil, %g2
+	wrpr	%g0, PIL_NORMAL_MAX, %pil
+	mov	%g1, %g4
+	ba,pt	%xcc, etrap_irq
+	 rd	%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	/* Log the event.  */
+	add	%sp, PTREGS_OFF, %o0
+	call	sun4v_nonresum_error
+	 mov	%l4, %o1
+
+	/* Return from trap.  */
+	ba,pt	%xcc, rtrap_irq
+	 nop
+
+sun4v_nonres_mondo_queue_empty:
+	retry
+
+sun4v_nonres_mondo_queue_full:
+	/* The queue is full, consolidate our damage by setting
+	 * the head equal to the tail.  We'll just trap again otherwise.
+	 * Call C code to log the event.
+	 */
+	mov	INTRQ_NONRESUM_MONDO_HEAD, %g2
+	stxa	%g4, [%g2] ASI_QUEUE
+	membar	#Sync
+
+	rdpr	%pil, %g2
+	wrpr	%g0, PIL_NORMAL_MAX, %pil
+	ba,pt	%xcc, etrap_irq
+	 rd	%pc, %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	call	sun4v_nonresum_overflow
+	 add	%sp, PTREGS_OFF, %o0
+
+	ba,pt	%xcc, rtrap_irq
+	 nop
diff --git a/arch/sparc/kernel/sun4v_mcd.S b/arch/sparc/kernel/sun4v_mcd.S
new file mode 100644
index 0000000..d6c69eb
--- /dev/null
+++ b/arch/sparc/kernel/sun4v_mcd.S
@@ -0,0 +1,18 @@
+/* sun4v_mcd.S: Sun4v "memory corruption detected" precise exception handler
+ *
+ * Copyright (c) 2015 Oracle and/or its affiliates. All rights reserved.
+ * Authors: Bob Picco <bob.picco@oracle.com>,
+ *          Khalid Aziz <khalid.aziz@oracle.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+	.text
+	.align 32
+
+sun4v_mcd_detect_precise:
+	mov	%l4, %o1
+	mov 	%l5, %o2
+	call	sun4v_mem_corrupt_detect_precise
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+	 nop
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
new file mode 100644
index 0000000..7ac9f33
--- /dev/null
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -0,0 +1,437 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* sun4v_tlb_miss.S: Sun4v TLB miss handlers.
+ *
+ * Copyright (C) 2006 <davem@davemloft.net>
+ */
+
+	.text
+	.align	32
+
+	/* Load ITLB fault information into VADDR and CTX, using BASE.  */
+#define LOAD_ITLB_INFO(BASE, VADDR, CTX) \
+	ldx	[BASE + HV_FAULT_I_ADDR_OFFSET], VADDR; \
+	ldx	[BASE + HV_FAULT_I_CTX_OFFSET], CTX;
+
+	/* Load DTLB fault information into VADDR and CTX, using BASE.  */
+#define LOAD_DTLB_INFO(BASE, VADDR, CTX) \
+	ldx	[BASE + HV_FAULT_D_ADDR_OFFSET], VADDR; \
+	ldx	[BASE + HV_FAULT_D_CTX_OFFSET], CTX;
+
+	/* DEST = (VADDR >> 22)
+	 *
+	 * Branch to ZERO_CTX_LABEL if context is zero.
+	 */
+#define	COMPUTE_TAG_TARGET(DEST, VADDR, CTX, ZERO_CTX_LABEL) \
+	srlx	VADDR, 22, DEST; \
+	brz,pn	CTX, ZERO_CTX_LABEL; \
+	 nop;
+
+	/* Create TSB pointer.  This is something like:
+	 *
+	 * index_mask = (512 << (tsb_reg & 0x7UL)) - 1UL;
+	 * tsb_base = tsb_reg & ~0x7UL;
+	 * tsb_index = ((vaddr >> HASH_SHIFT) & tsb_mask);
+	 * tsb_ptr = tsb_base + (tsb_index * 16);
+	 */
+#define COMPUTE_TSB_PTR(TSB_PTR, VADDR, HASH_SHIFT, TMP1, TMP2) \
+	and	TSB_PTR, 0x7, TMP1;			\
+	mov	512, TMP2;				\
+	andn	TSB_PTR, 0x7, TSB_PTR;			\
+	sllx	TMP2, TMP1, TMP2;			\
+	srlx	VADDR, HASH_SHIFT, TMP1;		\
+	sub	TMP2, 1, TMP2;				\
+	and	TMP1, TMP2, TMP1;			\
+	sllx	TMP1, 4, TMP1;				\
+	add	TSB_PTR, TMP1, TSB_PTR;
+
+sun4v_itlb_miss:
+	/* Load MMU Miss base into %g2.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+
+	/* Load UTSB reg into %g1.  */
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
+
+	LOAD_ITLB_INFO(%g2, %g4, %g5)
+	COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_itlb_4v)
+	COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
+
+	/* Load TSB tag/pte into %g2/%g3 and compare the tag.  */
+	ldda	[%g1] ASI_QUAD_LDD_PHYS_4V, %g2
+	cmp	%g2, %g6
+	bne,a,pn %xcc, tsb_miss_page_table_walk
+	 mov	FAULT_CODE_ITLB, %g3
+	andcc	%g3, _PAGE_EXEC_4V, %g0
+	be,a,pn	%xcc, tsb_do_fault
+	 mov	FAULT_CODE_ITLB, %g3
+
+	/* We have a valid entry, make hypervisor call to load
+	 * I-TLB and return from trap.
+	 *
+	 * %g3:	PTE
+	 * %g4:	vaddr
+	 */
+sun4v_itlb_load:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g6
+	mov	%o0, %g1		! save %o0
+	mov	%o1, %g2		! save %o1
+	mov	%o2, %g5		! save %o2
+	mov	%o3, %g7		! save %o3
+	mov	%g4, %o0		! vaddr
+	ldx	[%g6 + HV_FAULT_I_CTX_OFFSET], %o1	! ctx
+	mov	%g3, %o2		! PTE
+	mov	HV_MMU_IMMU, %o3	! flags
+	ta	HV_MMU_MAP_ADDR_TRAP
+	brnz,pn	%o0, sun4v_itlb_error
+	 mov	%g2, %o1		! restore %o1
+	mov	%g1, %o0		! restore %o0
+	mov	%g5, %o2		! restore %o2
+	mov	%g7, %o3		! restore %o3
+
+	retry
+
+sun4v_dtlb_miss:
+	/* Load MMU Miss base into %g2.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+
+	/* Load UTSB reg into %g1.  */
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
+
+	LOAD_DTLB_INFO(%g2, %g4, %g5)
+	COMPUTE_TAG_TARGET(%g6, %g4, %g5, kvmap_dtlb_4v)
+	COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g3, %g7)
+
+	/* Load TSB tag/pte into %g2/%g3 and compare the tag.  */
+	ldda	[%g1] ASI_QUAD_LDD_PHYS_4V, %g2
+	cmp	%g2, %g6
+	bne,a,pn %xcc, tsb_miss_page_table_walk
+	 mov	FAULT_CODE_DTLB, %g3
+
+	/* We have a valid entry, make hypervisor call to load
+	 * D-TLB and return from trap.
+	 *
+	 * %g3:	PTE
+	 * %g4:	vaddr
+	 */
+sun4v_dtlb_load:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g6
+	mov	%o0, %g1		! save %o0
+	mov	%o1, %g2		! save %o1
+	mov	%o2, %g5		! save %o2
+	mov	%o3, %g7		! save %o3
+	mov	%g4, %o0		! vaddr
+	ldx	[%g6 + HV_FAULT_D_CTX_OFFSET], %o1	! ctx
+	mov	%g3, %o2		! PTE
+	mov	HV_MMU_DMMU, %o3	! flags
+	ta	HV_MMU_MAP_ADDR_TRAP
+	brnz,pn	%o0, sun4v_dtlb_error
+	 mov	%g2, %o1		! restore %o1
+	mov	%g1, %o0		! restore %o0
+	mov	%g5, %o2		! restore %o2
+	mov	%g7, %o3		! restore %o3
+
+	retry
+
+sun4v_dtlb_prot:
+	SET_GL(1)
+
+	/* Load MMU Miss base into %g5.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g5
+
+	ldx	[%g5 + HV_FAULT_D_ADDR_OFFSET], %g5
+	rdpr	%tl, %g1
+	cmp	%g1, 1
+	bgu,pn	%xcc, winfix_trampoline
+	 mov	FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
+	ba,pt	%xcc, sparc64_realfault_common
+	 nop
+
+	/* Called from trap table:
+	 * %g4:	vaddr
+	 * %g5:	context
+	 * %g6: TAG TARGET
+	 */
+sun4v_itsb_miss:
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
+	brz,pn	%g5, kvmap_itlb_4v
+	 mov	FAULT_CODE_ITLB, %g3
+	ba,a,pt	%xcc, sun4v_tsb_miss_common
+
+	/* Called from trap table:
+	 * %g4:	vaddr
+	 * %g5:	context
+	 * %g6: TAG TARGET
+	 */
+sun4v_dtsb_miss:
+	mov	SCRATCHPAD_UTSBREG1, %g1
+	ldxa	[%g1] ASI_SCRATCHPAD, %g1
+	brz,pn	%g5, kvmap_dtlb_4v
+	 mov	FAULT_CODE_DTLB, %g3
+
+	/* fallthrough */
+
+sun4v_tsb_miss_common:
+	COMPUTE_TSB_PTR(%g1, %g4, PAGE_SHIFT, %g5, %g7)
+
+	sub	%g2, TRAP_PER_CPU_FAULT_INFO, %g2
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	mov	SCRATCHPAD_UTSBREG2, %g5
+	ldxa	[%g5] ASI_SCRATCHPAD, %g5
+	cmp	%g5, -1
+	be,pt	%xcc, 80f
+	 nop
+	COMPUTE_TSB_PTR(%g5, %g4, REAL_HPAGE_SHIFT, %g2, %g7)
+
+	/* That clobbered %g2, reload it.  */
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	sub	%g2, TRAP_PER_CPU_FAULT_INFO, %g2
+
+80:	stx	%g5, [%g2 + TRAP_PER_CPU_TSB_HUGE_TEMP]
+#endif
+
+	ba,pt	%xcc, tsb_miss_page_table_walk_sun4v_fastpath
+	 ldx	[%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
+
+sun4v_itlb_error:
+	rdpr	%tl, %g1
+	cmp	%g1, 1
+	ble,pt	%icc, sun4v_bad_ra
+	 or	%g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1
+
+	sethi	%hi(sun4v_err_itlb_vaddr), %g1
+	stx	%g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
+	sethi	%hi(sun4v_err_itlb_ctx), %g1
+	ldxa	[%g0] ASI_SCRATCHPAD, %g6
+	ldx	[%g6 + HV_FAULT_I_CTX_OFFSET], %o1
+	stx	%o1, [%g1 + %lo(sun4v_err_itlb_ctx)]
+	sethi	%hi(sun4v_err_itlb_pte), %g1
+	stx	%g3, [%g1 + %lo(sun4v_err_itlb_pte)]
+	sethi	%hi(sun4v_err_itlb_error), %g1
+	stx	%o0, [%g1 + %lo(sun4v_err_itlb_error)]
+
+	sethi	%hi(1f), %g7
+	rdpr	%tl, %g4
+	ba,pt	%xcc, etraptl1
+1:	 or	%g7, %lo(1f), %g7
+	mov	%l4, %o1
+	call	sun4v_itlb_error_report
+	 add	%sp, PTREGS_OFF, %o0
+
+	/* NOTREACHED */
+
+sun4v_dtlb_error:
+	rdpr	%tl, %g1
+	cmp	%g1, 1
+	ble,pt	%icc, sun4v_bad_ra
+	 or	%g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1
+
+	sethi	%hi(sun4v_err_dtlb_vaddr), %g1
+	stx	%g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
+	sethi	%hi(sun4v_err_dtlb_ctx), %g1
+	ldxa	[%g0] ASI_SCRATCHPAD, %g6
+	ldx	[%g6 + HV_FAULT_D_CTX_OFFSET], %o1
+	stx	%o1, [%g1 + %lo(sun4v_err_dtlb_ctx)]
+	sethi	%hi(sun4v_err_dtlb_pte), %g1
+	stx	%g3, [%g1 + %lo(sun4v_err_dtlb_pte)]
+	sethi	%hi(sun4v_err_dtlb_error), %g1
+	stx	%o0, [%g1 + %lo(sun4v_err_dtlb_error)]
+
+	sethi	%hi(1f), %g7
+	rdpr	%tl, %g4
+	ba,pt	%xcc, etraptl1
+1:	 or	%g7, %lo(1f), %g7
+	mov	%l4, %o1
+	call	sun4v_dtlb_error_report
+	 add	%sp, PTREGS_OFF, %o0
+
+	/* NOTREACHED */
+
+sun4v_bad_ra:
+	or	%g0, %g4, %g5
+	ba,pt	%xcc, sparc64_realfault_common
+	 or	%g1, %g0, %g4
+
+	/* NOTREACHED */
+
+	/* Instruction Access Exception, tl0. */
+sun4v_iacc:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_I_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	sun4v_insn_access_exception
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Instruction Access Exception, tl1. */
+sun4v_iacc_tl1:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_I_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_I_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_I_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+	ba,pt	%xcc, etraptl1
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	sun4v_insn_access_exception_tl1
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Data Access Exception, tl0. */
+sun4v_dacc:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	sun4v_data_access_exception
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Data Access Exception, tl1. */
+sun4v_dacc_tl1:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+	ba,pt	%xcc, etraptl1
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	sun4v_data_access_exception_tl1
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Memory Address Unaligned.  */
+sun4v_mna:
+	/* Window fixup? */
+	rdpr	%tl, %g2
+	cmp	%g2, 1
+	ble,pt	%icc, 1f
+	 nop
+
+	SET_GL(1)
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g5
+	mov	HV_FAULT_TYPE_UNALIGNED, %g3
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g4
+	sllx	%g3, 16, %g3
+	or	%g4, %g3, %g4
+	ba,pt	%xcc, winfix_mna
+	 rdpr	%tpc, %g3
+	/* not reached */
+
+1:	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	mov	HV_FAULT_TYPE_UNALIGNED, %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	sun4v_do_mna
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+	 nop
+
+	/* Privileged Action.  */
+sun4v_privact:
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	call	do_privact
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Unaligned ldd float, tl0. */
+sun4v_lddfmna:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	handle_lddfmna
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Unaligned std float, tl0. */
+sun4v_stdfmna:
+	ldxa	[%g0] ASI_SCRATCHPAD, %g2
+	ldx	[%g2 + HV_FAULT_D_TYPE_OFFSET], %g3
+	ldx	[%g2 + HV_FAULT_D_ADDR_OFFSET], %g4
+	ldx	[%g2 + HV_FAULT_D_CTX_OFFSET], %g5
+	sllx	%g3, 16, %g3
+	or	%g5, %g3, %g5
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	mov	%l4, %o1
+	mov	%l5, %o2
+	call	handle_stdfmna
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define SUN4V_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
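+/* SUN4V_DO_PATCH rewrites the two instructions at OLD with a "branch
+ * always" to NEW plus a nop in the delay slot: the byte displacement
+ * NEW - OLD is scaled to words and truncated to the branch's
+ * displacement field by the sll/srl pair, ORed into BRANCH_ALWAYS,
+ * stored, and the patched I-cache line is flushed.
+ */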
+	.globl	sun4v_patch_tlb_handlers
+	.type	sun4v_patch_tlb_handlers,#function
+sun4v_patch_tlb_handlers:
+	SUN4V_DO_PATCH(tl0_iamiss, sun4v_itlb_miss)
+	SUN4V_DO_PATCH(tl1_iamiss, sun4v_itlb_miss)
+	SUN4V_DO_PATCH(tl0_damiss, sun4v_dtlb_miss)
+	SUN4V_DO_PATCH(tl1_damiss, sun4v_dtlb_miss)
+	SUN4V_DO_PATCH(tl0_daprot, sun4v_dtlb_prot)
+	SUN4V_DO_PATCH(tl1_daprot, sun4v_dtlb_prot)
+	SUN4V_DO_PATCH(tl0_iax, sun4v_iacc)
+	SUN4V_DO_PATCH(tl1_iax, sun4v_iacc_tl1)
+	SUN4V_DO_PATCH(tl0_dax, sun4v_dacc)
+	SUN4V_DO_PATCH(tl1_dax, sun4v_dacc_tl1)
+	SUN4V_DO_PATCH(tl0_mna, sun4v_mna)
+	SUN4V_DO_PATCH(tl1_mna, sun4v_mna)
+	SUN4V_DO_PATCH(tl0_lddfmna, sun4v_lddfmna)
+	SUN4V_DO_PATCH(tl0_stdfmna, sun4v_stdfmna)
+	SUN4V_DO_PATCH(tl0_privact, sun4v_privact)
+	retl
+	 nop
+	.size	sun4v_patch_tlb_handlers,.-sun4v_patch_tlb_handlers
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
new file mode 100644
index 0000000..489ffab
--- /dev/null
+++ b/arch/sparc/kernel/sys32.S
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sys32.S: I-cache tricks for simple conversions in the 32-bit
+ *          compatibility layer.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Jakub Jelinek   (jj@ultra.linux.cz)
+ */
+
+#include <asm/errno.h>
+
+/* NOTE: a call used as a jump breaks the return stack; we have to avoid that */
+
+	.text
+
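+/* mmap2 passes its file offset in units of 4096 bytes; the sllx in the
+ * branch delay slot converts it to a byte offset before sys_mmap runs.
+ */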
+	.globl		sys32_mmap2
+sys32_mmap2:
+	sethi		%hi(sys_mmap), %g1
+	jmpl		%g1 + %lo(sys_mmap), %g0
+	 sllx		%o5, 12, %o5
+
+	.align		32
+	.globl		sys32_socketcall
+sys32_socketcall:	/* %o0=call, %o1=args */
+	cmp		%o0, 1
+	bl,pn		%xcc, do_einval
+	 cmp		%o0, 18
+	bg,pn		%xcc, do_einval
+	 sub		%o0, 1, %o0
+	sllx		%o0, 5, %o0
+	sethi		%hi(__socketcall_table_begin), %g2
+	or		%g2, %lo(__socketcall_table_begin), %g2
+	jmpl		%g2 + %o0, %g0
+	 nop
+do_einval:
+	retl
+	 mov		-EINVAL, %o0
+
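+/* Each 32-byte stub below loads the 32-bit arguments from the user args
+ * array via %asi -- ldswa sign-extends ints, lduwa zero-extends pointers
+ * and sizes -- and tail-jumps to the 64-bit syscall.  The numbered
+ * labels feed the __ex_table section at the end of this file, so a
+ * faulting user load returns -EFAULT instead of oopsing.
+ */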
+	.align		32
+__socketcall_table_begin:
+
+	/* Each entry is exactly 32 bytes. */
+do_sys_socket: /* sys_socket(int, int, int) */
+1:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_socket), %g1
+2:	ldswa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(sys_socket), %g0
+3:	 ldswa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_bind: /* sys_bind(int fd, struct sockaddr *, int) */
+4:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_bind), %g1
+5:	ldswa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(sys_bind), %g0
+6:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_connect: /* sys_connect(int, struct sockaddr *, int) */
+7:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_connect), %g1
+8:	ldswa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(sys_connect), %g0
+9:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_listen: /* sys_listen(int, int) */
+10:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_listen), %g1
+	jmpl		%g1 + %lo(sys_listen), %g0
+11:	 ldswa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+	nop
+do_sys_accept: /* sys_accept(int, struct sockaddr *, int *) */
+12:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_accept), %g1
+13:	lduwa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(sys_accept), %g0
+14:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_getsockname: /* sys_getsockname(int, struct sockaddr *, int *) */
+15:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_getsockname), %g1
+16:	lduwa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(sys_getsockname), %g0
+17:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_getpeername: /* sys_getpeername(int, struct sockaddr *, int *) */
+18:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_getpeername), %g1
+19:	lduwa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(sys_getpeername), %g0
+20:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_socketpair: /* sys_socketpair(int, int, int, int *) */
+21:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_socketpair), %g1
+22:	ldswa		[%o1 + 0x8] %asi, %o2
+23:	lduwa		[%o1 + 0xc] %asi, %o3
+	jmpl		%g1 + %lo(sys_socketpair), %g0
+24:	 ldswa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+do_sys_send: /* sys_send(int, void *, size_t, unsigned int) */
+25:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_send), %g1
+26:	lduwa		[%o1 + 0x8] %asi, %o2
+27:	lduwa		[%o1 + 0xc] %asi, %o3
+	jmpl		%g1 + %lo(sys_send), %g0
+28:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+do_sys_recv: /* sys_recv(int, void *, size_t, unsigned int) */
+29:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_recv), %g1
+30:	lduwa		[%o1 + 0x8] %asi, %o2
+31:	lduwa		[%o1 + 0xc] %asi, %o3
+	jmpl		%g1 + %lo(sys_recv), %g0
+32:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+do_sys_sendto: /* sys_sendto(int, u32, compat_size_t, unsigned int, u32, int) */
+33:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_sendto), %g1
+34:	lduwa		[%o1 + 0x8] %asi, %o2
+35:	lduwa		[%o1 + 0xc] %asi, %o3
+36:	lduwa		[%o1 + 0x10] %asi, %o4
+37:	ldswa		[%o1 + 0x14] %asi, %o5
+	jmpl		%g1 + %lo(sys_sendto), %g0
+38:	 lduwa		[%o1 + 0x4] %asi, %o1
+do_sys_recvfrom: /* sys_recvfrom(int, u32, compat_size_t, unsigned int, u32, u32) */
+39:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_recvfrom), %g1
+40:	lduwa		[%o1 + 0x8] %asi, %o2
+41:	lduwa		[%o1 + 0xc] %asi, %o3
+42:	lduwa		[%o1 + 0x10] %asi, %o4
+43:	lduwa		[%o1 + 0x14] %asi, %o5
+	jmpl		%g1 + %lo(sys_recvfrom), %g0
+44:	 lduwa		[%o1 + 0x4] %asi, %o1
+do_sys_shutdown: /* sys_shutdown(int, int) */
+45:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_shutdown), %g1
+	jmpl		%g1 + %lo(sys_shutdown), %g0
+46:	 ldswa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+	nop
+do_sys_setsockopt: /* compat_sys_setsockopt(int, int, int, char *, int) */
+47:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(compat_sys_setsockopt), %g1
+48:	ldswa		[%o1 + 0x8] %asi, %o2
+49:	lduwa		[%o1 + 0xc] %asi, %o3
+50:	ldswa		[%o1 + 0x10] %asi, %o4
+	jmpl		%g1 + %lo(compat_sys_setsockopt), %g0
+51:	 ldswa		[%o1 + 0x4] %asi, %o1
+	nop
+do_sys_getsockopt: /* compat_sys_getsockopt(int, int, int, u32, u32) */
+52:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(compat_sys_getsockopt), %g1
+53:	ldswa		[%o1 + 0x8] %asi, %o2
+54:	lduwa		[%o1 + 0xc] %asi, %o3
+55:	lduwa		[%o1 + 0x10] %asi, %o4
+	jmpl		%g1 + %lo(compat_sys_getsockopt), %g0
+56:	 ldswa		[%o1 + 0x4] %asi, %o1
+	nop
+do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */
+57:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(compat_sys_sendmsg), %g1
+58:	lduwa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(compat_sys_sendmsg), %g0
+59:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) */
+60:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(compat_sys_recvmsg), %g1
+61:	lduwa		[%o1 + 0x8] %asi, %o2
+	jmpl		%g1 + %lo(compat_sys_recvmsg), %g0
+62:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+	nop
+do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
+63:	ldswa		[%o1 + 0x0] %asi, %o0
+	sethi		%hi(sys_accept4), %g1
+64:	lduwa		[%o1 + 0x8] %asi, %o2
+65:	ldswa		[%o1 + 0xc] %asi, %o3
+	jmpl		%g1 + %lo(sys_accept4), %g0
+66:	 lduwa		[%o1 + 0x4] %asi, %o1
+	nop
+	nop
+
+	.section	__ex_table,"a"
+	.align		4
+	.word		1b, __retl_efault, 2b, __retl_efault
+	.word		3b, __retl_efault, 4b, __retl_efault
+	.word		5b, __retl_efault, 6b, __retl_efault
+	.word		7b, __retl_efault, 8b, __retl_efault
+	.word		9b, __retl_efault, 10b, __retl_efault
+	.word		11b, __retl_efault, 12b, __retl_efault
+	.word		13b, __retl_efault, 14b, __retl_efault
+	.word		15b, __retl_efault, 16b, __retl_efault
+	.word		17b, __retl_efault, 18b, __retl_efault
+	.word		19b, __retl_efault, 20b, __retl_efault
+	.word		21b, __retl_efault, 22b, __retl_efault
+	.word		23b, __retl_efault, 24b, __retl_efault
+	.word		25b, __retl_efault, 26b, __retl_efault
+	.word		27b, __retl_efault, 28b, __retl_efault
+	.word		29b, __retl_efault, 30b, __retl_efault
+	.word		31b, __retl_efault, 32b, __retl_efault
+	.word		33b, __retl_efault, 34b, __retl_efault
+	.word		35b, __retl_efault, 36b, __retl_efault
+	.word		37b, __retl_efault, 38b, __retl_efault
+	.word		39b, __retl_efault, 40b, __retl_efault
+	.word		41b, __retl_efault, 42b, __retl_efault
+	.word		43b, __retl_efault, 44b, __retl_efault
+	.word		45b, __retl_efault, 46b, __retl_efault
+	.word		47b, __retl_efault, 48b, __retl_efault
+	.word		49b, __retl_efault, 50b, __retl_efault
+	.word		51b, __retl_efault, 52b, __retl_efault
+	.word		53b, __retl_efault, 54b, __retl_efault
+	.word		55b, __retl_efault, 56b, __retl_efault
+	.word		57b, __retl_efault, 58b, __retl_efault
+	.word		59b, __retl_efault, 60b, __retl_efault
+	.word		61b, __retl_efault, 62b, __retl_efault
+	.word		63b, __retl_efault, 64b, __retl_efault
+	.word		65b, __retl_efault, 66b, __retl_efault
+	.previous
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
new file mode 100644
index 0000000..b5da3bf
--- /dev/null
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/* sys_sparc32.c: Conversion between 32bit and 64bit native syscalls.
+ *
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
+ *
+ * These routines handle argument size conversion between the 32-bit
+ * and 64-bit environments.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/capability.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/file.h>
+#include <linux/signal.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/smp.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/uio.h>
+#include <linux/nfs_fs.h>
+#include <linux/quota.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/filter.h>
+#include <linux/highmem.h>
+#include <linux/highuid.h>
+#include <linux/mman.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/icmpv6.h>
+#include <linux/syscalls.h>
+#include <linux/sysctl.h>
+#include <linux/binfmts.h>
+#include <linux/dnotify.h>
+#include <linux/security.h>
+#include <linux/compat.h>
+#include <linux/vfs.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+
+#include <asm/types.h>
+#include <linux/uaccess.h>
+#include <asm/fpumacro.h>
+#include <asm/mmu_context.h>
+#include <asm/compat_signal.h>
+
+#include "systbls.h"
+
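+/* 32-bit userspace passes 64-bit file offsets and lengths split across
+ * two registers; the wrappers below reassemble them as
+ * ((u64)high << 32) | low before calling the generic ksys_*() helpers.
+ */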
+COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, path, u32, high, u32, low)
+{
+	return ksys_truncate(path, ((u64)high << 32) | low);
+}
+
+COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, u32, high, u32, low)
+{
+	return ksys_ftruncate(fd, ((u64)high << 32) | low);
+}
+
+static int cp_compat_stat64(struct kstat *stat,
+			    struct compat_stat64 __user *statbuf)
+{
+	int err;
+
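+	/* Each put_user() returns 0 or -EFAULT, so OR-ing the results
+	 * leaves err as 0 on success or -EFAULT if any store faulted.
+	 */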
+	err  = put_user(huge_encode_dev(stat->dev), &statbuf->st_dev);
+	err |= put_user(stat->ino, &statbuf->st_ino);
+	err |= put_user(stat->mode, &statbuf->st_mode);
+	err |= put_user(stat->nlink, &statbuf->st_nlink);
+	err |= put_user(from_kuid_munged(current_user_ns(), stat->uid), &statbuf->st_uid);
+	err |= put_user(from_kgid_munged(current_user_ns(), stat->gid), &statbuf->st_gid);
+	err |= put_user(huge_encode_dev(stat->rdev), &statbuf->st_rdev);
+	err |= put_user(0, (unsigned long __user *) &statbuf->__pad3[0]);
+	err |= put_user(stat->size, &statbuf->st_size);
+	err |= put_user(stat->blksize, &statbuf->st_blksize);
+	err |= put_user(0, (unsigned int __user *) &statbuf->__pad4[0]);
+	err |= put_user(0, (unsigned int __user *) &statbuf->__pad4[4]);
+	err |= put_user(stat->blocks, &statbuf->st_blocks);
+	err |= put_user(stat->atime.tv_sec, &statbuf->st_atime);
+	err |= put_user(stat->atime.tv_nsec, &statbuf->st_atime_nsec);
+	err |= put_user(stat->mtime.tv_sec, &statbuf->st_mtime);
+	err |= put_user(stat->mtime.tv_nsec, &statbuf->st_mtime_nsec);
+	err |= put_user(stat->ctime.tv_sec, &statbuf->st_ctime);
+	err |= put_user(stat->ctime.tv_nsec, &statbuf->st_ctime_nsec);
+	err |= put_user(0, &statbuf->__unused4);
+	err |= put_user(0, &statbuf->__unused5);
+
+	return err;
+}
+
+COMPAT_SYSCALL_DEFINE2(stat64, const char __user *, filename,
+		struct compat_stat64 __user *, statbuf)
+{
+	struct kstat stat;
+	int error = vfs_stat(filename, &stat);
+
+	if (!error)
+		error = cp_compat_stat64(&stat, statbuf);
+	return error;
+}
+
+COMPAT_SYSCALL_DEFINE2(lstat64, const char __user *, filename,
+		struct compat_stat64 __user *, statbuf)
+{
+	struct kstat stat;
+	int error = vfs_lstat(filename, &stat);
+
+	if (!error)
+		error = cp_compat_stat64(&stat, statbuf);
+	return error;
+}
+
+COMPAT_SYSCALL_DEFINE2(fstat64, unsigned int, fd,
+		struct compat_stat64 __user *, statbuf)
+{
+	struct kstat stat;
+	int error = vfs_fstat(fd, &stat);
+
+	if (!error)
+		error = cp_compat_stat64(&stat, statbuf);
+	return error;
+}
+
+COMPAT_SYSCALL_DEFINE4(fstatat64, unsigned int, dfd,
+		const char __user *, filename,
+		struct compat_stat64 __user *, statbuf, int, flag)
+{
+	struct kstat stat;
+	int error;
+
+	error = vfs_fstatat(dfd, filename, &stat, flag);
+	if (error)
+		return error;
+	return cp_compat_stat64(&stat, statbuf);
+}
+
+COMPAT_SYSCALL_DEFINE3(sparc_sigaction, int, sig,
+			struct compat_old_sigaction __user *,act,
+			struct compat_old_sigaction __user *,oact)
+{
+	WARN_ON_ONCE(sig >= 0);
+	return compat_sys_sigaction(-sig, act, oact);
+}
+
+COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
+			struct compat_sigaction __user *,act,
+			struct compat_sigaction __user *,oact,
+			void __user *,restorer,
+			compat_size_t,sigsetsize)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(compat_sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		u32 u_handler, u_restorer;
+
+		new_ka.ka_restorer = restorer;
+		ret = get_user(u_handler, &act->sa_handler);
+		new_ka.sa.sa_handler = compat_ptr(u_handler);
+		ret |= get_compat_sigset(&new_ka.sa.sa_mask, &act->sa_mask);
+		ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
+		ret |= get_user(u_restorer, &act->sa_restorer);
+		new_ka.sa.sa_restorer = compat_ptr(u_restorer);
+		if (ret)
+			return -EFAULT;
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler);
+		ret |= put_compat_sigset(&oact->sa_mask, &old_ka.sa.sa_mask,
+					 sizeof(oact->sa_mask));
+		ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+		ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer);
+		if (ret)
+			ret = -EFAULT;
+	}
+
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, ubuf,
+			compat_size_t, count, u32, poshi, u32, poslo)
+{
+	return ksys_pread64(fd, ubuf, count, ((u64)poshi << 32) | poslo);
+}
+
+COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, char __user *, ubuf,
+			compat_size_t, count, u32, poshi, u32, poslo)
+{
+	return ksys_pwrite64(fd, ubuf, count, ((u64)poshi << 32) | poslo);
+}
+
+COMPAT_SYSCALL_DEFINE4(readahead, int, fd, u32, offhi, u32, offlo,
+		     compat_size_t, count)
+{
+	return ksys_readahead(fd, ((u64)offhi << 32) | offlo, count);
+}
+
+COMPAT_SYSCALL_DEFINE5(fadvise64, int, fd, u32, offhi, u32, offlo,
+			  compat_size_t, len, int, advice)
+{
+	return ksys_fadvise64_64(fd, ((u64)offhi << 32) | offlo, len, advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, u32, offhi, u32, offlo,
+			     u32, lenhi, u32, lenlo, int, advice)
+{
+	return ksys_fadvise64_64(fd,
+				 ((u64)offhi << 32) | offlo,
+				 ((u64)lenhi << 32) | lenlo,
+				 advice);
+}
+
+COMPAT_SYSCALL_DEFINE6(sync_file_range, unsigned int, fd, u32, off_high, u32, off_low,
+			u32, nb_high, u32, nb_low, unsigned int, flags)
+{
+	return ksys_sync_file_range(fd,
+				    ((u64)off_high << 32) | off_low,
+				    ((u64)nb_high << 32) | nb_low,
+				    flags);
+}
+
+COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, u32, offhi, u32, offlo,
+				     u32, lenhi, u32, lenlo)
+{
+	return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
+			      ((loff_t)lenhi << 32) | lenlo);
+}
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
new file mode 100644
index 0000000..452e4d0
--- /dev/null
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/* linux/arch/sparc/kernel/sys_sparc.c
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/sparc
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/debug.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/utsname.h>
+#include <linux/smp.h>
+#include <linux/ipc.h>
+
+#include <linux/uaccess.h>
+#include <asm/unistd.h>
+
+#include "systbls.h"
+
+/* #define DEBUG_UNIMP_SYSCALL */
+
+/* XXX Make this per-binary type, this way we can detect the type of
+ * XXX a binary.  Every Sparc executable calls this very early on.
+ */
+SYSCALL_DEFINE0(getpagesize)
+{
+	return PAGE_SIZE; /* Possibly older binaries want 8192 on sun4's? */
+}
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_unmapped_area_info info;
+
+	if (flags & MAP_FIXED) {
+		/* We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
+			return -EINVAL;
+		return addr;
+	}
+
+	/* See asm-sparc/uaccess.h */
+	if (len > TASK_SIZE - PAGE_SIZE)
+		return -ENOMEM;
+	if (!addr)
+		addr = TASK_UNMAPPED_BASE;
+
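+	/* Shared mappings must be SHMLBA-aligned so that all user
+	 * aliases of a page land on the same D-cache color.
+	 */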
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = addr;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = (flags & MAP_SHARED) ?
+		(PAGE_MASK & (SHMLBA - 1)) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	return vm_unmapped_area(&info);
+}
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way unix traditionally does this, though.
+ */
+SYSCALL_DEFINE0(sparc_pipe)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe_flags(fd, 0);
+	if (error)
+		goto out;
+	current_pt_regs()->u_regs[UREG_I1] = fd[1];
+	error = fd[0];
+out:
+	return error;
+}
+
+int sparc_mmap_check(unsigned long addr, unsigned long len)
+{
+	/* See asm-sparc/uaccess.h */
+	if (len > TASK_SIZE - PAGE_SIZE || addr + len > TASK_SIZE - PAGE_SIZE)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Linux version of mmap */
+
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
+	unsigned long, prot, unsigned long, flags, unsigned long, fd,
+	unsigned long, pgoff)
+{
+	/* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE
+	   we have. */
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd,
+			       pgoff >> (PAGE_SHIFT - 12));
+}
+
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+	unsigned long, prot, unsigned long, flags, unsigned long, fd,
+	unsigned long, off)
+{
+	/* no alignment check? */
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
+
+SYSCALL_DEFINE5(sparc_remap_file_pages, unsigned long, start, unsigned long, size,
+			   unsigned long, prot, unsigned long, pgoff,
+			   unsigned long, flags)
+{
+	/* This works on an existing mmap so we don't need to validate
+	 * the range as that was done at the original mmap call.
+	 */
+	return sys_remap_file_pages(start, size, prot,
+				    (pgoff >> (PAGE_SHIFT - 12)), flags);
+}
+
+SYSCALL_DEFINE0(nis_syscall)
+{
+	static int count = 0;
+	struct pt_regs *regs = current_pt_regs();
+
+	if (count++ > 5)
+		return -ENOSYS;
+	printk ("%s[%d]: Unimplemented SPARC system call %d\n",
+		current->comm, task_pid_nr(current), (int)regs->u_regs[1]);
+#ifdef DEBUG_UNIMP_SYSCALL
+	show_regs (regs);
+#endif
+	return -ENOSYS;
+}
+
+/* #define DEBUG_SPARC_BREAKPOINT */
+
+asmlinkage void
+sparc_breakpoint (struct pt_regs *regs)
+{
+
+#ifdef DEBUG_SPARC_BREAKPOINT
+        printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc);
+#endif
+	force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current);
+
+#ifdef DEBUG_SPARC_BREAKPOINT
+	printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc);
+#endif
+}
+
+SYSCALL_DEFINE3(sparc_sigaction, int, sig,
+		struct old_sigaction __user *,act,
+		struct old_sigaction __user *,oact)
+{
+	WARN_ON_ONCE(sig >= 0);
+	return sys_sigaction(-sig, act, oact);
+}
+
+SYSCALL_DEFINE5(rt_sigaction, int, sig,
+		 const struct sigaction __user *, act,
+		 struct sigaction __user *, oact,
+		 void __user *, restorer,
+		 size_t, sigsetsize)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		new_ka.ka_restorer = restorer;
+		if (copy_from_user(&new_ka.sa, act, sizeof(*act)))
+			return -EFAULT;
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (copy_to_user(oact, &old_ka.sa, sizeof(*oact)))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
+{
+	int nlen, err;
+	char tmp[__NEW_UTS_LEN + 1];
+
+	if (len < 0)
+		return -EINVAL;
+
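+	/* Copy the domainname into a local buffer while holding uts_sem,
+	 * then drop the lock before copy_to_user() so we never fault on
+	 * user memory with the semaphore held.
+	 */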
+	down_read(&uts_sem);
+
+	nlen = strlen(utsname()->domainname) + 1;
+	err = -EINVAL;
+	if (nlen > len)
+		goto out_unlock;
+	memcpy(tmp, utsname()->domainname, nlen);
+
+	up_read(&uts_sem);
+
+	if (copy_to_user(name, tmp, nlen))
+		return -EFAULT;
+	return 0;
+
+out_unlock:
+	up_read(&uts_sem);
+	return err;
+}
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
new file mode 100644
index 0000000..274ed0b
--- /dev/null
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0
+/* linux/arch/sparc64/kernel/sys_sparc.c
+ *
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/sparc
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/debug.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/utsname.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/ipc.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <linux/export.h>
+#include <linux/context_tracking.h>
+
+#include <linux/uaccess.h>
+#include <asm/utrap.h>
+#include <asm/unistd.h>
+
+#include "entry.h"
+#include "kernel.h"
+#include "systbls.h"
+
+/* #define DEBUG_UNIMP_SYSCALL */
+
+SYSCALL_DEFINE0(getpagesize)
+{
+	return PAGE_SIZE;
+}
+
+/* Does addr --> addr+len fall within 4GB of the VA-space hole or
+ * overflow past the end of the 64-bit address space?
+ */
+static inline int invalid_64bit_range(unsigned long addr, unsigned long len)
+{
+	unsigned long va_exclude_start, va_exclude_end;
+
+	va_exclude_start = VA_EXCLUDE_START;
+	va_exclude_end   = VA_EXCLUDE_END;
+
+	if (unlikely(len >= va_exclude_start))
+		return 1;
+
+	if (unlikely((addr + len) < addr))
+		return 1;
+
+	if (unlikely((addr >= va_exclude_start && addr < va_exclude_end) ||
+		     ((addr + len) >= va_exclude_start &&
+		      (addr + len) < va_exclude_end)))
+		return 1;
+
+	return 0;
+}
+
+/* These functions differ from the default implementations in
+ * mm/mmap.c in two ways:
+ *
+ * 1) For file backed MAP_SHARED mmap()'s we D-cache color align,
+ *    for fixed such mappings we just validate what the user gave us.
+ * 2) For 64-bit tasks we avoid mapping anything within 4GB of
+ *    the spitfire/niagara VA-hole.
+ */
+
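+/* COLOR_ALIGN rounds addr up to an SHMLBA boundary and adds back the
+ * file offset's color within SHMLBA, so addr - (pgoff << PAGE_SHIFT)
+ * stays SHMLBA-aligned.  With illustrative values SHMLBA = 0x10000,
+ * PAGE_SHIFT = 13: COLOR_ALIGN(0x10400, 1) = 0x20000 + 0x2000 = 0x22000.
+ */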
+static inline unsigned long COLOR_ALIGN(unsigned long addr,
+					 unsigned long pgoff)
+{
+	unsigned long base = (addr+SHMLBA-1)&~(SHMLBA-1);
+	unsigned long off = (pgoff<<PAGE_SHIFT) & (SHMLBA-1);
+
+	return base + off;
+}
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct * vma;
+	unsigned long task_size = TASK_SIZE;
+	int do_color_align;
+	struct vm_unmapped_area_info info;
+
+	if (flags & MAP_FIXED) {
+		/* We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (test_thread_flag(TIF_32BIT))
+		task_size = STACK_TOP32;
+	if (unlikely(len > task_size || len >= VA_EXCLUDE_START))
+		return -ENOMEM;
+
+	do_color_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_color_align = 1;
+
+	if (addr) {
+		if (do_color_align)
+			addr = COLOR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (task_size - len >= addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			return addr;
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = TASK_UNMAPPED_BASE;
+	info.high_limit = min(task_size, VA_EXCLUDE_START);
+	info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+
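+	/* -ENOMEM comes back as a non-page-aligned value; if the search
+	 * below the VA hole failed and this task can also use addresses
+	 * above it, retry in the upper range.
+	 */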
+	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.low_limit = VA_EXCLUDE_END;
+		info.high_limit = task_size;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			  const unsigned long len, const unsigned long pgoff,
+			  const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long task_size = STACK_TOP32;
+	unsigned long addr = addr0;
+	int do_color_align;
+	struct vm_unmapped_area_info info;
+
+	/* This should only ever run for 32-bit processes.  */
+	BUG_ON(!test_thread_flag(TIF_32BIT));
+
+	if (flags & MAP_FIXED) {
+		/* We do not accept a shared mapping if it would violate
+		 * cache aliasing constraints.
+		 */
+		if ((flags & MAP_SHARED) &&
+		    ((addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (unlikely(len > task_size))
+		return -ENOMEM;
+
+	do_color_align = 0;
+	if (filp || (flags & MAP_SHARED))
+		do_color_align = 1;
+
+	/* requesting a specific address */
+	if (addr) {
+		if (do_color_align)
+			addr = COLOR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (task_size - len >= addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			return addr;
+	}
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = mm->mmap_base;
+	info.align_mask = do_color_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = STACK_TOP32;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+/* Try to align mapping such that we align it as much as possible. */
+unsigned long get_fb_unmapped_area(struct file *filp, unsigned long orig_addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	unsigned long align_goal, addr = -ENOMEM;
+	unsigned long (*get_area)(struct file *, unsigned long,
+				  unsigned long, unsigned long, unsigned long);
+
+	get_area = current->mm->get_unmapped_area;
+
+	if (flags & MAP_FIXED) {
+		/* Ok, don't mess with it. */
+		return get_area(NULL, orig_addr, len, pgoff, flags);
+	}
+	flags &= ~MAP_SHARED;
+
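+	/* Over-allocate by align_goal - PAGE_SIZE so a suitably aligned
+	 * start must exist inside the area found, then round up to it;
+	 * on failure retry with a smaller goal
+	 * (4 MB -> 512 KB -> 64 KB -> PAGE_SIZE).
+	 */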
+	align_goal = PAGE_SIZE;
+	if (len >= (4UL * 1024 * 1024))
+		align_goal = (4UL * 1024 * 1024);
+	else if (len >= (512UL * 1024))
+		align_goal = (512UL * 1024);
+	else if (len >= (64UL * 1024))
+		align_goal = (64UL * 1024);
+
+	do {
+		addr = get_area(NULL, orig_addr, len + (align_goal - PAGE_SIZE), pgoff, flags);
+		if (!(addr & ~PAGE_MASK)) {
+			addr = (addr + (align_goal - 1UL)) & ~(align_goal - 1UL);
+			break;
+		}
+
+		if (align_goal == (4UL * 1024 * 1024))
+			align_goal = (512UL * 1024);
+		else if (align_goal == (512UL * 1024))
+			align_goal = (64UL * 1024);
+		else
+			align_goal = PAGE_SIZE;
+	} while ((addr & ~PAGE_MASK) && align_goal > PAGE_SIZE);
+
+	/* Either the mapping is smaller than 64K, or larger aligned
+	 * areas could not be obtained.
+	 */
+	if (addr & ~PAGE_MASK)
+		addr = get_area(NULL, orig_addr, len, pgoff, flags);
+
+	return addr;
+}
+EXPORT_SYMBOL(get_fb_unmapped_area);
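+
+/* A worked example of the ladder above: for a 1MB mapping, align_goal
+ * starts at 512K, so the first attempt requests 1MB + (512K - PAGE_SIZE)
+ * bytes and rounds the returned address up to a 512K boundary; if the
+ * padded request cannot be satisfied, the goal drops to 64K and finally
+ * to plain page alignment.
+ */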
+
+/* Essentially the same as PowerPC.  */
+static unsigned long mmap_rnd(void)
+{
+	unsigned long rnd = 0UL;
+
+	if (current->flags & PF_RANDOMIZE) {
+		unsigned long val = get_random_long();
+		if (test_thread_flag(TIF_32BIT))
+			rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
+		else
+			rnd = (val % (1UL << (30UL-PAGE_SHIFT)));
+	}
+	return rnd << PAGE_SHIFT;
+}
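+
+/* For illustration, with the usual sparc64 8K pages (PAGE_SHIFT == 13)
+ * this gives val % 2^10 pages for a 32-bit task (up to 8MB of offset)
+ * and val % 2^17 pages for a 64-bit task (up to 1GB).
+ */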
+
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+	unsigned long random_factor = mmap_rnd();
+	unsigned long gap;
+
+	/*
+	 * Fall back to the standard layout if the personality
+	 * bit is set, or if the expected stack growth is unlimited:
+	 */
+	gap = rlim_stack->rlim_cur;
+	if (!test_thread_flag(TIF_32BIT) ||
+	    (current->personality & ADDR_COMPAT_LAYOUT) ||
+	    gap == RLIM_INFINITY ||
+	    sysctl_legacy_va_layout) {
+		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+	} else {
+		/* We know it's 32-bit */
+		unsigned long task_size = STACK_TOP32;
+
+		if (gap < 128 * 1024 * 1024)
+			gap = 128 * 1024 * 1024;
+		if (gap > (task_size / 6 * 5))
+			gap = (task_size / 6 * 5);
+
+		mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+	}
+}
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way unix traditionally does this, though.
+ */
+SYSCALL_DEFINE0(sparc_pipe)
+{
+	int fd[2];
+	int error;
+
+	error = do_pipe_flags(fd, 0);
+	if (error)
+		goto out;
+	current_pt_regs()->u_regs[UREG_I1] = fd[1];
+	error = fd[0];
+out:
+	return error;
+}
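+
+/* Both pipe ends come back in registers rather than through a user
+ * buffer: fd[0] is the ordinary return value and fd[1] is written into
+ * the pt_regs slot that the trap return restores into the caller's
+ * %o1, the traditional SunOS convention.
+ */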
+
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ */
+
+SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second,
+		unsigned long, third, void __user *, ptr, long, fifth)
+{
+	long err;
+
+	/* No need for backward compatibility. We can start fresh... */
+	if (call <= SEMTIMEDOP) {
+		switch (call) {
+		case SEMOP:
+			err = sys_semtimedop(first, ptr,
+					     (unsigned int)second, NULL);
+			goto out;
+		case SEMTIMEDOP:
+			err = sys_semtimedop(first, ptr, (unsigned int)second,
+				(const struct timespec __user *)
+					     (unsigned long) fifth);
+			goto out;
+		case SEMGET:
+			err = sys_semget(first, (int)second, (int)third);
+			goto out;
+		case SEMCTL: {
+			err = sys_semctl(first, second,
+					 (int)third | IPC_64,
+					 (unsigned long) ptr);
+			goto out;
+		}
+		default:
+			err = -ENOSYS;
+			goto out;
+		}
+	}
+	if (call <= MSGCTL) {
+		switch (call) {
+		case MSGSND:
+			err = sys_msgsnd(first, ptr, (size_t)second,
+					 (int)third);
+			goto out;
+		case MSGRCV:
+			err = sys_msgrcv(first, ptr, (size_t)second, fifth,
+					 (int)third);
+			goto out;
+		case MSGGET:
+			err = sys_msgget((key_t)first, (int)second);
+			goto out;
+		case MSGCTL:
+			err = sys_msgctl(first, (int)second | IPC_64, ptr);
+			goto out;
+		default:
+			err = -ENOSYS;
+			goto out;
+		}
+	}
+	if (call <= SHMCTL) {
+		switch (call) {
+		case SHMAT: {
+			ulong raddr;
+			err = do_shmat(first, ptr, (int)second, &raddr, SHMLBA);
+			if (!err) {
+				if (put_user(raddr,
+					     (ulong __user *) third))
+					err = -EFAULT;
+			}
+			goto out;
+		}
+		case SHMDT:
+			err = sys_shmdt(ptr);
+			goto out;
+		case SHMGET:
+			err = sys_shmget(first, (size_t)second, (int)third);
+			goto out;
+		case SHMCTL:
+			err = sys_shmctl(first, (int)second | IPC_64, ptr);
+			goto out;
+		default:
+			err = -ENOSYS;
+			goto out;
+		}
+	} else {
+		err = -ENOSYS;
+	}
+out:
+	return err;
+}
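+
+/* The demultiplexer above depends on the call numbers being grouped in
+ * ascending ranges (the SEM* calls lowest, then MSG*, then SHM*), so
+ * each block needs only an upper-bound check.  A libc semop(id, ops, n),
+ * for instance, arrives here as roughly sparc_ipc(SEMOP, id, n, 0, ops, 0).
+ */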
+
+SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality)
+{
+	long ret;
+
+	if (personality(current->personality) == PER_LINUX32 &&
+	    personality(personality) == PER_LINUX)
+		personality |= PER_LINUX32;
+	ret = sys_personality(personality);
+	if (personality(ret) == PER_LINUX32)
+		ret &= ~PER_LINUX32;
+
+	return ret;
+}
+
+int sparc_mmap_check(unsigned long addr, unsigned long len)
+{
+	if (test_thread_flag(TIF_32BIT)) {
+		if (len >= STACK_TOP32)
+			return -EINVAL;
+
+		if (addr > STACK_TOP32 - len)
+			return -EINVAL;
+	} else {
+		if (len >= VA_EXCLUDE_START)
+			return -EINVAL;
+
+		if (invalid_64bit_range(addr, len))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Linux version of mmap */
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags, unsigned long, fd,
+		unsigned long, off)
+{
+	unsigned long retval = -EINVAL;
+
+	if ((off + PAGE_ALIGN(len)) < off)
+		goto out;
+	if (off & ~PAGE_MASK)
+		goto out;
+	retval = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+out:
+	return retval;
+}
+
+SYSCALL_DEFINE2(64_munmap, unsigned long, addr, size_t, len)
+{
+	if (invalid_64bit_range(addr, len))
+		return -EINVAL;
+
+	return vm_munmap(addr, len);
+}
+
+SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len,
+		unsigned long, new_len, unsigned long, flags,
+		unsigned long, new_addr)
+{
+	if (test_thread_flag(TIF_32BIT))
+		return -EINVAL;
+	return sys_mremap(addr, old_len, new_len, flags, new_addr);
+}
+
+SYSCALL_DEFINE0(nis_syscall)
+{
+	static int count;
+	struct pt_regs *regs = current_pt_regs();
+
+	/* Don't make the system unusable if something gets stuck */
+	if (count++ > 5)
+		return -ENOSYS;
+
+	printk("Unimplemented SPARC system call %ld\n", regs->u_regs[1]);
+#ifdef DEBUG_UNIMP_SYSCALL
+	show_regs(regs);
+#endif
+
+	return -ENOSYS;
+}
+
+/* #define DEBUG_SPARC_BREAKPOINT */
+
+asmlinkage void sparc_breakpoint(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+#ifdef DEBUG_SPARC_BREAKPOINT
+	printk("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc);
+#endif
+	force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current);
+#ifdef DEBUG_SPARC_BREAKPOINT
+	printk("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc);
+#endif
+	exception_exit(prev_state);
+}
+
+SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len)
+{
+	int nlen, err;
+	char tmp[__NEW_UTS_LEN + 1];
+
+	if (len < 0)
+		return -EINVAL;
+
+	down_read(&uts_sem);
+
+	nlen = strlen(utsname()->domainname) + 1;
+	err = -EINVAL;
+	if (nlen > len)
+		goto out_unlock;
+	memcpy(tmp, utsname()->domainname, nlen);
+
+	up_read(&uts_sem);
+
+	if (copy_to_user(name, tmp, nlen))
+		return -EFAULT;
+	return 0;
+
+out_unlock:
+	up_read(&uts_sem);
+	return err;
+}
+
+SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
+		utrap_handler_t, new_p, utrap_handler_t, new_d,
+		utrap_handler_t __user *, old_p,
+		utrap_handler_t __user *, old_d)
+{
+	if (type < UT_INSTRUCTION_EXCEPTION || type > UT_TRAP_INSTRUCTION_31)
+		return -EINVAL;
+	if (new_p == (utrap_handler_t)(long)UTH_NOCHANGE) {
+		if (old_p) {
+			if (!current_thread_info()->utraps) {
+				if (put_user(NULL, old_p))
+					return -EFAULT;
+			} else {
+				if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p))
+					return -EFAULT;
+			}
+		}
+		if (old_d) {
+			if (put_user(NULL, old_d))
+				return -EFAULT;
+		}
+		return 0;
+	}
+	if (!current_thread_info()->utraps) {
+		current_thread_info()->utraps =
+			kcalloc(UT_TRAP_INSTRUCTION_31 + 1, sizeof(long),
+				GFP_KERNEL);
+		if (!current_thread_info()->utraps)
+			return -ENOMEM;
+		current_thread_info()->utraps[0] = 1;
+	} else {
+		if ((utrap_handler_t)current_thread_info()->utraps[type] != new_p &&
+		    current_thread_info()->utraps[0] > 1) {
+			unsigned long *p = current_thread_info()->utraps;
+
+			current_thread_info()->utraps =
+				kmalloc_array(UT_TRAP_INSTRUCTION_31 + 1,
+					      sizeof(long),
+					      GFP_KERNEL);
+			if (!current_thread_info()->utraps) {
+				current_thread_info()->utraps = p;
+				return -ENOMEM;
+			}
+			p[0]--;
+			current_thread_info()->utraps[0] = 1;
+			memcpy(current_thread_info()->utraps+1, p+1,
+			       UT_TRAP_INSTRUCTION_31*sizeof(long));
+		}
+	}
+	if (old_p) {
+		if (put_user((utrap_handler_t)(current_thread_info()->utraps[type]), old_p))
+			return -EFAULT;
+	}
+	if (old_d) {
+		if (put_user(NULL, old_d))
+			return -EFAULT;
+	}
+	current_thread_info()->utraps[type] = (long)new_p;
+
+	return 0;
+}
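+
+/* Slot 0 of the utraps table above is a reference count, not a handler:
+ * the table can be shared between threads, so installing a different
+ * handler into a table with utraps[0] > 1 first copies it and drops one
+ * reference, giving copy-on-write semantics for per-thread traps.
+ */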
+
+SYSCALL_DEFINE1(memory_ordering, unsigned long, model)
+{
+	struct pt_regs *regs = current_pt_regs();
+	if (model >= 3)
+		return -EINVAL;
+	regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14);
+	return 0;
+}
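+
+/* The three accepted values are the SPARC V9 memory models: 0 = TSO
+ * (total store order), 1 = PSO (partial store order), 2 = RMO (relaxed
+ * memory order), hence the model >= 3 check; the chosen model is
+ * shifted into the TSTATE.MM field cleared by ~TSTATE_MM above.
+ */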
+
+SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
+		struct sigaction __user *, oact, void __user *, restorer,
+		size_t, sigsetsize)
+{
+	struct k_sigaction new_ka, old_ka;
+	int ret;
+
+	/* XXX: Don't preclude handling different sized sigset_t's.  */
+	if (sigsetsize != sizeof(sigset_t))
+		return -EINVAL;
+
+	if (act) {
+		new_ka.ka_restorer = restorer;
+		if (copy_from_user(&new_ka.sa, act, sizeof(*act)))
+			return -EFAULT;
+	}
+
+	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+	if (!ret && oact) {
+		if (copy_to_user(oact, &old_ka.sa, sizeof(*oact)))
+			return -EFAULT;
+	}
+
+	return ret;
+}
+
+SYSCALL_DEFINE0(kern_features)
+{
+	return KERN_FEATURE_MIXED_MODE_STACK;
+}
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
new file mode 100644
index 0000000..db42b4f
--- /dev/null
+++ b/arch/sparc/kernel/syscalls.S
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	/* SunOS's execv() call only specifies the argv argument; the
+	 * environment settings are the same as those of the calling
+	 * process.
+	 */
+sys64_execve:
+	set	sys_execve, %g1
+	jmpl	%g1, %g0
+	 flushw
+
+sys64_execveat:
+	set	sys_execveat, %g1
+	jmpl	%g1, %g0
+	 flushw
+
+#ifdef CONFIG_COMPAT
+sunos_execv:
+	mov	%g0, %o2
+sys32_execve:
+	set	compat_sys_execve, %g1
+	jmpl	%g1, %g0
+	 flushw
+
+sys32_execveat:
+	set	compat_sys_execveat, %g1
+	jmpl	%g1, %g0
+	 flushw
+#endif
+
+	.align	32
+#ifdef CONFIG_COMPAT
+sys32_sigstack:
+	ba,pt	%xcc, do_sys32_sigstack
+	 mov	%i6, %o2
+#endif
+	.align	32
+#ifdef CONFIG_COMPAT
+sys32_sigreturn:
+	add	%sp, PTREGS_OFF, %o0
+	call	do_sigreturn32
+	 add	%o7, 1f-.-4, %o7
+	nop
+#endif
+sys_rt_sigreturn:
+	add	%sp, PTREGS_OFF, %o0
+	call	do_rt_sigreturn
+	 add	%o7, 1f-.-4, %o7	! make do_rt_sigreturn return to 1f
+	nop
+#ifdef CONFIG_COMPAT
+sys32_rt_sigreturn:
+	add	%sp, PTREGS_OFF, %o0
+	call	do_rt_sigreturn32
+	 add	%o7, 1f-.-4, %o7
+	nop
+#endif
+	.align	32
+1:	ldx	[%g6 + TI_FLAGS], %l5
+	andcc	%l5, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
+	be,pt	%icc, rtrap
+	 nop
+	call	syscall_trace_leave
+	 add	%sp, PTREGS_OFF, %o0
+	ba,pt	%xcc, rtrap
+	 nop
+
+	/* This is how fork() was meant to be done, 8 instruction entry.
+	 *
+	 * I questioned the following code briefly; let me clear things
+	 * up so you don't have to reason through it the way I did.
+	 *
+	 * Know the fork_kpsr etc. we use in the sparc32 port?  We don't
+	 * need it here because the only piece of window state we copy to
+	 * the child is the CWP register.  Even if the parent sleeps,
+	 * we are safe because we stuck it into pt_regs of the parent
+	 * so it will not change.
+	 *
+	 * XXX This raises the question, whether we can do the same on
+	 * XXX sparc32 to get rid of fork_kpsr _and_ fork_kwim.  The
+	 * XXX answer is yes.  We stick fork_kpsr in UREG_G0 and
+	 * XXX fork_kwim in UREG_G1 (global registers are considered
+	 * XXX volatile across a system call in the sparc ABI I think
+	 * XXX if it isn't we can use regs->y instead, anyone who depends
+	 * XXX upon the Y register being preserved across a fork deserves
+	 * XXX to lose).
+	 *
+	 * In fact we should take advantage of that fact for other things
+	 * during system calls...
+	 */
+	.align	32
+sys_vfork: /* Under Linux, vfork and fork are just special cases of clone. */
+	sethi	%hi(0x4000 | 0x0100 | SIGCHLD), %o0	! CLONE_VFORK | CLONE_VM | SIGCHLD
+	or	%o0, %lo(0x4000 | 0x0100 | SIGCHLD), %o0
+	ba,pt	%xcc, sys_clone
+sys_fork:
+	 clr	%o1
+	mov	SIGCHLD, %o0
+sys_clone:
+	flushw
+	movrz	%o1, %fp, %o1
+	mov	0, %o3
+	ba,pt	%xcc, sparc_do_fork
+	 add	%sp, PTREGS_OFF, %o2
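+
+/* In the clone path above, "movrz %o1, %fp, %o1" falls back to the
+ * parent's frame pointer when userspace passed a NULL child stack, and
+ * the flushw first spills all live register windows so the child's
+ * copy of the stack is complete.
+ */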
+
+	.globl	ret_from_fork
+ret_from_fork:
+	/* Clear current_thread_info()->new_child. */
+	stb	%g0, [%g6 + TI_NEW_CHILD]
+	call	schedule_tail
+	 mov	%g7, %o0
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
+	brnz,pt	%o0, ret_sys_call
+	 ldx	[%g6 + TI_FLAGS], %l0
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %l1
+	call	%l1
+	 ldx	[%sp + PTREGS_OFF + PT_V9_G2], %o0
+	ba,pt	%xcc, ret_sys_call
+	 mov	0, %o0
+
+	.globl	sparc_exit_group
+	.type	sparc_exit_group,#function
+sparc_exit_group:
+	sethi	%hi(sys_exit_group), %g7
+	ba,pt	%xcc, 1f
+	 or	%g7, %lo(sys_exit_group), %g7
+	.size	sparc_exit_group,.-sparc_exit_group
+
+	.globl	sparc_exit
+	.type	sparc_exit,#function
+sparc_exit:
+	sethi	%hi(sys_exit), %g7
+	or	%g7, %lo(sys_exit), %g7
+1:	rdpr	%pstate, %g2
+	wrpr	%g2, PSTATE_IE, %pstate
+	rdpr	%otherwin, %g1
+	rdpr	%cansave, %g3
+	add	%g3, %g1, %g3
+	wrpr	%g3, 0x0, %cansave
+	wrpr	%g0, 0x0, %otherwin
+	wrpr	%g2, 0x0, %pstate
+	jmpl	%g7, %g0
+	 stb	%g0, [%g6 + TI_WSAVED]
+	.size	sparc_exit,.-sparc_exit
+
+linux_sparc_ni_syscall:
+	sethi	%hi(sys_ni_syscall), %l7
+	ba,pt	%xcc, 4f
+	 or	%l7, %lo(sys_ni_syscall), %l7
+
+linux_syscall_trace32:
+	call	syscall_trace_enter
+	 add	%sp, PTREGS_OFF, %o0
+	brnz,pn	%o0, 3f
+	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table32), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
+	srl	%i0, 0, %o0
+	lduw	[%l7 + %l4], %l7
+	srl	%i4, 0, %o4
+	srl	%i1, 0, %o1
+	srl	%i2, 0, %o2
+	ba,pt	%xcc, 5f
+	 srl	%i3, 0, %o3
+
+linux_syscall_trace:
+	call	syscall_trace_enter
+	 add	%sp, PTREGS_OFF, %o0
+	brnz,pn	%o0, 3f
+	 mov	-ENOSYS, %o0
+
+	/* Syscall tracing can modify the registers.  */
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %g1
+	sethi	%hi(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %i0
+	or	%l7, %lo(sys_call_table64), %l7
+	ldx	[%sp + PTREGS_OFF + PT_V9_I1], %i1
+	ldx	[%sp + PTREGS_OFF + PT_V9_I2], %i2
+	ldx	[%sp + PTREGS_OFF + PT_V9_I3], %i3
+	ldx	[%sp + PTREGS_OFF + PT_V9_I4], %i4
+	ldx	[%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+	cmp	%g1, NR_syscalls
+	bgeu,pn	%xcc, 3f
+	 mov	-ENOSYS, %o0
+
+	sll	%g1, 2, %l4
+	mov	%i0, %o0
+	lduw	[%l7 + %l4], %l7
+	mov	%i1, %o1
+	mov	%i2, %o2
+	mov	%i3, %o3
+	b,pt	%xcc, 2f
+	 mov	%i4, %o4
+
+
+	/* Linux 32-bit system calls enter here... */
+	.align	32
+	.globl	linux_sparc_syscall32
+linux_sparc_syscall32:
+	/* Direct access to user regs, much faster. */
+	cmp	%g1, NR_syscalls			! IEU1	Group
+	bgeu,pn	%xcc, linux_sparc_ni_syscall		! CTI
+	 srl	%i0, 0, %o0				! IEU0
+	sll	%g1, 2, %l4				! IEU0	Group
+	srl	%i4, 0, %o4				! IEU1
+	lduw	[%l7 + %l4], %l7			! Load
+	srl	%i1, 0, %o1				! IEU0	Group
+	ldx	[%g6 + TI_FLAGS], %l0		! Load
+
+	srl	%i3, 0, %o3				! IEU0
+	srl	%i2, 0, %o2				! IEU0	Group
+	andcc	%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
+	bne,pn	%icc, linux_syscall_trace32		! CTI
+	 mov	%i0, %l5				! IEU1
+5:	call	%l7					! CTI	Group brk forced
+	 srl	%i5, 0, %o5				! IEU1
+	ba,pt	%xcc, 3f
+	 sra	%o0, 0, %o0
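+
+/* The srl-by-zero instructions above do the compat sanitizing:
+ * "srl %iN, 0, %oN" zero-extends each 32-bit user argument, while the
+ * "sra %o0, 0, %o0" on the way out sign-extends the 32-bit result so
+ * negative errno values survive the 64-bit return path.
+ */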
+
+	/* Linux native system calls enter here... */
+	.align	32
+	.globl	linux_sparc_syscall
+linux_sparc_syscall:
+	/* Direct access to user regs, much faster. */
+	cmp	%g1, NR_syscalls			! IEU1	Group
+	bgeu,pn	%xcc, linux_sparc_ni_syscall		! CTI
+	 mov	%i0, %o0				! IEU0
+	sll	%g1, 2, %l4				! IEU0	Group
+	mov	%i1, %o1				! IEU1
+	lduw	[%l7 + %l4], %l7			! Load
+4:	mov	%i2, %o2				! IEU0	Group
+	ldx	[%g6 + TI_FLAGS], %l0		! Load
+
+	mov	%i3, %o3				! IEU1
+	mov	%i4, %o4				! IEU0	Group
+	andcc	%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
+	bne,pn	%icc, linux_syscall_trace		! CTI	Group
+	 mov	%i0, %l5				! IEU0
+2:	call	%l7					! CTI	Group brk forced
+	 mov	%i5, %o5				! IEU0
+	nop
+
+3:	stx	%o0, [%sp + PTREGS_OFF + PT_V9_I0]
+ret_sys_call:
+	ldx	[%sp + PTREGS_OFF + PT_V9_TSTATE], %g3
+	mov	%ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2
+	sllx	%g2, 32, %g2
+
+	cmp	%o0, -ERESTART_RESTARTBLOCK
+	bgeu,pn	%xcc, 1f
+	 andcc	%l0, (_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT|_TIF_SYSCALL_TRACEPOINT|_TIF_NOHZ), %g0
+	ldx	[%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc
+
+2:
+	/* System call success, clear Carry condition code. */
+	andn	%g3, %g2, %g3
+3:
+	stx	%g3, [%sp + PTREGS_OFF + PT_V9_TSTATE]
+	bne,pn	%icc, linux_syscall_trace2
+	 add	%l1, 0x4, %l2			! npc = npc+4
+	stx	%l1, [%sp + PTREGS_OFF + PT_V9_TPC]
+	ba,pt	%xcc, rtrap
+	 stx	%l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
+
+1:
+	/* Check if force_successful_syscall_return()
+	 * was invoked.
+	 */
+	ldub	[%g6 + TI_SYS_NOERROR], %l2
+	brnz,pn %l2, 2b
+	 ldx	[%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc
+	/* System call failure, set Carry condition code.
+	 * Also, get abs(errno) to return to the process.
+	 */
+	sub	%g0, %o0, %o0
+	stx	%o0, [%sp + PTREGS_OFF + PT_V9_I0]
+	ba,pt	%xcc, 3b
+	 or	%g3, %g2, %g3
+
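+/* The failure path above implements the SPARC ABI error convention:
+ * rather than returning a negative value, the kernel hands back the
+ * positive errno and sets the carry bits cached in %g2
+ * (TSTATE_XCARRY | TSTATE_ICARRY), so userspace detects failure by
+ * testing the carry flag after the trap.
+ */
+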
+linux_syscall_trace2:
+	call	syscall_trace_leave
+	 add	%sp, PTREGS_OFF, %o0
+	stx	%l1, [%sp + PTREGS_OFF + PT_V9_TPC]
+	ba,pt	%xcc, rtrap
+	 stx	%l2, [%sp + PTREGS_OFF + PT_V9_TNPC]
diff --git a/arch/sparc/kernel/sysfs.c b/arch/sparc/kernel/sysfs.c
new file mode 100644
index 0000000..6d60d41
--- /dev/null
+++ b/arch/sparc/kernel/sysfs.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/* sysfs.c: Topology sysfs support code for sparc64.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+
+#include <asm/cpudata.h>
+#include <asm/hypervisor.h>
+#include <asm/spitfire.h>
+
+static DEFINE_PER_CPU(struct hv_mmu_statistics, mmu_stats) __attribute__((aligned(64)));
+
+#define SHOW_MMUSTAT_ULONG(NAME) \
+static ssize_t show_##NAME(struct device *dev, \
+			struct device_attribute *attr, char *buf) \
+{ \
+	struct hv_mmu_statistics *p = &per_cpu(mmu_stats, dev->id); \
+	return sprintf(buf, "%lu\n", p->NAME); \
+} \
+static DEVICE_ATTR(NAME, 0444, show_##NAME, NULL)
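+
+/* Each invocation below expands to one read-only sysfs attribute: e.g.
+ * SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_8k_tte) generates a
+ * show_immu_tsb_hits_ctx0_8k_tte() that prints that field from the
+ * per-cpu hv_mmu_statistics of the CPU the device represents, plus the
+ * dev_attr_immu_tsb_hits_ctx0_8k_tte node used in the group below.
+ */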
+
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_8k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_8k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_64k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_64k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_4mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_4mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctx0_256mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctx0_256mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_8k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_8k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_64k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_64k_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_4mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_4mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_hits_ctxnon0_256mb_tte);
+SHOW_MMUSTAT_ULONG(immu_tsb_ticks_ctxnon0_256mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_8k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_8k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_64k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_64k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_4mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_4mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctx0_256mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctx0_256mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_8k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_8k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_64k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_64k_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_4mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_4mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_hits_ctxnon0_256mb_tte);
+SHOW_MMUSTAT_ULONG(dmmu_tsb_ticks_ctxnon0_256mb_tte);
+
+static struct attribute *mmu_stat_attrs[] = {
+	&dev_attr_immu_tsb_hits_ctx0_8k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_8k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_64k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_64k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_4mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_4mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctx0_256mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctx0_256mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_8k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_8k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_64k_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_64k_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_4mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_4mb_tte.attr,
+	&dev_attr_immu_tsb_hits_ctxnon0_256mb_tte.attr,
+	&dev_attr_immu_tsb_ticks_ctxnon0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctx0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctx0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_8k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_64k_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_4mb_tte.attr,
+	&dev_attr_dmmu_tsb_hits_ctxnon0_256mb_tte.attr,
+	&dev_attr_dmmu_tsb_ticks_ctxnon0_256mb_tte.attr,
+	NULL,
+};
+
+static struct attribute_group mmu_stat_group = {
+	.attrs = mmu_stat_attrs,
+	.name = "mmu_stats",
+};
+
+static long read_mmustat_enable(void *data __maybe_unused)
+{
+	unsigned long ra = 0;
+
+	sun4v_mmustat_info(&ra);
+
+	return ra != 0;
+}
+
+static long write_mmustat_enable(void *data)
+{
+	unsigned long ra, orig_ra, *val = data;
+
+	if (*val)
+		ra = __pa(&per_cpu(mmu_stats, smp_processor_id()));
+	else
+		ra = 0UL;
+
+	return sun4v_mmustat_conf(ra, &orig_ra);
+}
+
+static ssize_t show_mmustat_enable(struct device *s,
+				struct device_attribute *attr, char *buf)
+{
+	long val = work_on_cpu(s->id, read_mmustat_enable, NULL);
+
+	return sprintf(buf, "%lx\n", val);
+}
+
+static ssize_t store_mmustat_enable(struct device *s,
+			struct device_attribute *attr, const char *buf,
+			size_t count)
+{
+	unsigned long val;
+	long err;
+	int ret;
+
+	ret = sscanf(buf, "%lu", &val);
+	if (ret != 1)
+		return -EINVAL;
+
+	err = work_on_cpu(s->id, write_mmustat_enable, &val);
+	if (err)
+		return -EIO;
+
+	return count;
+}
+
+static DEVICE_ATTR(mmustat_enable, 0644, show_mmustat_enable, store_mmustat_enable);
+
+static int mmu_stats_supported;
+
+static int register_mmu_stats(struct device *s)
+{
+	if (!mmu_stats_supported)
+		return 0;
+	device_create_file(s, &dev_attr_mmustat_enable);
+	return sysfs_create_group(&s->kobj, &mmu_stat_group);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void unregister_mmu_stats(struct device *s)
+{
+	if (!mmu_stats_supported)
+		return;
+	sysfs_remove_group(&s->kobj, &mmu_stat_group);
+	device_remove_file(s, &dev_attr_mmustat_enable);
+}
+#endif
+
+#define SHOW_CPUDATA_ULONG_NAME(NAME, MEMBER) \
+static ssize_t show_##NAME(struct device *dev, \
+		struct device_attribute *attr, char *buf) \
+{ \
+	cpuinfo_sparc *c = &cpu_data(dev->id); \
+	return sprintf(buf, "%lu\n", c->MEMBER); \
+}
+
+#define SHOW_CPUDATA_UINT_NAME(NAME, MEMBER) \
+static ssize_t show_##NAME(struct device *dev, \
+		struct device_attribute *attr, char *buf) \
+{ \
+	cpuinfo_sparc *c = &cpu_data(dev->id); \
+	return sprintf(buf, "%u\n", c->MEMBER); \
+}
+
+SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick);
+SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size);
+SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size);
+SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size);
+SHOW_CPUDATA_UINT_NAME(l1_icache_line_size, icache_line_size);
+SHOW_CPUDATA_UINT_NAME(l2_cache_size, ecache_size);
+SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size);
+
+static struct device_attribute cpu_core_attrs[] = {
+	__ATTR(clock_tick,          0444, show_clock_tick, NULL),
+	__ATTR(l1_dcache_size,      0444, show_l1_dcache_size, NULL),
+	__ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL),
+	__ATTR(l1_icache_size,      0444, show_l1_icache_size, NULL),
+	__ATTR(l1_icache_line_size, 0444, show_l1_icache_line_size, NULL),
+	__ATTR(l2_cache_size,       0444, show_l2_cache_size, NULL),
+	__ATTR(l2_cache_line_size,  0444, show_l2_cache_line_size, NULL),
+};
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+
+static int register_cpu_online(unsigned int cpu)
+{
+	struct cpu *c = &per_cpu(cpu_devices, cpu);
+	struct device *s = &c->dev;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++)
+		device_create_file(s, &cpu_core_attrs[i]);
+
+	register_mmu_stats(s);
+	return 0;
+}
+
+static int unregister_cpu_online(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	struct cpu *c = &per_cpu(cpu_devices, cpu);
+	struct device *s = &c->dev;
+	int i;
+
+	unregister_mmu_stats(s);
+	for (i = 0; i < ARRAY_SIZE(cpu_core_attrs); i++)
+		device_remove_file(s, &cpu_core_attrs[i]);
+#endif
+	return 0;
+}
+
+static void __init check_mmu_stats(void)
+{
+	unsigned long dummy1, err;
+
+	if (tlb_type != hypervisor)
+		return;
+
+	err = sun4v_mmustat_info(&dummy1);
+	if (!err)
+		mmu_stats_supported = 1;
+}
+
+static void register_nodes(void)
+{
+#ifdef CONFIG_NUMA
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++)
+		register_one_node(i);
+#endif
+}
+
+static int __init topology_init(void)
+{
+	int cpu, ret;
+
+	register_nodes();
+
+	check_mmu_stats();
+
+	for_each_possible_cpu(cpu) {
+		struct cpu *c = &per_cpu(cpu_devices, cpu);
+
+		register_cpu(c, cpu);
+	}
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "sparc/topology:online",
+				register_cpu_online, unregister_cpu_online);
+	WARN_ON(ret < 0);
+	return 0;
+}
+
+subsys_initcall(topology_init);
diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h
new file mode 100644
index 0000000..bf01426
--- /dev/null
+++ b/arch/sparc/kernel/systbls.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SYSTBLS_H
+#define _SYSTBLS_H
+
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/types.h>
+
+#include <asm/utrap.h>
+
+asmlinkage long sys_getpagesize(void);
+asmlinkage long sys_sparc_pipe(void);
+asmlinkage long sys_nis_syscall(void);
+asmlinkage long sys_getdomainname(char __user *name, int len);
+void do_rt_sigreturn(struct pt_regs *regs);
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+			 unsigned long prot, unsigned long flags,
+			 unsigned long fd, unsigned long off);
+asmlinkage void sparc_breakpoint(struct pt_regs *regs);
+
+#ifdef CONFIG_SPARC32
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+			  unsigned long prot, unsigned long flags,
+			  unsigned long fd, unsigned long pgoff);
+long sys_sparc_remap_file_pages(unsigned long start, unsigned long size,
+			    unsigned long prot, unsigned long pgoff,
+			    unsigned long flags);
+
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+asmlinkage long sys_sparc_ipc(unsigned int call, int first,
+			      unsigned long second,
+			      unsigned long third,
+			      void __user *ptr, long fifth);
+asmlinkage long sparc64_personality(unsigned long personality);
+asmlinkage long sys64_munmap(unsigned long addr, size_t len);
+asmlinkage unsigned long sys64_mremap(unsigned long addr,
+				      unsigned long old_len,
+				      unsigned long new_len,
+				      unsigned long flags,
+				      unsigned long new_addr);
+asmlinkage long sys_utrap_install(utrap_entry_t type,
+				  utrap_handler_t new_p,
+				  utrap_handler_t new_d,
+				  utrap_handler_t __user *old_p,
+				  utrap_handler_t __user *old_d);
+asmlinkage long sys_memory_ordering(unsigned long model);
+asmlinkage void sparc64_set_context(struct pt_regs *regs);
+asmlinkage void sparc64_get_context(struct pt_regs *regs);
+asmlinkage long compat_sys_truncate64(const char __user * path,
+				 u32 high,
+				 u32 low);
+asmlinkage long compat_sys_ftruncate64(unsigned int fd,
+				  u32 high,
+				  u32 low);
+struct compat_stat64;
+asmlinkage long compat_sys_stat64(const char __user * filename,
+				  struct compat_stat64 __user *statbuf);
+asmlinkage long compat_sys_lstat64(const char __user * filename,
+				   struct compat_stat64 __user *statbuf);
+asmlinkage long compat_sys_fstat64(unsigned int fd,
+				   struct compat_stat64 __user * statbuf);
+asmlinkage long compat_sys_fstatat64(unsigned int dfd,
+				     const char __user *filename,
+				     struct compat_stat64 __user * statbuf, int flag);
+asmlinkage long compat_sys_pread64(unsigned int fd,
+					char __user *ubuf,
+					compat_size_t count,
+					u32 poshi,
+					u32 poslo);
+asmlinkage long compat_sys_pwrite64(unsigned int fd,
+					 char __user *ubuf,
+					 compat_size_t count,
+					 u32 poshi,
+					 u32 poslo);
+asmlinkage long compat_sys_readahead(int fd,
+				     unsigned offhi,
+				     unsigned offlo,
+				     compat_size_t count);
+long compat_sys_fadvise64(int fd,
+			  unsigned offhi,
+			  unsigned offlo,
+			  compat_size_t len, int advice);
+long compat_sys_fadvise64_64(int fd,
+			     unsigned offhi, unsigned offlo,
+			     unsigned lenhi, unsigned lenlo,
+			     int advice);
+long compat_sys_sync_file_range(unsigned int fd,
+			   unsigned off_high, unsigned off_low,
+			   unsigned nb_high, unsigned nb_low,
+			   unsigned int flags);
+asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
+				     u32 lenhi, u32 lenlo);
+#endif /* CONFIG_SPARC64 */
+#endif /* _SYSTBLS_H */
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
new file mode 100644
index 0000000..621a363
--- /dev/null
+++ b/arch/sparc/kernel/systbls_32.S
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* systbls.S: System call entry point tables for OS compatibility.
+ *            The native Linux system call table lives here also.
+ *
+ * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ *
+ * Based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+
+
+	.data
+	.align 4
+
+	/* First, the Linux native syscall table. */
+
+	.globl sys_call_table
+sys_call_table:
+/*0*/	.long sys_restart_syscall, sys_exit, sys_fork, sys_read, sys_write
+/*5*/	.long sys_open, sys_close, sys_wait4, sys_creat, sys_link
+/*10*/  .long sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod
+/*15*/	.long sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, sys_lseek
+/*20*/	.long sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
+/*25*/	.long sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause
+/*30*/	.long sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice
+/*35*/	.long sys_chown, sys_sync, sys_kill, sys_newstat, sys_sendfile
+/*40*/	.long sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_getuid
+/*45*/	.long sys_umount, sys_setgid16, sys_getgid16, sys_signal, sys_geteuid16
+/*50*/	.long sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, sys_ioctl
+/*55*/	.long sys_reboot, sys_mmap2, sys_symlink, sys_readlink, sys_execve
+/*60*/	.long sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize
+/*65*/	.long sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_geteuid
+/*70*/	.long sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect
+/*75*/	.long sys_madvise, sys_vhangup, sys_truncate64, sys_mincore, sys_getgroups16
+/*80*/	.long sys_setgroups16, sys_getpgrp, sys_setgroups, sys_setitimer, sys_ftruncate64
+/*85*/	.long sys_swapon, sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
+/*90*/	.long sys_dup2, sys_setfsuid, sys_fcntl, sys_select, sys_setfsgid
+/*95*/	.long sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
+/*100*/	.long sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending
+/*105*/	.long sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid
+/*110*/	.long sys_setresgid, sys_getresgid, sys_setregid, sys_recvmsg, sys_sendmsg
+/*115*/	.long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd
+/*120*/	.long sys_readv, sys_writev, sys_settimeofday, sys_fchown16, sys_fchmod
+/*125*/	.long sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate
+/*130*/	.long sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown
+/*135*/	.long sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
+/*140*/	.long sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit
+/*145*/	.long sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
+/*150*/	.long sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+/*155*/	.long sys_fcntl64, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount
+/*160*/	.long sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
+/*165*/	.long sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr
+/*170*/	.long sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents
+/*175*/	.long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
+/*180*/	.long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_ni_syscall
+/*185*/	.long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname
+/*190*/	.long sys_init_module, sys_personality, sys_sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+/*195*/	.long sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_sparc_sigaction, sys_sgetmask
+/*200*/	.long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir
+/*205*/	.long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
+/*210*/	.long sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
+/*215*/	.long sys_ipc, sys_sigreturn, sys_clone, sys_ioprio_get, sys_adjtimex
+/*220*/	.long sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
+/*225*/	.long sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
+/*230*/	.long sys_select, sys_time, sys_splice, sys_stime, sys_statfs64
+					  /* "We are the Knights of the Forest of Ni!!" */
+/*235*/	.long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
+/*240*/	.long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
+/*245*/	.long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
+/*250*/	.long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall
+/*255*/	.long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
+/*260*/	.long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
+/*265*/	.long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy
+/*270*/	.long sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
+/*275*/	.long sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
+/*280*/	.long sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
+/*285*/	.long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
+/*290*/	.long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
+/*295*/	.long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
+/*300*/	.long sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
+/*305*/	.long sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
+/*310*/	.long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
+/*315*/	.long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
+/*320*/	.long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
+/*325*/	.long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
+/*330*/	.long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
+/*335*/	.long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
+/*345*/	.long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
+/*350*/	.long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+/*355*/	.long sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
+/*360*/	.long sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
new file mode 100644
index 0000000..ff9389a
--- /dev/null
+++ b/arch/sparc/kernel/systbls_64.S
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* systbls.S: System call entry point tables for OS compatibility.
+ *            The native Linux system call table lives here also.
+ *
+ * Copyright (C) 1995, 1996, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * Based upon preliminary work which is:
+ *
+ * Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
+ */
+
+
+	.text
+	.align	4
+
+#ifdef CONFIG_COMPAT
+	/* First, the 32-bit Linux native syscall table. */
+
+	.globl sys_call_table32
+sys_call_table32:
+/*0*/	.word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write
+/*5*/	.word compat_sys_open, sys_close, compat_sys_wait4, sys_creat, sys_link
+/*10*/  .word sys_unlink, sunos_execv, sys_chdir, sys_chown16, sys_mknod
+/*15*/	.word sys_chmod, sys_lchown16, sys_brk, sys_nis_syscall, compat_sys_lseek
+/*20*/	.word sys_getpid, sys_capget, sys_capset, sys_setuid16, sys_getuid16
+/*25*/	.word compat_sys_vmsplice, compat_sys_ptrace, sys_alarm, compat_sys_sigaltstack, sys_pause
+/*30*/	.word compat_sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice
+	.word sys_chown, sys_sync, sys_kill, compat_sys_newstat, compat_sys_sendfile
+/*40*/	.word compat_sys_newlstat, sys_dup, sys_sparc_pipe, compat_sys_times, sys_getuid
+	.word sys_umount, sys_setgid16, sys_getgid16, sys_signal, sys_geteuid16
+/*50*/	.word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl
+	.word sys_reboot, sys32_mmap2, sys_symlink, sys_readlink, sys32_execve
+/*60*/	.word sys_umask, sys_chroot, compat_sys_newfstat, compat_sys_fstat64, sys_getpagesize
+	.word sys_msync, sys_vfork, compat_sys_pread64, compat_sys_pwrite64, sys_geteuid
+/*70*/	.word sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect
+	.word sys_madvise, sys_vhangup, compat_sys_truncate64, sys_mincore, sys_getgroups16
+/*80*/	.word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, compat_sys_ftruncate64
+	.word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid
+/*90*/	.word sys_dup2, sys_setfsuid, compat_sys_fcntl, compat_sys_select, sys_setfsgid
+	.word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
+/*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending
+	.word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid
+/*110*/	.word sys_setresgid, sys_getresgid, sys_setregid, compat_sys_recvmsg, compat_sys_sendmsg
+	.word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, compat_sys_getsockopt, sys_getcwd
+/*120*/	.word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod
+	.word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
+/*130*/	.word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
+	.word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
+/*140*/	.word sys_sendfile64, sys_getpeername, compat_sys_futex, sys_gettid, compat_sys_getrlimit
+	.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
+/*150*/	.word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+	.word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
+/*160*/	.word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
+	.word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys_setxattr
+/*170*/	.word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, compat_sys_getdents
+	.word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
+/*180*/	.word sys_flistxattr, sys_removexattr, sys_lremovexattr, compat_sys_sigpending, sys_ni_syscall
+	.word sys_setpgid, sys_fremovexattr, sys_tkill, sparc_exit_group, sys_newuname
+/*190*/	.word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+	.word sys_epoll_wait, sys_ioprio_set, sys_getppid, compat_sys_sparc_sigaction, sys_sgetmask
+/*200*/	.word sys_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir
+	.word compat_sys_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, compat_sys_fadvise64
+/*210*/	.word compat_sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo
+	.word compat_sys_ipc, sys32_sigreturn, sys_clone, sys_ioprio_get, compat_sys_adjtimex
+/*220*/	.word compat_sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
+	.word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16
+/*230*/	.word compat_sys_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64
+	.word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
+/*240*/	.word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
+	.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, compat_sys_sched_rr_get_interval, compat_sys_nanosleep
+/*250*/	.word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
+	.word compat_sys_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, compat_sys_clock_nanosleep
+/*260*/	.word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_sys_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun
+	.word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy
+/*270*/	.word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink
+	.word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid
+/*280*/	.word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat
+	.word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64
+/*290*/	.word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
+	.word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare
+/*300*/	.word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages, compat_sys_mbind, compat_sys_get_mempolicy
+	.word compat_sys_set_mempolicy, compat_sys_kexec_load, compat_sys_move_pages, sys_getcpu, compat_sys_epoll_pwait
+/*310*/	.word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
+	.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
+	.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
+/*330*/	.word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
+	.word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev
+/*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
+	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
+/*350*/	.word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+	.word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2
+/*360*/	.word sys_statx, compat_sys_io_pgetevents
+
+#endif /* CONFIG_COMPAT */
+
+	/* Now the 64-bit native Linux syscall table. */
+
+	.align	4
+	.globl sys_call_table64, sys_call_table
+sys_call_table64:
+sys_call_table:
+/*0*/	.word sys_restart_syscall, sparc_exit, sys_fork, sys_read, sys_write
+/*5*/	.word sys_open, sys_close, sys_wait4, sys_creat, sys_link
+/*10*/  .word sys_unlink, sys_nis_syscall, sys_chdir, sys_chown, sys_mknod
+/*15*/	.word sys_chmod, sys_lchown, sys_brk, sys_nis_syscall, sys_lseek
+/*20*/	.word sys_getpid, sys_capget, sys_capset, sys_setuid, sys_getuid
+/*25*/	.word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall
+/*30*/	.word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice
+	.word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile64
+/*40*/	.word sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_nis_syscall
+	.word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid
+/*50*/	.word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl
+	.word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys64_execve
+/*60*/	.word sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize
+	.word sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_nis_syscall
+/*70*/	.word sys_nis_syscall, sys_mmap, sys_nis_syscall, sys_64_munmap, sys_mprotect
+	.word sys_madvise, sys_vhangup, sys_nis_syscall, sys_mincore, sys_getgroups
+/*80*/	.word sys_setgroups, sys_getpgrp, sys_nis_syscall, sys_setitimer, sys_nis_syscall
+	.word sys_swapon, sys_getitimer, sys_nis_syscall, sys_sethostname, sys_nis_syscall
+/*90*/	.word sys_dup2, sys_nis_syscall, sys_fcntl, sys_select, sys_nis_syscall
+	.word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept
+/*100*/	.word sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending
+	.word sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid
+/*110*/	.word sys_setresgid, sys_getresgid, sys_nis_syscall, sys_recvmsg, sys_sendmsg
+	.word sys_nis_syscall, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd
+/*120*/	.word sys_readv, sys_writev, sys_settimeofday, sys_fchown, sys_fchmod
+	.word sys_recvfrom, sys_setreuid, sys_setregid, sys_rename, sys_truncate
+/*130*/	.word sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown
+	.word sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64
+/*140*/	.word sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit
+	.word sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
+/*150*/	.word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+	.word sys_nis_syscall, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount
+/*160*/	.word sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_utrap_install
+	.word sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr
+/*170*/	.word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents
+	.word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr
+/*180*/	.word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_ni_syscall
+	.word sys_setpgid, sys_fremovexattr, sys_tkill, sparc_exit_group, sys_newuname
+/*190*/	.word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl
+	.word sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_nis_syscall, sys_sgetmask
+/*200*/	.word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall
+	.word sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64
+/*210*/	.word sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo
+	.word sys_sparc_ipc, sys_nis_syscall, sys_clone, sys_ioprio_get, sys_adjtimex
+/*220*/	.word sys_nis_syscall, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid
+	.word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid, sys_setfsgid
+/*230*/	.word sys_select, sys_nis_syscall, sys_splice, sys_stime, sys_statfs64
+	.word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall
+/*240*/	.word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler
+	.word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep
+/*250*/	.word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall
+	.word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep
+/*260*/	.word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun
+	.word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy
+/*270*/	.word sys_io_submit, sys_io_cancel, sys_io_getevents, sys_mq_open, sys_mq_unlink
+	.word sys_mq_timedsend, sys_mq_timedreceive, sys_mq_notify, sys_mq_getsetattr, sys_waitid
+/*280*/	.word sys_tee, sys_add_key, sys_request_key, sys_keyctl, sys_openat
+	.word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64
+/*290*/	.word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat
+	.word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare
+/*300*/	.word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages, sys_mbind, sys_get_mempolicy
+	.word sys_set_mempolicy, sys_kexec_load, sys_move_pages, sys_getcpu, sys_epoll_pwait
+/*310*/	.word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
+	.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
+/*320*/	.word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
+	.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
+/*330*/	.word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
+	.word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev
+/*340*/	.word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
+	.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
+/*350*/	.word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
+	.word sys_setsockopt, sys_mlock2, sys_copy_file_range, sys_preadv2, sys_pwritev2
+/*360*/	.word sys_statx, sys_io_pgetevents
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
new file mode 100644
index 0000000..8a08830
--- /dev/null
+++ b/arch/sparc/kernel/time_32.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/* linux/arch/sparc/kernel/time.c
+ *
+ * Copyright (C) 1995 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
+ *
+ * Chris Davis (cdavis@cois.on.ca) 03/27/1998
+ * Added support for the intersil on the sun4/4200
+ *
+ * Gleb Raiko (rajko@mech.math.msu.su) 08/18/1998
+ * Support for MicroSPARC-IIep, PCI CPU.
+ *
+ * This file handles the Sparc specific time handling details.
+ *
+ * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
+ *		"A Kernel Model for Precision Timekeeping" by Dave Mills
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/rtc/m48t59.h>
+#include <linux/timex.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <linux/profile.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <asm/mc146818rtc.h>
+#include <asm/oplib.h>
+#include <asm/timex.h>
+#include <asm/timer.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/idprom.h>
+#include <asm/page.h>
+#include <asm/pcic.h>
+#include <asm/irq_regs.h>
+#include <asm/setup.h>
+
+#include "kernel.h"
+#include "irq.h"
+
+static __cacheline_aligned_in_smp DEFINE_SEQLOCK(timer_cs_lock);
+static __volatile__ u64 timer_cs_internal_counter = 0;
+static char timer_cs_enabled = 0;
+
+static struct clock_event_device timer_ce;
+static char timer_ce_enabled = 0;
+
+#ifdef CONFIG_SMP
+DEFINE_PER_CPU(struct clock_event_device, sparc32_clockevent);
+#endif
+
+DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
+
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	extern char __copy_user_begin[], __copy_user_end[];
+	extern char __bzero_begin[], __bzero_end[];
+
+	unsigned long pc = regs->pc;
+
+	if (in_lock_functions(pc) ||
+	    (pc >= (unsigned long) __copy_user_begin &&
+	     pc < (unsigned long) __copy_user_end) ||
+	    (pc >= (unsigned long) __bzero_begin &&
+	     pc < (unsigned long) __bzero_end))
+		pc = regs->u_regs[UREG_RETPC];
+	return pc;
+}
+
+EXPORT_SYMBOL(profile_pc);
+
+volatile u32 __iomem *master_l10_counter;
+
+irqreturn_t notrace timer_interrupt(int dummy, void *dev_id)
+{
+	if (timer_cs_enabled) {
+		write_seqlock(&timer_cs_lock);
+		timer_cs_internal_counter++;
+		sparc_config.clear_clock_irq();
+		write_sequnlock(&timer_cs_lock);
+	} else {
+		sparc_config.clear_clock_irq();
+	}
+
+	if (timer_ce_enabled)
+		timer_ce.event_handler(&timer_ce);
+
+	return IRQ_HANDLED;
+}
+
+static int timer_ce_shutdown(struct clock_event_device *evt)
+{
+	timer_ce_enabled = 0;
+	smp_mb();
+	return 0;
+}
+
+static int timer_ce_set_periodic(struct clock_event_device *evt)
+{
+	timer_ce_enabled = 1;
+	smp_mb();
+	return 0;
+}
+
+static __init void setup_timer_ce(void)
+{
+	struct clock_event_device *ce = &timer_ce;
+
+	BUG_ON(smp_processor_id() != boot_cpu_id);
+
+	ce->name     = "timer_ce";
+	ce->rating   = 100;
+	ce->features = CLOCK_EVT_FEAT_PERIODIC;
+	ce->set_state_shutdown = timer_ce_shutdown;
+	ce->set_state_periodic = timer_ce_set_periodic;
+	ce->tick_resume = timer_ce_set_periodic;
+	ce->cpumask  = cpu_possible_mask;
+	ce->shift    = 32;
+	ce->mult     = div_sc(sparc_config.clock_rate, NSEC_PER_SEC,
+	                      ce->shift);
+	clockevents_register_device(ce);
+}
+
+static unsigned int sbus_cycles_offset(void)
+{
+	u32 val, offset;
+
+	val = sbus_readl(master_l10_counter);
+	offset = (val >> TIMER_VALUE_SHIFT) & TIMER_VALUE_MASK;
+
+	/* Limit hit? */
+	if (val & TIMER_LIMIT_BIT)
+		offset += sparc_config.cs_period;
+
+	return offset;
+}
+
+static u64 timer_cs_read(struct clocksource *cs)
+{
+	unsigned int seq, offset;
+	u64 cycles;
+
+	do {
+		seq = read_seqbegin(&timer_cs_lock);
+
+		cycles = timer_cs_internal_counter;
+		offset = sparc_config.get_cycles_offset();
+	} while (read_seqretry(&timer_cs_lock, seq));
+
+	/* Count absolute cycles */
+	cycles *= sparc_config.cs_period;
+	cycles += offset;
+
+	return cycles;
+}
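+
+/* The loop above is the standard seqlock read idiom: if the timer
+ * interrupt advances timer_cs_internal_counter mid-sample,
+ * read_seqretry() forces another pass, so counter and offset always
+ * belong to the same tick.  The absolute count is then
+ *
+ *	cycles = ticks * cs_period + offset_within_current_period
+ */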
+
+static struct clocksource timer_cs = {
+	.name	= "timer_cs",
+	.rating	= 100,
+	.read	= timer_cs_read,
+	.mask	= CLOCKSOURCE_MASK(64),
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static __init int setup_timer_cs(void)
+{
+	timer_cs_enabled = 1;
+	return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
+}
+
+#ifdef CONFIG_SMP
+static int percpu_ce_shutdown(struct clock_event_device *evt)
+{
+	int cpu = cpumask_first(evt->cpumask);
+
+	sparc_config.load_profile_irq(cpu, 0);
+	return 0;
+}
+
+static int percpu_ce_set_periodic(struct clock_event_device *evt)
+{
+	int cpu = cpumask_first(evt->cpumask);
+
+	sparc_config.load_profile_irq(cpu, SBUS_CLOCK_RATE / HZ);
+	return 0;
+}
+
+static int percpu_ce_set_next_event(unsigned long delta,
+				    struct clock_event_device *evt)
+{
+	int cpu = cpumask_first(evt->cpumask);
+	unsigned int next = (unsigned int)delta;
+
+	sparc_config.load_profile_irq(cpu, next);
+	return 0;
+}
+
+void register_percpu_ce(int cpu)
+{
+	struct clock_event_device *ce = &per_cpu(sparc32_clockevent, cpu);
+	unsigned int features = CLOCK_EVT_FEAT_PERIODIC;
+
+	if (sparc_config.features & FEAT_L14_ONESHOT)
+		features |= CLOCK_EVT_FEAT_ONESHOT;
+
+	ce->name           = "percpu_ce";
+	ce->rating         = 200;
+	ce->features       = features;
+	ce->set_state_shutdown = percpu_ce_shutdown;
+	ce->set_state_periodic = percpu_ce_set_periodic;
+	ce->set_state_oneshot = percpu_ce_shutdown;
+	ce->set_next_event = percpu_ce_set_next_event;
+	ce->cpumask        = cpumask_of(cpu);
+	ce->shift          = 32;
+	ce->mult           = div_sc(sparc_config.clock_rate, NSEC_PER_SEC,
+	                            ce->shift);
+	ce->max_delta_ns   = clockevent_delta2ns(sparc_config.clock_rate, ce);
+	ce->max_delta_ticks = (unsigned long)sparc_config.clock_rate;
+	ce->min_delta_ns   = clockevent_delta2ns(100, ce);
+	ce->min_delta_ticks = 100;
+
+	clockevents_register_device(ce);
+}
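+
+/* clockevent_delta2ns() is the inverse of the div_sc() relation used
+ * above, roughly ns = (ticks << shift) / mult, so the bounds set here
+ * let the oneshot timer be programmed anywhere from 100 ticks up to
+ * one full second's worth (clock_rate ticks).
+ */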
+#endif
+
+static unsigned char mostek_read_byte(struct device *dev, u32 ofs)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct m48t59_plat_data *pdata = pdev->dev.platform_data;
+
+	return readb(pdata->ioaddr + ofs);
+}
+
+static void mostek_write_byte(struct device *dev, u32 ofs, u8 val)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct m48t59_plat_data *pdata = pdev->dev.platform_data;
+
+	writeb(val, pdata->ioaddr + ofs);
+}
+
+static struct m48t59_plat_data m48t59_data = {
+	.read_byte = mostek_read_byte,
+	.write_byte = mostek_write_byte,
+};
+
+/* resource is set at runtime */
+static struct platform_device m48t59_rtc = {
+	.name		= "rtc-m48t59",
+	.id		= 0,
+	.num_resources	= 1,
+	.dev	= {
+		.platform_data = &m48t59_data,
+	},
+};
+
+static int clock_probe(struct platform_device *op)
+{
+	struct device_node *dp = op->dev.of_node;
+	const char *model = of_get_property(dp, "model", NULL);
+
+	if (!model)
+		return -ENODEV;
+
+	/* Only the primary RTC has an address property */
+	if (!of_find_property(dp, "address", NULL))
+		return -ENODEV;
+
+	m48t59_rtc.resource = &op->resource[0];
+	if (!strcmp(model, "mk48t02")) {
+		/* Map the clock register io area read-only */
+		m48t59_data.ioaddr = of_ioremap(&op->resource[0], 0,
+						2048, "rtc-m48t59");
+		m48t59_data.type = M48T59RTC_TYPE_M48T02;
+	} else if (!strcmp(model, "mk48t08")) {
+		m48t59_data.ioaddr = of_ioremap(&op->resource[0], 0,
+						8192, "rtc-m48t59");
+		m48t59_data.type = M48T59RTC_TYPE_M48T08;
+	} else
+		return -ENODEV;
+
+	if (platform_device_register(&m48t59_rtc) < 0)
+		printk(KERN_ERR "Registering RTC device failed\n");
+
+	return 0;
+}
+
+static const struct of_device_id clock_match[] = {
+	{
+		.name = "eeprom",
+	},
+	{},
+};
+
+static struct platform_driver clock_driver = {
+	.probe		= clock_probe,
+	.driver = {
+		.name = "rtc",
+		.of_match_table = clock_match,
+	},
+};
+
+
+/* Probe for the mostek real time clock chip. */
+static int __init clock_init(void)
+{
+	return platform_driver_register(&clock_driver);
+}
+/* Must be after subsys_initcall() so that busses are probed.  Must
+ * be before device_initcall() because things like the RTC driver
+ * need to see the clock registers.
+ */
+fs_initcall(clock_init);
+
+static void __init sparc32_late_time_init(void)
+{
+	if (sparc_config.features & FEAT_L10_CLOCKEVENT)
+		setup_timer_ce();
+	if (sparc_config.features & FEAT_L10_CLOCKSOURCE)
+		setup_timer_cs();
+#ifdef CONFIG_SMP
+	register_percpu_ce(smp_processor_id());
+#endif
+}
+
+static void __init sbus_time_init(void)
+{
+	sparc_config.get_cycles_offset = sbus_cycles_offset;
+	sparc_config.init_timers();
+}
+
+void __init time_init(void)
+{
+	sparc_config.features = 0;
+	late_time_init = sparc32_late_time_init;
+
+	if (pcic_present())
+		pci_time_init();
+	else
+		sbus_time_init();
+}
+
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
new file mode 100644
index 0000000..f0eba72
--- /dev/null
+++ b/arch/sparc/kernel/time_64.c
@@ -0,0 +1,898 @@
+// SPDX-License-Identifier: GPL-2.0
+/* time.c: UltraSparc timer and TOD clock support.
+ *
+ * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1998 Eddie C. Dost   (ecd@skynet.be)
+ *
+ * Based largely on code which is:
+ *
+ * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/mc146818rtc.h>
+#include <linux/delay.h>
+#include <linux/profile.h>
+#include <linux/bcd.h>
+#include <linux/jiffies.h>
+#include <linux/cpufreq.h>
+#include <linux/percpu.h>
+#include <linux/rtc/m48t59.h>
+#include <linux/kernel_stat.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/platform_device.h>
+#include <linux/ftrace.h>
+
+#include <asm/oplib.h>
+#include <asm/timer.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/starfire.h>
+#include <asm/smp.h>
+#include <asm/sections.h>
+#include <asm/cpudata.h>
+#include <linux/uaccess.h>
+#include <asm/irq_regs.h>
+#include <asm/cacheflush.h>
+
+#include "entry.h"
+#include "kernel.h"
+
+DEFINE_SPINLOCK(rtc_lock);
+
+unsigned int __read_mostly vdso_fix_stick;
+
+#ifdef CONFIG_SMP
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	unsigned long pc = instruction_pointer(regs);
+
+	if (in_lock_functions(pc))
+		return regs->u_regs[UREG_RETPC];
+	return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+#endif
+
+static void tick_disable_protection(void)
+{
+	/* Set things up so user can access tick register for profiling
+	 * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
+	 * read back of %tick after writing it.
+	 */
+	__asm__ __volatile__(
+	"	ba,pt	%%xcc, 1f\n"
+	"	 nop\n"
+	"	.align	64\n"
+	"1:	rd	%%tick, %%g2\n"
+	"	add	%%g2, 6, %%g2\n"
+	"	andn	%%g2, %0, %%g2\n"
+	"	wrpr	%%g2, 0, %%tick\n"
+	"	rdpr	%%tick, %%g0"
+	: /* no outputs */
+	: "r" (TICK_PRIV_BIT)
+	: "g2");
+}
+
+static void tick_disable_irq(void)
+{
+	__asm__ __volatile__(
+	"	ba,pt	%%xcc, 1f\n"
+	"	 nop\n"
+	"	.align	64\n"
+	"1:	wr	%0, 0x0, %%tick_cmpr\n"
+	"	rd	%%tick_cmpr, %%g0"
+	: /* no outputs */
+	: "r" (TICKCMP_IRQ_BIT));
+}
+
+static void tick_init_tick(void)
+{
+	tick_disable_protection();
+	tick_disable_irq();
+}
+
+static unsigned long long tick_get_tick(void)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__("rd	%%tick, %0\n\t"
+			     "mov	%0, %0"
+			     : "=r" (ret));
+
+	return ret & ~TICK_PRIV_BIT;
+}
+
+static int tick_add_compare(unsigned long adj)
+{
+	unsigned long orig_tick, new_tick, new_compare;
+
+	__asm__ __volatile__("rd	%%tick, %0"
+			     : "=r" (orig_tick));
+
+	orig_tick &= ~TICKCMP_IRQ_BIT;
+
+	/* Workaround for Spitfire Errata (#54 I think??), I discovered
+	 * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
+	 * number 103640.
+	 *
+	 * On Blackbird writes to %tick_cmpr can fail, the
+	 * workaround seems to be to execute the wr instruction
+	 * at the start of an I-cache line, and perform a dummy
+	 * read back from %tick_cmpr right after writing to it. -DaveM
+	 */
+	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t"
+			     " add	%1, %2, %0\n\t"
+			     ".align	64\n"
+			     "1:\n\t"
+			     "wr	%0, 0, %%tick_cmpr\n\t"
+			     "rd	%%tick_cmpr, %%g0\n\t"
+			     : "=r" (new_compare)
+			     : "r" (orig_tick), "r" (adj));
+
+	__asm__ __volatile__("rd	%%tick, %0"
+			     : "=r" (new_tick));
+	new_tick &= ~TICKCMP_IRQ_BIT;
+
+	return ((long)(new_tick - (orig_tick+adj))) > 0L;
+}
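+
+/* The final comparison detects a missed deadline: if %tick has already
+ * advanced past orig_tick + adj by the time the compare register was
+ * written, the interrupt may never fire.  With hypothetical values
+ * orig_tick = 1000 and adj = 50, a re-read tick of 1060 returns 1
+ * (which the clockevent code turns into -ETIME), while 1040 returns 0.
+ */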
+
+static unsigned long tick_add_tick(unsigned long adj)
+{
+	unsigned long new_tick;
+
+	/* Also need to handle Blackbird bug here too. */
+	__asm__ __volatile__("rd	%%tick, %0\n\t"
+			     "add	%0, %1, %0\n\t"
+			     "wrpr	%0, 0, %%tick\n\t"
+			     : "=&r" (new_tick)
+			     : "r" (adj));
+
+	return new_tick;
+}
+
+/* Searches for cpu clock frequency with given cpuid in OpenBoot tree */
+static unsigned long cpuid_to_freq(phandle node, int cpuid)
+{
+	bool is_cpu_node = false;
+	unsigned long freq = 0;
+	char type[128];
+
+	if (!node)
+		return freq;
+
+	if (prom_getproperty(node, "device_type", type, sizeof(type)) != -1)
+		is_cpu_node = (strcmp(type, "cpu") == 0);
+
+	/* try upa-portid then cpuid to get cpuid, see prom_64.c */
+	if (is_cpu_node && (prom_getint(node, "upa-portid") == cpuid ||
+			    prom_getint(node, "cpuid") == cpuid))
+		freq = prom_getintdefault(node, "clock-frequency", 0);
+	if (!freq)
+		freq = cpuid_to_freq(prom_getchild(node), cpuid);
+	if (!freq)
+		freq = cpuid_to_freq(prom_getsibling(node), cpuid);
+
+	return freq;
+}
+
+static unsigned long tick_get_frequency(void)
+{
+	return cpuid_to_freq(prom_root_node, hard_smp_processor_id());
+}
+
+static struct sparc64_tick_ops tick_operations __cacheline_aligned = {
+	.name		=	"tick",
+	.init_tick	=	tick_init_tick,
+	.disable_irq	=	tick_disable_irq,
+	.get_tick	=	tick_get_tick,
+	.add_tick	=	tick_add_tick,
+	.add_compare	=	tick_add_compare,
+	.get_frequency	=	tick_get_frequency,
+	.softint_mask	=	1UL << 0,
+};
+
+struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations;
+EXPORT_SYMBOL(tick_ops);
+
+static void stick_disable_irq(void)
+{
+	__asm__ __volatile__(
+	"wr	%0, 0x0, %%asr25"
+	: /* no outputs */
+	: "r" (TICKCMP_IRQ_BIT));
+}
+
+static void stick_init_tick(void)
+{
+	/* Writes to the %tick and %stick register are not
+	 * allowed on sun4v.  The Hypervisor controls that
+	 * bit, per-strand.
+	 */
+	if (tlb_type != hypervisor) {
+		tick_disable_protection();
+		tick_disable_irq();
+
+		/* Let the user get at STICK too. */
+		__asm__ __volatile__(
+		"	rd	%%asr24, %%g2\n"
+		"	andn	%%g2, %0, %%g2\n"
+		"	wr	%%g2, 0, %%asr24"
+		: /* no outputs */
+		: "r" (TICK_PRIV_BIT)
+		: "g1", "g2");
+	}
+
+	stick_disable_irq();
+}
+
+static unsigned long long stick_get_tick(void)
+{
+	unsigned long ret;
+
+	__asm__ __volatile__("rd	%%asr24, %0"
+			     : "=r" (ret));
+
+	return ret & ~TICK_PRIV_BIT;
+}
+
+static unsigned long stick_add_tick(unsigned long adj)
+{
+	unsigned long new_tick;
+
+	__asm__ __volatile__("rd	%%asr24, %0\n\t"
+			     "add	%0, %1, %0\n\t"
+			     "wr	%0, 0, %%asr24\n\t"
+			     : "=&r" (new_tick)
+			     : "r" (adj));
+
+	return new_tick;
+}
+
+static int stick_add_compare(unsigned long adj)
+{
+	unsigned long orig_tick, new_tick;
+
+	__asm__ __volatile__("rd	%%asr24, %0"
+			     : "=r" (orig_tick));
+	orig_tick &= ~TICKCMP_IRQ_BIT;
+
+	__asm__ __volatile__("wr	%0, 0, %%asr25"
+			     : /* no outputs */
+			     : "r" (orig_tick + adj));
+
+	__asm__ __volatile__("rd	%%asr24, %0"
+			     : "=r" (new_tick));
+	new_tick &= ~TICKCMP_IRQ_BIT;
+
+	return ((long)(new_tick - (orig_tick+adj))) > 0L;
+}
+
+static unsigned long stick_get_frequency(void)
+{
+	return prom_getintdefault(prom_root_node, "stick-frequency", 0);
+}
+
+static struct sparc64_tick_ops stick_operations __read_mostly = {
+	.name		=	"stick",
+	.init_tick	=	stick_init_tick,
+	.disable_irq	=	stick_disable_irq,
+	.get_tick	=	stick_get_tick,
+	.add_tick	=	stick_add_tick,
+	.add_compare	=	stick_add_compare,
+	.get_frequency	=	stick_get_frequency,
+	.softint_mask	=	1UL << 16,
+};
+
+/* On Hummingbird the STICK/STICK_CMPR register is implemented
+ * in I/O space.  There are two 64-bit registers each, the
+ * first holds the low 32-bits of the value and the second holds
+ * the high 32-bits.
+ *
+ * Since STICK is constantly updating, we have to access it carefully.
+ *
+ * The sequence we use to read is:
+ * 1) read high
+ * 2) read low
+ * 3) read high again, if it rolled re-read both low and high again.
+ *
+ * Writing STICK safely is also tricky:
+ * 1) write low to zero
+ * 2) write high
+ * 3) write low
+ */
+static unsigned long __hbird_read_stick(void)
+{
+	unsigned long ret, tmp1, tmp2, tmp3;
+	unsigned long addr = HBIRD_STICK_ADDR+8;
+
+	__asm__ __volatile__("ldxa	[%1] %5, %2\n"
+			     "1:\n\t"
+			     "sub	%1, 0x8, %1\n\t"
+			     "ldxa	[%1] %5, %3\n\t"
+			     "add	%1, 0x8, %1\n\t"
+			     "ldxa	[%1] %5, %4\n\t"
+			     "cmp	%4, %2\n\t"
+			     "bne,a,pn	%%xcc, 1b\n\t"
+			     " mov	%4, %2\n\t"
+			     "sllx	%4, 32, %4\n\t"
+			     "or	%3, %4, %0\n\t"
+			     : "=&r" (ret), "=&r" (addr),
+			       "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3)
+			     : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr));
+
+	return ret;
+}
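+
+/* A rough C sketch of what the assembly above implements (illustrative
+ * only -- the real loads must be ldxa's through ASI_PHYS_BYPASS_EC_E,
+ * and load_phys() is a stand-in, not a real helper):
+ *
+ *	u64 high, low, again;
+ *
+ *	high = load_phys(HBIRD_STICK_ADDR + 8);
+ *	for (;;) {
+ *		low   = load_phys(HBIRD_STICK_ADDR + 0);
+ *		again = load_phys(HBIRD_STICK_ADDR + 8);
+ *		if (again == high)
+ *			break;
+ *		high = again;	// low word rolled over, retry
+ *	}
+ *	return (high << 32) | low;
+ */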
+
+static void __hbird_write_stick(unsigned long val)
+{
+	unsigned long low = (val & 0xffffffffUL);
+	unsigned long high = (val >> 32UL);
+	unsigned long addr = HBIRD_STICK_ADDR;
+
+	__asm__ __volatile__("stxa	%%g0, [%0] %4\n\t"
+			     "add	%0, 0x8, %0\n\t"
+			     "stxa	%3, [%0] %4\n\t"
+			     "sub	%0, 0x8, %0\n\t"
+			     "stxa	%2, [%0] %4"
+			     : "=&r" (addr)
+			     : "0" (addr), "r" (low), "r" (high),
+			       "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+static void __hbird_write_compare(unsigned long val)
+{
+	unsigned long low = (val & 0xffffffffUL);
+	unsigned long high = (val >> 32UL);
+	unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL;
+
+	__asm__ __volatile__("stxa	%3, [%0] %4\n\t"
+			     "sub	%0, 0x8, %0\n\t"
+			     "stxa	%2, [%0] %4"
+			     : "=&r" (addr)
+			     : "0" (addr), "r" (low), "r" (high),
+			       "i" (ASI_PHYS_BYPASS_EC_E));
+}
+
+static void hbtick_disable_irq(void)
+{
+	__hbird_write_compare(TICKCMP_IRQ_BIT);
+}
+
+static void hbtick_init_tick(void)
+{
+	tick_disable_protection();
+
+	/* XXX This seems to be necessary to 'jumpstart' Hummingbird
+	 * XXX into actually sending STICK interrupts.  I think because
+	 * XXX of how we store %tick_cmpr in head.S this somehow resets the
+	 * XXX {TICK + STICK} interrupt mux.  -DaveM
+	 */
+	__hbird_write_stick(__hbird_read_stick());
+
+	hbtick_disable_irq();
+}
+
+static unsigned long long hbtick_get_tick(void)
+{
+	return __hbird_read_stick() & ~TICK_PRIV_BIT;
+}
+
+static unsigned long hbtick_add_tick(unsigned long adj)
+{
+	unsigned long val;
+
+	val = __hbird_read_stick() + adj;
+	__hbird_write_stick(val);
+
+	return val;
+}
+
+static int hbtick_add_compare(unsigned long adj)
+{
+	unsigned long val = __hbird_read_stick();
+	unsigned long val2;
+
+	val &= ~TICKCMP_IRQ_BIT;
+	val += adj;
+	__hbird_write_compare(val);
+
+	val2 = __hbird_read_stick() & ~TICKCMP_IRQ_BIT;
+
+	return ((long)(val2 - val)) > 0L;
+}
+
+static unsigned long hbtick_get_frequency(void)
+{
+	return prom_getintdefault(prom_root_node, "stick-frequency", 0);
+}
+
+static struct sparc64_tick_ops hbtick_operations __read_mostly = {
+	.name		=	"hbtick",
+	.init_tick	=	hbtick_init_tick,
+	.disable_irq	=	hbtick_disable_irq,
+	.get_tick	=	hbtick_get_tick,
+	.add_tick	=	hbtick_add_tick,
+	.add_compare	=	hbtick_add_compare,
+	.get_frequency	=	hbtick_get_frequency,
+	.softint_mask	=	1UL << 0,
+};
+
+unsigned long cmos_regs;
+EXPORT_SYMBOL(cmos_regs);
+
+static struct resource rtc_cmos_resource;
+
+static struct platform_device rtc_cmos_device = {
+	.name		= "rtc_cmos",
+	.id		= -1,
+	.resource	= &rtc_cmos_resource,
+	.num_resources	= 1,
+};
+
+static int rtc_probe(struct platform_device *op)
+{
+	struct resource *r;
+
+	printk(KERN_INFO "%s: RTC regs at 0x%llx\n",
+	       op->dev.of_node->full_name, op->resource[0].start);
+
+	/* The CMOS RTC driver only accepts IORESOURCE_IO, so cons
+	 * up a fake resource so that the probe works for all cases.
+	 * When the RTC is behind an ISA bus it will have IORESOURCE_IO
+	 * already, whereas when it's behind EBUS it will be IORESOURCE_MEM.
+	 */
+
+	r = &rtc_cmos_resource;
+	r->flags = IORESOURCE_IO;
+	r->name = op->resource[0].name;
+	r->start = op->resource[0].start;
+	r->end = op->resource[0].end;
+
+	cmos_regs = op->resource[0].start;
+	return platform_device_register(&rtc_cmos_device);
+}
+
+static const struct of_device_id rtc_match[] = {
+	{
+		.name = "rtc",
+		.compatible = "m5819",
+	},
+	{
+		.name = "rtc",
+		.compatible = "isa-m5819p",
+	},
+	{
+		.name = "rtc",
+		.compatible = "isa-m5823p",
+	},
+	{
+		.name = "rtc",
+		.compatible = "ds1287",
+	},
+	{},
+};
+
+static struct platform_driver rtc_driver = {
+	.probe		= rtc_probe,
+	.driver = {
+		.name = "rtc",
+		.of_match_table = rtc_match,
+	},
+};
+
+static struct platform_device rtc_bq4802_device = {
+	.name		= "rtc-bq4802",
+	.id		= -1,
+	.num_resources	= 1,
+};
+
+static int bq4802_probe(struct platform_device *op)
+{
+
+	printk(KERN_INFO "%s: BQ4802 regs at 0x%llx\n",
+	       op->dev.of_node->full_name, op->resource[0].start);
+
+	rtc_bq4802_device.resource = &op->resource[0];
+	return platform_device_register(&rtc_bq4802_device);
+}
+
+static const struct of_device_id bq4802_match[] = {
+	{
+		.name = "rtc",
+		.compatible = "bq4802",
+	},
+	{},
+};
+
+static struct platform_driver bq4802_driver = {
+	.probe		= bq4802_probe,
+	.driver = {
+		.name = "bq4802",
+		.of_match_table = bq4802_match,
+	},
+};
+
+static unsigned char mostek_read_byte(struct device *dev, u32 ofs)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	void __iomem *regs = (void __iomem *) pdev->resource[0].start;
+
+	return readb(regs + ofs);
+}
+
+static void mostek_write_byte(struct device *dev, u32 ofs, u8 val)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	void __iomem *regs = (void __iomem *) pdev->resource[0].start;
+
+	writeb(val, regs + ofs);
+}
+
+static struct m48t59_plat_data m48t59_data = {
+	.read_byte	= mostek_read_byte,
+	.write_byte	= mostek_write_byte,
+};
+
+static struct platform_device m48t59_rtc = {
+	.name		= "rtc-m48t59",
+	.id		= 0,
+	.num_resources	= 1,
+	.dev	= {
+		.platform_data = &m48t59_data,
+	},
+};
+
+static int mostek_probe(struct platform_device *op)
+{
+	struct device_node *dp = op->dev.of_node;
+
+	/* On an Enterprise system there can be multiple mostek clocks.
+	 * We should only match the one that is on the central FHC bus.
+	 */
+	if (!strcmp(dp->parent->name, "fhc") &&
+	    strcmp(dp->parent->parent->name, "central") != 0)
+		return -ENODEV;
+
+	printk(KERN_INFO "%s: Mostek regs at 0x%llx\n",
+	       dp->full_name, op->resource[0].start);
+
+	m48t59_rtc.resource = &op->resource[0];
+	return platform_device_register(&m48t59_rtc);
+}
+
+static const struct of_device_id mostek_match[] = {
+	{
+		.name = "eeprom",
+	},
+	{},
+};
+
+static struct platform_driver mostek_driver = {
+	.probe		= mostek_probe,
+	.driver = {
+		.name = "mostek",
+		.of_match_table = mostek_match,
+	},
+};
+
+static struct platform_device rtc_sun4v_device = {
+	.name		= "rtc-sun4v",
+	.id		= -1,
+};
+
+static struct platform_device rtc_starfire_device = {
+	.name		= "rtc-starfire",
+	.id		= -1,
+};
+
+static int __init clock_init(void)
+{
+	if (this_is_starfire)
+		return platform_device_register(&rtc_starfire_device);
+
+	if (tlb_type == hypervisor)
+		return platform_device_register(&rtc_sun4v_device);
+
+	(void) platform_driver_register(&rtc_driver);
+	(void) platform_driver_register(&mostek_driver);
+	(void) platform_driver_register(&bq4802_driver);
+
+	return 0;
+}
+
+/* Must be after subsys_initcall() so that busses are probed.  Must
+ * be before device_initcall() because things like the RTC driver
+ * need to see the clock registers.
+ */
+fs_initcall(clock_init);
+
+/* Return true if this is Hummingbird, aka Ultra-IIe */
+static bool is_hummingbird(void)
+{
+	unsigned long ver, manuf, impl;
+
+	__asm__ __volatile__ ("rdpr %%ver, %0"
+			      : "=&r" (ver));
+	manuf = ((ver >> 48) & 0xffff);
+	impl = ((ver >> 32) & 0xffff);
+
+	return (manuf == 0x17 && impl == 0x13);
+}
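+
+/* %ver packs the manufacturer id in bits 63:48 and the implementation
+ * id in bits 47:32, so e.g. a value of 0x0017001300000000 decodes as
+ * manuf 0x17, impl 0x13 -- Hummingbird.
+ */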
+
+struct freq_table {
+	unsigned long clock_tick_ref;
+	unsigned int ref_freq;
+};
+static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 };
+
+unsigned long sparc64_get_clock_tick(unsigned int cpu)
+{
+	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+	if (ft->clock_tick_ref)
+		return ft->clock_tick_ref;
+	return cpu_data(cpu).clock_tick;
+}
+EXPORT_SYMBOL(sparc64_get_clock_tick);
+
+#ifdef CONFIG_CPU_FREQ
+
+static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+				    void *data)
+{
+	struct cpufreq_freqs *freq = data;
+	unsigned int cpu = freq->cpu;
+	struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+
+	if (!ft->ref_freq) {
+		ft->ref_freq = freq->old;
+		ft->clock_tick_ref = cpu_data(cpu).clock_tick;
+	}
+	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
+	    (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+		cpu_data(cpu).clock_tick =
+			cpufreq_scale(ft->clock_tick_ref,
+				      ft->ref_freq,
+				      freq->new);
+	}
+
+	return 0;
+}
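+
+/* cpufreq_scale() keeps clock_tick proportional to the running rate:
+ * clock_tick = clock_tick_ref * freq->new / ft->ref_freq.  E.g. with a
+ * hypothetical clock_tick_ref of 1000000000 captured at a ref_freq of
+ * 1000000 kHz, a step down to 500000 kHz yields 500000000.  The update
+ * is applied before an increase and after a decrease, so mid-transition
+ * clock_tick never understates the actual rate.
+ */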
+
+static struct notifier_block sparc64_cpufreq_notifier_block = {
+	.notifier_call	= sparc64_cpufreq_notifier
+};
+
+static int __init register_sparc64_cpufreq_notifier(void)
+{
+
+	cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
+				  CPUFREQ_TRANSITION_NOTIFIER);
+	return 0;
+}
+
+core_initcall(register_sparc64_cpufreq_notifier);
+
+#endif /* CONFIG_CPU_FREQ */
+
+static int sparc64_next_event(unsigned long delta,
+			      struct clock_event_device *evt)
+{
+	return tick_operations.add_compare(delta) ? -ETIME : 0;
+}
+
+static int sparc64_timer_shutdown(struct clock_event_device *evt)
+{
+	tick_operations.disable_irq();
+	return 0;
+}
+
+static struct clock_event_device sparc64_clockevent = {
+	.features		= CLOCK_EVT_FEAT_ONESHOT,
+	.set_state_shutdown	= sparc64_timer_shutdown,
+	.set_next_event		= sparc64_next_event,
+	.rating			= 100,
+	.shift			= 30,
+	.irq			= -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, sparc64_events);
+
+void __irq_entry timer_interrupt(int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned long tick_mask = tick_operations.softint_mask;
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
+
+	clear_softint(tick_mask);
+
+	irq_enter();
+
+	local_cpu_data().irq0_irqs++;
+	kstat_incr_irq_this_cpu(0);
+
+	if (unlikely(!evt->event_handler)) {
+		printk(KERN_WARNING
+		       "Spurious SPARC64 timer interrupt on cpu %d\n", cpu);
+	} else
+		evt->event_handler(evt);
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+}
+
+void setup_sparc64_timer(void)
+{
+	struct clock_event_device *sevt;
+	unsigned long pstate;
+
+	/* Guarantee that the following sequences execute
+	 * uninterrupted.
+	 */
+	__asm__ __volatile__("rdpr	%%pstate, %0\n\t"
+			     "wrpr	%0, %1, %%pstate"
+			     : "=r" (pstate)
+			     : "i" (PSTATE_IE));
+
+	tick_operations.init_tick();
+
+	/* Restore PSTATE_IE. */
+	__asm__ __volatile__("wrpr	%0, 0x0, %%pstate"
+			     : /* no outputs */
+			     : "r" (pstate));
+
+	sevt = this_cpu_ptr(&sparc64_events);
+
+	memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
+	sevt->cpumask = cpumask_of(smp_processor_id());
+
+	clockevents_register_device(sevt);
+}
+
+#define SPARC64_NSEC_PER_CYC_SHIFT	10UL
+
+static struct clocksource clocksource_tick = {
+	.rating		= 100,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static unsigned long tb_ticks_per_usec __read_mostly;
+
+void __delay(unsigned long loops)
+{
+	unsigned long bclock = get_tick();
+
+	while ((get_tick() - bclock) < loops)
+		;
+}
+EXPORT_SYMBOL(__delay);
+
+void udelay(unsigned long usecs)
+{
+	__delay(tb_ticks_per_usec * usecs);
+}
+EXPORT_SYMBOL(udelay);
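+
+/* tb_ticks_per_usec is frequency / USEC_PER_SEC (set in time_init()),
+ * so udelay(n) busy-waits n microseconds' worth of ticks; at a
+ * hypothetical 1 GHz tick rate it is 1000, and udelay(10) spins for
+ * 10000 ticks.
+ */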
+
+static u64 clocksource_tick_read(struct clocksource *cs)
+{
+	return get_tick();
+}
+
+static void __init get_tick_patch(void)
+{
+	unsigned int *addr, *instr, i;
+	struct get_tick_patch *p;
+
+	if (tlb_type == spitfire && is_hummingbird())
+		return;
+
+	for (p = &__get_tick_patch; p < &__get_tick_patch_end; p++) {
+		instr = (tlb_type == spitfire) ? p->tick : p->stick;
+		addr = (unsigned int *)(unsigned long)p->addr;
+		for (i = 0; i < GET_TICK_NINSTR; i++) {
+			addr[i] = instr[i];
+			/* ensure that address is modified before flush */
+			wmb();
+			flushi(&addr[i]);
+		}
+	}
+}
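+
+/* Each get_tick_patch entry records a call site plus %tick and %stick
+ * instruction variants; the loop above copies the variant matching the
+ * hardware over the generic code, with a wmb() and flushi() per word
+ * so the I-cache picks up the new instructions.  Hummingbird is
+ * skipped because its STICK lives in I/O space and cannot be read with
+ * a short inline sequence.
+ */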
+
+static void __init init_tick_ops(struct sparc64_tick_ops *ops)
+{
+	unsigned long freq, quotient, tick;
+
+	freq = ops->get_frequency();
+	quotient = clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
+	tick = ops->get_tick();
+
+	ops->offset = (tick * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT;
+	ops->ticks_per_nsec_quotient = quotient;
+	ops->frequency = freq;
+	tick_operations = *ops;
+	get_tick_patch();
+}
+
+void __init time_init_early(void)
+{
+	if (tlb_type == spitfire) {
+		if (is_hummingbird()) {
+			init_tick_ops(&hbtick_operations);
+			clocksource_tick.archdata.vclock_mode = VCLOCK_NONE;
+		} else {
+			init_tick_ops(&tick_operations);
+			clocksource_tick.archdata.vclock_mode = VCLOCK_TICK;
+			vdso_fix_stick = 1;
+		}
+	} else {
+		init_tick_ops(&stick_operations);
+		clocksource_tick.archdata.vclock_mode = VCLOCK_STICK;
+	}
+}
+
+void __init time_init(void)
+{
+	unsigned long freq;
+
+	freq = tick_operations.frequency;
+	tb_ticks_per_usec = freq / USEC_PER_SEC;
+
+	clocksource_tick.name = tick_operations.name;
+	clocksource_tick.read = clocksource_tick_read;
+
+	clocksource_register_hz(&clocksource_tick, freq);
+	printk("clocksource: mult[%x] shift[%d]\n",
+	       clocksource_tick.mult, clocksource_tick.shift);
+
+	sparc64_clockevent.name = tick_operations.name;
+	clockevents_calc_mult_shift(&sparc64_clockevent, freq, 4);
+
+	sparc64_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent);
+	sparc64_clockevent.max_delta_ticks = 0x7fffffffffffffffUL;
+	sparc64_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &sparc64_clockevent);
+	sparc64_clockevent.min_delta_ticks = 0xF;
+
+	printk("clockevent: mult[%x] shift[%d]\n",
+	       sparc64_clockevent.mult, sparc64_clockevent.shift);
+
+	setup_sparc64_timer();
+}
+
+unsigned long long sched_clock(void)
+{
+	unsigned long quotient = tick_operations.ticks_per_nsec_quotient;
+	unsigned long offset = tick_operations.offset;
+
+	/* Use barrier so the compiler emits the loads first and overlaps load
+	 * latency with reading tick, because reading %tick/%stick is a
+	 * post-sync instruction that will flush and restart subsequent
+	 * instructions after it commits.
+	 */
+	barrier();
+
+	return ((get_tick() * quotient) >> SPARC64_NSEC_PER_CYC_SHIFT) - offset;
+}
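+
+/* The conversion is a single multiply and shift:
+ * quotient = (NSEC_PER_SEC << 10) / frequency (see init_tick_ops()),
+ * so ns = (ticks * quotient) >> 10.  At a hypothetical 1 GHz tick the
+ * quotient is exactly 1024 and the formula reduces to ns = ticks; the
+ * precomputed offset zeroes the clock at the moment init_tick_ops()
+ * sampled it.
+ */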
+
+int read_current_timer(unsigned long *timer_val)
+{
+	*timer_val = get_tick();
+	return 0;
+}
diff --git a/arch/sparc/kernel/trampoline_32.S b/arch/sparc/kernel/trampoline_32.S
new file mode 100644
index 0000000..82fafee
--- /dev/null
+++ b/arch/sparc/kernel/trampoline_32.S
@@ -0,0 +1,201 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * trampoline.S: SMP cpu boot-up trampoline code.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <asm/head.h>
+#include <asm/psr.h>
+#include <asm/page.h>
+#include <asm/asi.h>
+#include <asm/ptrace.h>
+#include <asm/vaddrs.h>
+#include <asm/contregs.h>
+#include <asm/thread_info.h>
+
+	.globl sun4m_cpu_startup
+	.globl sun4d_cpu_startup
+
+	.align 4
+
+/* When we start up a cpu for the first time it enters this routine.
+ * This initializes the chip from whatever state the prom left it
+ * in and sets PIL in %psr to 15, no irqs.
+ */
+
+sun4m_cpu_startup:
+cpu1_startup:
+	sethi	%hi(trapbase_cpu1), %g3
+	b	1f
+	 or	%g3, %lo(trapbase_cpu1), %g3
+
+cpu2_startup:
+	sethi	%hi(trapbase_cpu2), %g3
+	b	1f
+	 or	%g3, %lo(trapbase_cpu2), %g3
+
+cpu3_startup:
+	sethi	%hi(trapbase_cpu3), %g3
+	b	1f
+	 or	%g3, %lo(trapbase_cpu3), %g3
+
+1:
+	/* Set up a sane %psr -- PIL<0xf> S<0x1> PS<0x1> CWP<0x0> */
+	set	(PSR_PIL | PSR_S | PSR_PS), %g1
+	wr	%g1, 0x0, %psr		! traps off though
+	WRITE_PAUSE
+
+	/* Our %wim is one behind CWP */
+	mov	2, %g1
+	wr	%g1, 0x0, %wim
+	WRITE_PAUSE
+
+	/* This identifies "this cpu". */
+	wr	%g3, 0x0, %tbr
+	WRITE_PAUSE
+
+	/* Give ourselves a stack and curptr. */
+	set	current_set, %g5
+	srl	%g3, 10, %g4
+	and	%g4, 0xc, %g4
+	ld	[%g5 + %g4], %g6
+
+	sethi	%hi(THREAD_SIZE - STACKFRAME_SZ), %sp
+	or	%sp, %lo(THREAD_SIZE - STACKFRAME_SZ), %sp
+	add	%g6, %sp, %sp
+
+	/* Turn on traps (PSR_ET). */
+	rd	%psr, %g1
+	wr	%g1, PSR_ET, %psr	! traps on
+	WRITE_PAUSE
+
+	/* Init our caches, etc. */
+	set	poke_srmmu, %g5
+	ld	[%g5], %g5
+	call	%g5
+	 nop
+
+	/* Start this processor. */
+	call	smp_callin
+	 nop
+
+	b,a	smp_panic
+
+	.text
+	.align	4
+
+smp_panic:
+	call	cpu_panic
+	 nop
+
+/* CPUID in bootbus can be found at PA 0xff0140000 */
+#define SUN4D_BOOTBUS_CPUID	0xf0140000
+
+	.align	4
+
+sun4d_cpu_startup:
+	/* Set up a sane %psr -- PIL<0xf> S<0x1> PS<0x1> CWP<0x0> */
+	set	(PSR_PIL | PSR_S | PSR_PS), %g1
+	wr	%g1, 0x0, %psr		! traps off though
+	WRITE_PAUSE
+
+	/* Our %wim is one behind CWP */
+	mov	2, %g1
+	wr	%g1, 0x0, %wim
+	WRITE_PAUSE
+
+	/* Set tbr - we use just one trap table. */
+	set	trapbase, %g1
+	wr	%g1, 0x0, %tbr
+	WRITE_PAUSE
+
+	/* Get our CPU id out of bootbus */
+	set	SUN4D_BOOTBUS_CPUID, %g3
+	lduba	[%g3] ASI_M_CTL, %g3
+	and	%g3, 0xf8, %g3
+	srl	%g3, 3, %g1
+	sta	%g1, [%g0] ASI_M_VIKING_TMP1
+
+	/* Give ourselves a stack and curptr. */
+	set	current_set, %g5
+	srl	%g3, 1, %g4
+	ld	[%g5 + %g4], %g6
+
+	sethi	%hi(THREAD_SIZE - STACKFRAME_SZ), %sp
+	or	%sp, %lo(THREAD_SIZE - STACKFRAME_SZ), %sp
+	add	%g6, %sp, %sp
+
+	/* Turn on traps (PSR_ET). */
+	rd	%psr, %g1
+	wr	%g1, PSR_ET, %psr	! traps on
+	WRITE_PAUSE
+
+	/* Init our caches, etc. */
+	set	poke_srmmu, %g5
+	ld	[%g5], %g5
+	call	%g5
+	 nop
+
+	/* Start this processor. */
+	call	smp_callin
+	 nop
+
+	b,a	smp_panic
+
+	.align	4
+        .global leon_smp_cpu_startup, smp_penguin_ctable
+
+leon_smp_cpu_startup:
+
+        set smp_penguin_ctable,%g1
+        ld [%g1+4],%g1
+        srl %g1,4,%g1
+        set 0x00000100,%g5 /* SRMMU_CTXTBL_PTR */
+	sta %g1, [%g5] ASI_LEON_MMUREGS
+
+	/* Set up a sane %psr -- PIL<0xf> S<0x1> PS<0x1> CWP<0x0> */
+	set	(PSR_PIL | PSR_S | PSR_PS), %g1
+	wr	%g1, 0x0, %psr		! traps off though
+	WRITE_PAUSE
+
+	/* Our %wim is one behind CWP */
+	mov	2, %g1
+	wr	%g1, 0x0, %wim
+	WRITE_PAUSE
+
+	/* Set tbr - we use just one trap table. */
+	set	trapbase, %g1
+	wr	%g1, 0x0, %tbr
+	WRITE_PAUSE
+
+	/* Get our CPU id */
+        rd     %asr17,%g3
+
+	/* Give ourselves a stack and curptr. */
+	set	current_set, %g5
+	srl	%g3, 28, %g4
+	sll	%g4, 2, %g4
+	ld	[%g5 + %g4], %g6
+
+	sethi	%hi(THREAD_SIZE - STACKFRAME_SZ), %sp
+	or	%sp, %lo(THREAD_SIZE - STACKFRAME_SZ), %sp
+	add	%g6, %sp, %sp
+
+	/* Turn on traps (PSR_ET). */
+	rd	%psr, %g1
+	wr	%g1, PSR_ET, %psr	! traps on
+	WRITE_PAUSE
+
+	/* Init our caches, etc. */
+	set	poke_srmmu, %g5
+	ld	[%g5], %g5
+	call	%g5
+	 nop
+
+	/* Start this processor. */
+	call	smp_callin
+	 nop
+
+	b,a	smp_panic
diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S
new file mode 100644
index 0000000..fe59122
--- /dev/null
+++ b/arch/sparc/kernel/trampoline_64.S
@@ -0,0 +1,414 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * trampoline.S: Jump start slave processors on sparc64.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+
+#include <asm/head.h>
+#include <asm/asi.h>
+#include <asm/lsu.h>
+#include <asm/dcr.h>
+#include <asm/dcu.h>
+#include <asm/pstate.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/spitfire.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+#include <asm/mmu.h>
+#include <asm/hypervisor.h>
+#include <asm/cpudata.h>
+
+	.data
+	.align	8
+call_method:
+	.asciz	"call-method"
+	.align	8
+itlb_load:
+	.asciz	"SUNW,itlb-load"
+	.align	8
+dtlb_load:
+	.asciz	"SUNW,dtlb-load"
+
+#define TRAMP_STACK_SIZE	1024
+	.align	16
+tramp_stack:
+	.skip	TRAMP_STACK_SIZE
+
+	.align		8
+	.globl		sparc64_cpu_startup, sparc64_cpu_startup_end
+sparc64_cpu_startup:
+	BRANCH_IF_SUN4V(g1, niagara_startup)
+	BRANCH_IF_CHEETAH_BASE(g1, g5, cheetah_startup)
+	BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1, g5, cheetah_plus_startup)
+
+	ba,pt	%xcc, spitfire_startup
+	 nop
+
+cheetah_plus_startup:
+	/* Preserve OBP chosen DCU and DCR register settings.  */
+	ba,pt	%xcc, cheetah_generic_startup
+	 nop
+
+cheetah_startup:
+	mov	DCR_BPE | DCR_RPE | DCR_SI | DCR_IFPOE | DCR_MS, %g1
+	wr	%g1, %asr18
+
+	sethi	%uhi(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
+	or	%g5, %ulo(DCU_ME|DCU_RE|DCU_HPE|DCU_SPE|DCU_SL|DCU_WE), %g5
+	sllx	%g5, 32, %g5
+	or	%g5, DCU_DM | DCU_IM | DCU_DC | DCU_IC, %g5
+	stxa	%g5, [%g0] ASI_DCU_CONTROL_REG
+	membar	#Sync
+	/* fallthru */
+
+cheetah_generic_startup:
+	mov	TSB_EXTENSION_P, %g3
+	stxa	%g0, [%g3] ASI_DMMU
+	stxa	%g0, [%g3] ASI_IMMU
+	membar	#Sync
+
+	mov	TSB_EXTENSION_S, %g3
+	stxa	%g0, [%g3] ASI_DMMU
+	membar	#Sync
+
+	mov	TSB_EXTENSION_N, %g3
+	stxa	%g0, [%g3] ASI_DMMU
+	stxa	%g0, [%g3] ASI_IMMU
+	membar	#Sync
+	/* fallthru */
+
+niagara_startup:
+	/* Disable STICK_INT interrupts. */
+	sethi		%hi(0x80000000), %g5
+	sllx		%g5, 32, %g5
+	wr		%g5, %asr25
+
+	ba,pt		%xcc, startup_continue
+	 nop
+
+spitfire_startup:
+	mov		(LSU_CONTROL_IC | LSU_CONTROL_DC | LSU_CONTROL_IM | LSU_CONTROL_DM), %g1
+	stxa		%g1, [%g0] ASI_LSU_CONTROL
+	membar		#Sync
+
+startup_continue:
+	mov		%o0, %l0
+	BRANCH_IF_SUN4V(g1, niagara_lock_tlb)
+
+	sethi		%hi(0x80000000), %g2
+	sllx		%g2, 32, %g2
+	wr		%g2, 0, %tick_cmpr
+
+	/* Call OBP by hand to lock KERNBASE into i/d tlbs.
+	 * We lock 'num_kernel_image_mappings' consecutive entries.
+	 */
+	sethi		%hi(prom_entry_lock), %g2
+1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
+	brnz,pn		%g1, 1b
+	 nop
+
+	/* Get onto temporary stack which will be in the locked
+	 * kernel image.
+	 */
+	sethi		%hi(tramp_stack), %g1
+	or		%g1, %lo(tramp_stack), %g1
+	add		%g1, TRAMP_STACK_SIZE, %g1
+	sub		%g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
+	flushw
+
+	/* Setup the loop variables:
+	 * %l3: VADDR base
+	 * %l4: TTE base
+	 * %l5: Loop iterator, iterates from 0 to 'num_kernel_image_mappings'
+	 * %l6: Number of TTE entries to map
+	 * %l7: Highest TTE entry number, we count down
+	 */
+	sethi		%hi(KERNBASE), %l3
+	sethi		%hi(kern_locked_tte_data), %l4
+	ldx		[%l4 + %lo(kern_locked_tte_data)], %l4
+	clr		%l5
+	sethi		%hi(num_kernel_image_mappings), %l6
+	lduw		[%l6 + %lo(num_kernel_image_mappings)], %l6
+
+	mov		15, %l7
+	BRANCH_IF_ANY_CHEETAH(g1,g5,2f)
+
+	mov		63, %l7
+2:
+
+3:
+	/* Lock into I-MMU */
+	sethi		%hi(call_method), %g2
+	or		%g2, %lo(call_method), %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x00]
+	mov		5, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x08]
+	mov		1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x10]
+	sethi		%hi(itlb_load), %g2
+	or		%g2, %lo(itlb_load), %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x18]
+	sethi		%hi(prom_mmu_ihandle_cache), %g2
+	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x20]
+
+	/* Each TTE maps 4MB, convert index to offset.  */
+	sllx		%l5, 22, %g1
+
+	add		%l3, %g1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x28]	! VADDR
+	add		%l4, %g1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x30]	! TTE
+
+	/* TTE index is highest minus loop index.  */
+	sub		%l7, %l5, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x38]
+
+	sethi		%hi(p1275buf), %g2
+	or		%g2, %lo(p1275buf), %g2
+	ldx		[%g2 + 0x08], %o1
+	call		%o1
+	 add		%sp, (2047 + 128), %o0
+
+	/* Lock into D-MMU */
+	sethi		%hi(call_method), %g2
+	or		%g2, %lo(call_method), %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x00]
+	mov		5, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x08]
+	mov		1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x10]
+	sethi		%hi(dtlb_load), %g2
+	or		%g2, %lo(dtlb_load), %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x18]
+	sethi		%hi(prom_mmu_ihandle_cache), %g2
+	lduw		[%g2 + %lo(prom_mmu_ihandle_cache)], %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x20]
+
+	/* Each TTE maps 4MB, convert index to offset.  */
+	sllx		%l5, 22, %g1
+
+	add		%l3, %g1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x28]	! VADDR
+	add		%l4, %g1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x30]	! TTE
+
+	/* TTE index is highest minus loop index.  */
+	sub		%l7, %l5, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x38]
+
+	sethi		%hi(p1275buf), %g2
+	or		%g2, %lo(p1275buf), %g2
+	ldx		[%g2 + 0x08], %o1
+	call		%o1
+	 add		%sp, (2047 + 128), %o0
+
+	add		%l5, 1, %l5
+	cmp		%l5, %l6
+	bne,pt		%xcc, 3b
+	 nop
+
+	sethi		%hi(prom_entry_lock), %g2
+	stb		%g0, [%g2 + %lo(prom_entry_lock)]
+
+	ba,pt		%xcc, after_lock_tlb
+	 nop
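+
+	/* For reference, the argument block built at %sp + 2047 + 128 in
+	 * the loop above follows the IEEE 1275 layout consumed by the
+	 * entry stub found through p1275buf: [0x00] service name
+	 * ("call-method"), [0x08] #args = 5, [0x10] #results = 1,
+	 * [0x18] method ("SUNW,itlb-load"/"SUNW,dtlb-load"),
+	 * [0x20] MMU ihandle, [0x28] vaddr, [0x30] TTE, [0x38] TLB
+	 * entry index.
+	 */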
+
+niagara_lock_tlb:
+	sethi		%hi(KERNBASE), %l3
+	sethi		%hi(kern_locked_tte_data), %l4
+	ldx		[%l4 + %lo(kern_locked_tte_data)], %l4
+	clr		%l5
+	sethi		%hi(num_kernel_image_mappings), %l6
+	lduw		[%l6 + %lo(num_kernel_image_mappings)], %l6
+
+1:
+	mov		HV_FAST_MMU_MAP_PERM_ADDR, %o5
+	sllx		%l5, 22, %g2
+	add		%l3, %g2, %o0
+	clr		%o1
+	add		%l4, %g2, %o2
+	mov		HV_MMU_IMMU, %o3
+	ta		HV_FAST_TRAP
+
+	mov		HV_FAST_MMU_MAP_PERM_ADDR, %o5
+	sllx		%l5, 22, %g2
+	add		%l3, %g2, %o0
+	clr		%o1
+	add		%l4, %g2, %o2
+	mov		HV_MMU_DMMU, %o3
+	ta		HV_FAST_TRAP
+
+	add		%l5, 1, %l5
+	cmp		%l5, %l6
+	bne,pt		%xcc, 1b
+	 nop
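+
+	/* Each pass above makes two mmu_map_perm_addr hypervisor calls
+	 * (%o5 = HV_FAST_MMU_MAP_PERM_ADDR), locking one 4MB kernel
+	 * mapping first into the I-MMU and then the D-MMU, with
+	 * %o0 = vaddr, %o1 = context 0, %o2 = TTE data, %o3 = MMU select.
+	 */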
+
+after_lock_tlb:
+	wrpr		%g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
+	wr		%g0, 0, %fprs
+
+	wr		%g0, ASI_P, %asi
+
+	mov		PRIMARY_CONTEXT, %g7
+
+661:	stxa		%g0, [%g7] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g0, [%g7] ASI_MMU
+	.previous
+
+	membar		#Sync
+	mov		SECONDARY_CONTEXT, %g7
+
+661:	stxa		%g0, [%g7] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g0, [%g7] ASI_MMU
+	.previous
+
+	membar		#Sync
+
+	/* Everything we do here, until we properly take over the
+	 * trap table, must be done with extreme care.  We cannot
+	 * make any references to %g6 (current thread pointer),
+	 * %g4 (current task pointer), or %g5 (base of current cpu's
+	 * per-cpu area) until we properly take over the trap table
+	 * from the firmware and hypervisor.
+	 *
+	 * Get onto temporary stack which is in the locked kernel image.
+	 */
+	sethi		%hi(tramp_stack), %g1
+	or		%g1, %lo(tramp_stack), %g1
+	add		%g1, TRAMP_STACK_SIZE, %g1
+	sub		%g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
+	mov		0, %fp
+
+	/* Put garbage in these registers to trap any access to them.  */
+	set		0xdeadbeef, %g4
+	set		0xdeadbeef, %g5
+	set		0xdeadbeef, %g6
+
+	call		init_irqwork_curcpu
+	 nop
+
+	sethi		%hi(tlb_type), %g3
+	lduw		[%g3 + %lo(tlb_type)], %g2
+	cmp		%g2, 3
+	bne,pt		%icc, 1f
+	 nop
+
+	call		hard_smp_processor_id
+	 nop
+	
+	call		sun4v_register_mondo_queues
+	 nop
+
+1:	call		init_cur_cpu_trap
+	 ldx		[%l0], %o0
+
+	/* Start using proper page size encodings in ctx register.  */
+	sethi		%hi(sparc64_kern_pri_context), %g3
+	ldx		[%g3 + %lo(sparc64_kern_pri_context)], %g2
+	mov		PRIMARY_CONTEXT, %g1
+
+661:	stxa		%g2, [%g1] ASI_DMMU
+	.section	.sun4v_1insn_patch, "ax"
+	.word		661b
+	stxa		%g2, [%g1] ASI_MMU
+	.previous
+
+	membar		#Sync
+
+	wrpr		%g0, 0, %wstate
+
+	sethi		%hi(prom_entry_lock), %g2
+1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
+	brnz,pn		%g1, 1b
+	 nop
+
+	/* As a hack, put &init_thread_union into %g6.
+	 * prom_world() loads from here to restore the %asi
+	 * register.
+	 */
+	sethi		%hi(init_thread_union), %g6
+	or		%g6, %lo(init_thread_union), %g6
+
+	sethi		%hi(is_sun4v), %o0
+	lduw		[%o0 + %lo(is_sun4v)], %o0
+	brz,pt		%o0, 2f
+	 nop
+
+	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+	add		%g2, TRAP_PER_CPU_FAULT_INFO, %g2
+	stxa		%g2, [%g0] ASI_SCRATCHPAD
+
+	/* Compute physical address:
+	 *
+	 * paddr = kern_base + (mmfsa_vaddr - KERNBASE)
+	 */
+	sethi		%hi(KERNBASE), %g3
+	sub		%g2, %g3, %g2
+	sethi		%hi(kern_base), %g3
+	ldx		[%g3 + %lo(kern_base)], %g3
+	add		%g2, %g3, %o1
+	sethi		%hi(sparc64_ttable_tl0), %o0
+
+	set		prom_set_trap_table_name, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x00]
+	mov		2, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x08]
+	mov		0, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x10]
+	stx		%o0, [%sp + 2047 + 128 + 0x18]
+	stx		%o1, [%sp + 2047 + 128 + 0x20]
+	sethi		%hi(p1275buf), %g2
+	or		%g2, %lo(p1275buf), %g2
+	ldx		[%g2 + 0x08], %o1
+	call		%o1
+	 add		%sp, (2047 + 128), %o0
+
+	ba,pt		%xcc, 3f
+	 nop
+
+2:	sethi		%hi(sparc64_ttable_tl0), %o0
+	set		prom_set_trap_table_name, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x00]
+	mov		1, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x08]
+	mov		0, %g2
+	stx		%g2, [%sp + 2047 + 128 + 0x10]
+	stx		%o0, [%sp + 2047 + 128 + 0x18]
+	sethi		%hi(p1275buf), %g2
+	or		%g2, %lo(p1275buf), %g2
+	ldx		[%g2 + 0x08], %o1
+	call		%o1
+	 add		%sp, (2047 + 128), %o0
+
+3:	sethi		%hi(prom_entry_lock), %g2
+	stb		%g0, [%g2 + %lo(prom_entry_lock)]
+
+	ldx		[%l0], %g6
+	ldx		[%g6 + TI_TASK], %g4
+
+	mov		1, %g5
+	sllx		%g5, THREAD_SHIFT, %g5
+	sub		%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
+	add		%g6, %g5, %sp
+
+	rdpr		%pstate, %o1
+	or		%o1, PSTATE_IE, %o1
+	wrpr		%o1, 0, %pstate
+
+	call		smp_callin
+	 nop
+
+	call		cpu_panic
+	 nop
+1:	b,a,pt		%xcc, 1b
+
+	.align		8
+sparc64_cpu_startup_end:
diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c
new file mode 100644
index 0000000..bcdfc61
--- /dev/null
+++ b/arch/sparc/kernel/traps_32.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/sparc/kernel/traps.c
+ *
+ * Copyright 1995, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright 2000 Jakub Jelinek (jakub@redhat.com)
+ */
+
+/*
+ * I hate traps on the sparc, grrr...
+ */
+
+#include <linux/sched/mm.h>
+#include <linux/sched/debug.h>
+#include <linux/mm_types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/kdebug.h>
+#include <linux/export.h>
+
+#include <asm/delay.h>
+#include <asm/ptrace.h>
+#include <asm/oplib.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/unistd.h>
+#include <asm/traps.h>
+
+#include "entry.h"
+#include "kernel.h"
+
+/* #define TRAP_DEBUG */
+
+static void instruction_dump(unsigned long *pc)
+{
+	int i;
+	
+	if((((unsigned long) pc) & 3))
+                return;
+
+	for(i = -3; i < 6; i++)
+		printk("%c%08lx%c",i?' ':'<',pc[i],i?' ':'>');
+	printk("\n");
+}
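+
+/* The dump covers the nine words pc[-3] through pc[5], bracketing the
+ * word at the faulting pc itself in '<' '>' so it stands out.
+ */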
+
+#define __SAVE __asm__ __volatile__("save %sp, -0x40, %sp\n\t")
+#define __RESTORE __asm__ __volatile__("restore %g0, %g0, %g0\n\t")
+
+void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
+{
+	static int die_counter;
+	int count = 0;
+
+	/* Amuse the user. */
+	printk(
+"              \\|/ ____ \\|/\n"
+"              \"@'/ ,. \\`@\"\n"
+"              /_| \\__/ |_\\\n"
+"                 \\__U_/\n");
+
+	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
+	show_regs(regs);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+
+	__SAVE; __SAVE; __SAVE; __SAVE;
+	__SAVE; __SAVE; __SAVE; __SAVE;
+	__RESTORE; __RESTORE; __RESTORE; __RESTORE;
+	__RESTORE; __RESTORE; __RESTORE; __RESTORE;
+
+	{
+		struct reg_window32 *rw = (struct reg_window32 *)regs->u_regs[UREG_FP];
+
+		/* Stop the back trace when we hit userland or we
+		 * find some badly aligned kernel stack. Set an upper
+		 * bound in case our stack is trashed and we loop.
+		 */
+		while(rw					&&
+		      count++ < 30				&&
+                      (((unsigned long) rw) >= PAGE_OFFSET)	&&
+		      !(((unsigned long) rw) & 0x7)) {
+			printk("Caller[%08lx]: %pS\n", rw->ins[7],
+			       (void *) rw->ins[7]);
+			rw = (struct reg_window32 *)rw->ins[6];
+		}
+	}
+	printk("Instruction DUMP:");
+	instruction_dump ((unsigned long *) regs->pc);
+	if(regs->psr & PSR_PS)
+		do_exit(SIGKILL);
+	do_exit(SIGSEGV);
+}
+
+void do_hw_interrupt(struct pt_regs *regs, unsigned long type)
+{
+	if(type < 0x80) {
+		/* Sun OSes puke from bad traps, Linux survives! */
+		printk("Unimplemented Sparc TRAP, type = %02lx\n", type);
+		die_if_kernel("Whee... Hello Mr. Penguin", regs);
+	}	
+
+	if(regs->psr & PSR_PS)
+		die_if_kernel("Kernel bad trap", regs);
+
+	force_sig_fault(SIGILL, ILL_ILLTRP,
+			(void __user *)regs->pc, type - 0x80, current);
+}
+
+void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+			    unsigned long psr)
+{
+	if(psr & PSR_PS)
+		die_if_kernel("Kernel illegal instruction", regs);
+#ifdef TRAP_DEBUG
+	printk("Ill instr. at pc=%08lx instruction is %08lx\n",
+	       regs->pc, *(unsigned long *)regs->pc);
+#endif
+
+	send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
+}
+
+void do_priv_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+			 unsigned long psr)
+{
+	if(psr & PSR_PS)
+		die_if_kernel("Penguin instruction from Penguin mode??!?!", regs);
+	send_sig_fault(SIGILL, ILL_PRVOPC, (void __user *)pc, 0, current);
+}
+
+/* XXX User may want to be allowed to do this. XXX */
+
+void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+			    unsigned long psr)
+{
+	if(regs->psr & PSR_PS) {
+		printk("KERNEL MNA at pc %08lx npc %08lx called by %08lx\n", pc, npc,
+		       regs->u_regs[UREG_RETPC]);
+		die_if_kernel("BOGUS", regs);
+		/* die_if_kernel("Kernel MNA access", regs); */
+	}
+#if 0
+	show_regs (regs);
+	instruction_dump ((unsigned long *) regs->pc);
+	printk ("do_MNA!\n");
+#endif
+	send_sig_fault(SIGBUS, BUS_ADRALN,
+		       /* FIXME: Should dig out mna address */ (void *)0,
+		       0, current);
+}
+
+static unsigned long init_fsr = 0x0UL;
+static unsigned long init_fregs[32] __attribute__ ((aligned (8))) =
+                { ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL,
+		  ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL,
+		  ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL,
+		  ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL, ~0UL };
+
+void do_fpd_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+		 unsigned long psr)
+{
+	/* Sanity check... */
+	if(psr & PSR_PS)
+		die_if_kernel("Kernel gets FloatingPenguinUnit disabled trap", regs);
+
+	put_psr(get_psr() | PSR_EF);    /* Allow FPU ops. */
+	regs->psr |= PSR_EF;
+#ifndef CONFIG_SMP
+	if(last_task_used_math == current)
+		return;
+	if(last_task_used_math) {
+		/* Other processes fpu state, save away */
+		struct task_struct *fptask = last_task_used_math;
+		fpsave(&fptask->thread.float_regs[0], &fptask->thread.fsr,
+		       &fptask->thread.fpqueue[0], &fptask->thread.fpqdepth);
+	}
+	last_task_used_math = current;
+	if(used_math()) {
+		fpload(&current->thread.float_regs[0], &current->thread.fsr);
+	} else {
+		/* Set initial sane state. */
+		fpload(&init_fregs[0], &init_fsr);
+		set_used_math();
+	}
+#else
+	if(!used_math()) {
+		fpload(&init_fregs[0], &init_fsr);
+		set_used_math();
+	} else {
+		fpload(&current->thread.float_regs[0], &current->thread.fsr);
+	}
+	set_thread_flag(TIF_USEDFPU);
+#endif
+}
+
+static unsigned long fake_regs[32] __attribute__ ((aligned (8)));
+static unsigned long fake_fsr;
+static unsigned long fake_queue[32] __attribute__ ((aligned (8)));
+static unsigned long fake_depth;
+
+void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+		 unsigned long psr)
+{
+	static int calls;
+	unsigned long fsr;
+	int ret = 0;
+	int code;
+#ifndef CONFIG_SMP
+	struct task_struct *fpt = last_task_used_math;
+#else
+	struct task_struct *fpt = current;
+#endif
+	put_psr(get_psr() | PSR_EF);
+	/* If nobody owns the fpu right now, just clear the
+	 * error into our fake static buffer and hope it doesn't
+	 * happen again.  Thank you crashme...
+	 */
+#ifndef CONFIG_SMP
+	if(!fpt) {
+#else
+	if (!test_tsk_thread_flag(fpt, TIF_USEDFPU)) {
+#endif
+		fpsave(&fake_regs[0], &fake_fsr, &fake_queue[0], &fake_depth);
+		regs->psr &= ~PSR_EF;
+		return;
+	}
+	fpsave(&fpt->thread.float_regs[0], &fpt->thread.fsr,
+	       &fpt->thread.fpqueue[0], &fpt->thread.fpqdepth);
+#ifdef DEBUG_FPU
+	printk("Hmm, FP exception, fsr was %016lx\n", fpt->thread.fsr);
+#endif
+
+	switch ((fpt->thread.fsr & 0x1c000)) {
+	/* switch on the contents of the ftt [floating point trap type] field */
+#ifdef DEBUG_FPU
+	case (1 << 14):
+		printk("IEEE_754_exception\n");
+		break;
+#endif
+	case (2 << 14):  /* unfinished_FPop (underflow & co) */
+	case (3 << 14):  /* unimplemented_FPop (quad stuff, maybe sqrt) */
+		ret = do_mathemu(regs, fpt);
+		break;
+#ifdef DEBUG_FPU
+	case (4 << 14):
+		printk("sequence_error (OS bug...)\n");
+		break;
+	case (5 << 14):
+		printk("hardware_error (uhoh!)\n");
+		break;
+	case (6 << 14):
+		printk("invalid_fp_register (user error)\n");
+		break;
+#endif /* DEBUG_FPU */
+	}
+	/* If we successfully emulated the FPop, we pretend the trap never happened :-> */
+	if (ret) {
+		fpload(&current->thread.float_regs[0], &current->thread.fsr);
+		return;
+	}
+	/* nope, better SIGFPE the offending process... */
+	       
+#ifdef CONFIG_SMP
+	clear_tsk_thread_flag(fpt, TIF_USEDFPU);
+#endif
+	if(psr & PSR_PS) {
+		/* The first fsr store/load we tried trapped,
+		 * the second one will not (we hope).
+		 */
+		printk("WARNING: FPU exception from kernel mode. at pc=%08lx\n",
+		       regs->pc);
+		regs->pc = regs->npc;
+		regs->npc += 4;
+		calls++;
+		if(calls > 2)
+			die_if_kernel("Too many Penguin-FPU traps from kernel mode",
+				      regs);
+		return;
+	}
+
+	fsr = fpt->thread.fsr;
+	code = FPE_FLTUNK;
+	if ((fsr & 0x1c000) == (1 << 14)) {
+		if (fsr & 0x10)
+			code = FPE_FLTINV;
+		else if (fsr & 0x08)
+			code = FPE_FLTOVF;
+		else if (fsr & 0x04)
+			code = FPE_FLTUND;
+		else if (fsr & 0x02)
+			code = FPE_FLTDIV;
+		else if (fsr & 0x01)
+			code = FPE_FLTRES;
+	}
+	send_sig_fault(SIGFPE, code, (void __user *)pc, 0, fpt);
+#ifndef CONFIG_SMP
+	last_task_used_math = NULL;
+#endif
+	regs->psr &= ~PSR_EF;
+	if(calls > 0)
+		calls=0;
+}
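+
+/* Decoding notes for the above: fsr bits 16:14 hold ftt, the floating
+ * point trap type, i.e. ftt = (fsr >> 14) & 7; 1 is an IEEE exception,
+ * 2 and 3 go to the emulator, 4-6 are diagnostic.  For an IEEE
+ * exception the cexc bits select the signal code: 0x10 invalid,
+ * 0x08 overflow, 0x04 underflow, 0x02 divide-by-zero, 0x01 inexact.
+ */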
+
+void handle_tag_overflow(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+			 unsigned long psr)
+{
+	if(psr & PSR_PS)
+		die_if_kernel("Penguin overflow trap from kernel mode", regs);
+	send_sig_fault(SIGEMT, EMT_TAGOVF, (void __user *)pc, 0, current);
+}
+
+void handle_watchpoint(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+		       unsigned long psr)
+{
+#ifdef TRAP_DEBUG
+	printk("Watchpoint detected at PC %08lx NPC %08lx PSR %08lx\n",
+	       pc, npc, psr);
+#endif
+	if(psr & PSR_PS)
+		panic("Tell me what a watchpoint trap is, and I'll then deal "
+		      "with such a beast...");
+}
+
+void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+		       unsigned long psr)
+{
+#ifdef TRAP_DEBUG
+	printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n",
+	       pc, npc, psr);
+#endif
+	force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current);
+}
+
+void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+			unsigned long psr)
+{
+	send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current);
+}
+
+void handle_cp_exception(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+			 unsigned long psr)
+{
+#ifdef TRAP_DEBUG
+	printk("Co-Processor Exception at PC %08lx NPC %08lx PSR %08lx\n",
+	       pc, npc, psr);
+#endif
+	send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current);
+}
+
+void handle_hw_divzero(struct pt_regs *regs, unsigned long pc, unsigned long npc,
+		       unsigned long psr)
+{
+	send_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)pc, 0, current);
+}
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+void do_BUG(const char *file, int line)
+{
+        // bust_spinlocks(1);   XXX Not in our original BUG()
+        printk("kernel BUG at %s:%d!\n", file, line);
+}
+EXPORT_SYMBOL(do_BUG);
+#endif
+
+/* Since we have our mappings set up, on multiprocessors we can spin them
+ * up here so that timer interrupts work during initialization.
+ */
+
+void trap_init(void)
+{
+	extern void thread_info_offsets_are_bolixed_pete(void);
+
+	/* Force linker to barf if mismatched */
+	if (TI_UWINMASK    != offsetof(struct thread_info, uwinmask) ||
+	    TI_TASK        != offsetof(struct thread_info, task) ||
+	    TI_FLAGS       != offsetof(struct thread_info, flags) ||
+	    TI_CPU         != offsetof(struct thread_info, cpu) ||
+	    TI_PREEMPT     != offsetof(struct thread_info, preempt_count) ||
+	    TI_SOFTIRQ     != offsetof(struct thread_info, softirq_count) ||
+	    TI_HARDIRQ     != offsetof(struct thread_info, hardirq_count) ||
+	    TI_KSP         != offsetof(struct thread_info, ksp) ||
+	    TI_KPC         != offsetof(struct thread_info, kpc) ||
+	    TI_KPSR        != offsetof(struct thread_info, kpsr) ||
+	    TI_KWIM        != offsetof(struct thread_info, kwim) ||
+	    TI_REG_WINDOW  != offsetof(struct thread_info, reg_window) ||
+	    TI_RWIN_SPTRS  != offsetof(struct thread_info, rwbuf_stkptrs) ||
+	    TI_W_SAVED     != offsetof(struct thread_info, w_saved))
+		thread_info_offsets_are_bolixed_pete();
+
+	/* Attach to the address space of init_task. */
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+
+	/* NOTE: Other cpus have this done as they are started
+	 *       up on SMP.
+	 */
+}
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
new file mode 100644
index 0000000..aa624ed
--- /dev/null
+++ b/arch/sparc/kernel/traps_64.c
@@ -0,0 +1,2936 @@
+/* arch/sparc64/kernel/traps.c
+ *
+ * Copyright (C) 1995,1997,2008,2009,2012 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997,1999,2000 Jakub Jelinek (jakub@redhat.com)
+ */
+
+/*
+ * I like traps on v9, :))))
+ */
+
+#include <linux/extable.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/debug.h>
+#include <linux/linkage.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kdebug.h>
+#include <linux/ftrace.h>
+#include <linux/reboot.h>
+#include <linux/gfp.h>
+#include <linux/context_tracking.h>
+
+#include <asm/smp.h>
+#include <asm/delay.h>
+#include <asm/ptrace.h>
+#include <asm/oplib.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/unistd.h>
+#include <linux/uaccess.h>
+#include <asm/fpumacro.h>
+#include <asm/lsu.h>
+#include <asm/dcu.h>
+#include <asm/estate.h>
+#include <asm/chafsr.h>
+#include <asm/sfafsr.h>
+#include <asm/psrcompat.h>
+#include <asm/processor.h>
+#include <asm/timer.h>
+#include <asm/head.h>
+#include <asm/prom.h>
+#include <asm/memctrl.h>
+#include <asm/cacheflush.h>
+#include <asm/setup.h>
+
+#include "entry.h"
+#include "kernel.h"
+#include "kstack.h"
+
+/* When an irrecoverable trap occurs at tl > 0, the trap entry
+ * code logs the trap state registers at every level in the trap
+ * stack.  It is found at (pt_regs + sizeof(pt_regs)) and the layout
+ * is as follows:
+ */
+struct tl1_traplog {
+	struct {
+		unsigned long tstate;
+		unsigned long tpc;
+		unsigned long tnpc;
+		unsigned long tt;
+	} trapstack[4];
+	unsigned long tl;
+};
+
+static void dump_tl1_traplog(struct tl1_traplog *p)
+{
+	int i, limit;
+
+	printk(KERN_EMERG "TRAPLOG: Error at trap level 0x%lx, "
+	       "dumping trap stack.\n", p->tl);
+
+	limit = (tlb_type == hypervisor) ? 2 : 4;
+	for (i = 0; i < limit; i++) {
+		printk(KERN_EMERG
+		       "TRAPLOG: Trap level %d TSTATE[%016lx] TPC[%016lx] "
+		       "TNPC[%016lx] TT[%lx]\n",
+		       i + 1,
+		       p->trapstack[i].tstate, p->trapstack[i].tpc,
+		       p->trapstack[i].tnpc, p->trapstack[i].tt);
+		printk("TRAPLOG: TPC<%pS>\n", (void *) p->trapstack[i].tpc);
+	}
+}
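+
+/* Only two levels are printed under the hypervisor because sun4v
+ * guests run with a maximum privileged trap level of 2; bare-metal
+ * chips log all four entries of the trap stack.
+ */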
+
+void bad_trap(struct pt_regs *regs, long lvl)
+{
+	char buffer[36];
+
+	if (notify_die(DIE_TRAP, "bad trap", regs,
+		       0, lvl, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	if (lvl < 0x100) {
+		sprintf(buffer, "Bad hw trap %lx at tl0\n", lvl);
+		die_if_kernel(buffer, regs);
+	}
+
+	lvl -= 0x100;
+	if (regs->tstate & TSTATE_PRIV) {
+		sprintf(buffer, "Kernel bad sw trap %lx", lvl);
+		die_if_kernel(buffer, regs);
+	}
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGILL, ILL_ILLTRP,
+			(void __user *)regs->tpc, lvl, current);
+}
+
+void bad_trap_tl1(struct pt_regs *regs, long lvl)
+{
+	char buffer[36];
+	
+	if (notify_die(DIE_TRAP_TL1, "bad trap tl1", regs,
+		       0, lvl, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+	sprintf (buffer, "Bad trap %lx at tl>0", lvl);
+	die_if_kernel (buffer, regs);
+}
+
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+void do_BUG(const char *file, int line)
+{
+	bust_spinlocks(1);
+	printk("kernel BUG at %s:%d!\n", file, line);
+}
+EXPORT_SYMBOL(do_BUG);
+#endif
+
+static DEFINE_SPINLOCK(dimm_handler_lock);
+static dimm_printer_t dimm_handler;
+
+static int sprintf_dimm(int synd_code, unsigned long paddr, char *buf, int buflen)
+{
+	unsigned long flags;
+	int ret = -ENODEV;
+
+	spin_lock_irqsave(&dimm_handler_lock, flags);
+	if (dimm_handler) {
+		ret = dimm_handler(synd_code, paddr, buf, buflen);
+	} else if (tlb_type == spitfire) {
+		if (prom_getunumber(synd_code, paddr, buf, buflen) == -1)
+			ret = -EINVAL;
+		else
+			ret = 0;
+	} else
+		ret = -ENODEV;
+	spin_unlock_irqrestore(&dimm_handler_lock, flags);
+
+	return ret;
+}
+
+int register_dimm_printer(dimm_printer_t func)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&dimm_handler_lock, flags);
+	if (!dimm_handler)
+		dimm_handler = func;
+	else
+		ret = -EEXIST;
+	spin_unlock_irqrestore(&dimm_handler_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_dimm_printer);
+
+void unregister_dimm_printer(dimm_printer_t func)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dimm_handler_lock, flags);
+	if (dimm_handler == func)
+		dimm_handler = NULL;
+	spin_unlock_irqrestore(&dimm_handler_lock, flags);
+}
+EXPORT_SYMBOL_GPL(unregister_dimm_printer);
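+
+/* A minimal sketch of how the hooks above are meant to be used: a
+ * memory controller driver registers a callback that resolves an ECC
+ * syndrome code and physical address to a DIMM label.  The helper
+ * my_slot_for_paddr() below is hypothetical:
+ *
+ *	static int my_dimm_printer(int synd_code, unsigned long paddr,
+ *				   char *buf, int buflen)
+ *	{
+ *		return scnprintf(buf, buflen, "DIMM %d",
+ *				 my_slot_for_paddr(paddr));
+ *	}
+ *
+ *	if (register_dimm_printer(my_dimm_printer))
+ *		pr_warn("dimm printer already registered\n");
+ */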
+
+void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "instruction access exception", regs,
+		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
+		goto out;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		printk("spitfire_insn_access_exception: SFSR[%016lx] "
+		       "SFAR[%016lx], going.\n", sfsr, sfar);
+		die_if_kernel("Iax", regs);
+	}
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGSEGV, SEGV_MAPERR,
+			(void __user *)regs->tpc, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+void spitfire_insn_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+{
+	if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
+		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	spitfire_insn_access_exception(regs, sfsr, sfar);
+}
+
+void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+	unsigned short type = (type_ctx >> 16);
+	unsigned short ctx  = (type_ctx & 0xffff);
+
+	if (notify_die(DIE_TRAP, "instruction access exception", regs,
+		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		printk("sun4v_insn_access_exception: ADDR[%016lx] "
+		       "CTX[%04x] TYPE[%04x], going.\n",
+		       addr, ctx, type);
+		die_if_kernel("Iax", regs);
+	}
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current);
+}
+
+void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+	if (notify_die(DIE_TRAP_TL1, "instruction access exception tl1", regs,
+		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	sun4v_insn_access_exception(regs, addr, type_ctx);
+}
+
+bool is_no_fault_exception(struct pt_regs *regs)
+{
+	unsigned char asi;
+	u32 insn;
+
+	if (get_user(insn, (u32 __user *)regs->tpc) == -EFAULT)
+		return false;
+
+	/*
+	 * Must do a little instruction decoding here in order to
+	 * decide on a course of action. The bits of interest are:
+	 *  insn[31:30] = op, where 3 indicates the load/store group
+	 *  insn[24:19] = op3, which identifies individual opcodes
+	 *  insn[13] indicates an immediate offset
+	 *  op3[4]=1 identifies alternate space instructions
+	 *  op3[5:4]=3 identifies floating point instructions
+	 *  op3[2]=1 identifies stores
+	 * See "Opcode Maps" in the appendix of any Sparc V9
+	 * architecture spec for full details.
+	 */
+	if ((insn & 0xc0800000) == 0xc0800000) {    /* op=3, op3[4]=1   */
+		if (insn & 0x2000)		    /* immediate offset */
+			asi = (regs->tstate >> 24); /* saved %asi       */
+		else
+			asi = (insn >> 5);	    /* immediate asi    */
+		if ((asi & 0xf2) == ASI_PNF) {
+			if (insn & 0x1000000) {     /* op3[5:4]=3       */
+				handle_ldf_stq(insn, regs);
+				return true;
+			} else if (insn & 0x200000) { /* op3[2], stores */
+				return false;
+			}
+			handle_ld_nf(insn, regs);
+			return true;
+		}
+	}
+	return false;
+}
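+
+/* Worked example for the decoding above: 0xc4d85040 is
+ * "ldxa [%g1 + %g0] 0x82, %g2" (op=3, op3=0x1b, i=0, imm_asi=0x82).
+ * It passes the 0xc0800000 test, bit 13 is clear so the ASI is taken
+ * from insn[12:5] (0x82 = ASI_PNF), and op3[5] and op3[2] are both
+ * clear, so it is handled by handle_ld_nf().
+ */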
+
+void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "data access exception", regs,
+		       0, 0x30, SIGTRAP) == NOTIFY_STOP)
+		goto out;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		/* Test if this comes from uaccess places. */
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			/* Ouch, somebody is trying VM hole tricks on us... */
+#ifdef DEBUG_EXCEPTIONS
+			printk("Exception: PC<%016lx> faddr<UNKNOWN>\n", regs->tpc);
+			printk("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
+			       regs->tpc, entry->fixup);
+#endif
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			goto out;
+		}
+		/* Shit... */
+		printk("spitfire_data_access_exception: SFSR[%016lx] "
+		       "SFAR[%016lx], going.\n", sfsr, sfar);
+		die_if_kernel("Dax", regs);
+	}
+
+	if (is_no_fault_exception(regs))
+		return;
+
+	force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+void spitfire_data_access_exception_tl1(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar)
+{
+	if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+		       0, 0x30, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	spitfire_data_access_exception(regs, sfsr, sfar);
+}
+
+void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+	unsigned short type = (type_ctx >> 16);
+	unsigned short ctx  = (type_ctx & 0xffff);
+
+	if (notify_die(DIE_TRAP, "data access exception", regs,
+		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		/* Test if this comes from uaccess places. */
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			/* Ouch, somebody is trying VM hole tricks on us... */
+#ifdef DEBUG_EXCEPTIONS
+			printk("Exception: PC<%016lx> faddr<UNKNOWN>\n", regs->tpc);
+			printk("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
+			       regs->tpc, entry->fixup);
+#endif
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+		printk("sun4v_data_access_exception: ADDR[%016lx] "
+		       "CTX[%04x] TYPE[%04x], going.\n",
+		       addr, ctx, type);
+		die_if_kernel("Dax", regs);
+	}
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	if (is_no_fault_exception(regs))
+		return;
+
+	/* An MCD (Memory Corruption Detection) disabled trap (TT=0x19) in
+	 * HV is vectored through the data access exception trap with fault
+	 * type set to HV_FAULT_TYPE_MCD_DIS. Check for MCD disabled trap.
+	 * Accessing an address with an ASI that is invalid for that address,
+	 * for example setting an ADI tag on an address with ASI_MCD_PRIMARY
+	 * when TTE.mcd is not set for the VA, is also vectored into the
+	 * kernel by HV as a data access exception with fault type set to
+	 * HV_FAULT_TYPE_INV_ASI.
+	 */
+	switch (type) {
+	case HV_FAULT_TYPE_INV_ASI:
+		force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0,
+				current);
+		break;
+	case HV_FAULT_TYPE_MCD_DIS:
+		force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0,
+				current);
+		break;
+	default:
+		force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0,
+				current);
+		break;
+	}
+}
+
+void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+	if (notify_die(DIE_TRAP_TL1, "data access exception tl1", regs,
+		       0, 0x8, SIGTRAP) == NOTIFY_STOP)
+		return;
+
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	sun4v_data_access_exception(regs, addr, type_ctx);
+}
+
+#ifdef CONFIG_PCI
+#include "pci_impl.h"
+#endif
+
+/* When access exceptions happen, we must do this. */
+static void spitfire_clean_and_reenable_l1_caches(void)
+{
+	unsigned long va;
+
+	if (tlb_type != spitfire)
+		BUG();
+
+	/* Clean 'em. */
+	for (va =  0; va < (PAGE_SIZE << 1); va += 32) {
+		spitfire_put_icache_tag(va, 0x0);
+		spitfire_put_dcache_tag(va, 0x0);
+	}
+
+	/* Re-enable in LSU. */
+	__asm__ __volatile__("flush %%g6\n\t"
+			     "membar #Sync\n\t"
+			     "stxa %0, [%%g0] %1\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (LSU_CONTROL_IC | LSU_CONTROL_DC |
+				    LSU_CONTROL_IM | LSU_CONTROL_DM),
+			     "i" (ASI_LSU_CONTROL)
+			     : "memory");
+}
+
+static void spitfire_enable_estate_errors(void)
+{
+	__asm__ __volatile__("stxa	%0, [%%g0] %1\n\t"
+			     "membar	#Sync"
+			     : /* no outputs */
+			     : "r" (ESTATE_ERR_ALL),
+			       "i" (ASI_ESTATE_ERROR_EN));
+}
+
+static char ecc_syndrome_table[] = {
+	0x4c, 0x40, 0x41, 0x48, 0x42, 0x48, 0x48, 0x49,
+	0x43, 0x48, 0x48, 0x49, 0x48, 0x49, 0x49, 0x4a,
+	0x44, 0x48, 0x48, 0x20, 0x48, 0x39, 0x4b, 0x48,
+	0x48, 0x25, 0x31, 0x48, 0x28, 0x48, 0x48, 0x2c,
+	0x45, 0x48, 0x48, 0x21, 0x48, 0x3d, 0x04, 0x48,
+	0x48, 0x4b, 0x35, 0x48, 0x2d, 0x48, 0x48, 0x29,
+	0x48, 0x00, 0x01, 0x48, 0x0a, 0x48, 0x48, 0x4b,
+	0x0f, 0x48, 0x48, 0x4b, 0x48, 0x49, 0x49, 0x48,
+	0x46, 0x48, 0x48, 0x2a, 0x48, 0x3b, 0x27, 0x48,
+	0x48, 0x4b, 0x33, 0x48, 0x22, 0x48, 0x48, 0x2e,
+	0x48, 0x19, 0x1d, 0x48, 0x1b, 0x4a, 0x48, 0x4b,
+	0x1f, 0x48, 0x4a, 0x4b, 0x48, 0x4b, 0x4b, 0x48,
+	0x48, 0x4b, 0x24, 0x48, 0x07, 0x48, 0x48, 0x36,
+	0x4b, 0x48, 0x48, 0x3e, 0x48, 0x30, 0x38, 0x48,
+	0x49, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x16, 0x48,
+	0x48, 0x12, 0x4b, 0x48, 0x49, 0x48, 0x48, 0x4b,
+	0x47, 0x48, 0x48, 0x2f, 0x48, 0x3f, 0x4b, 0x48,
+	0x48, 0x06, 0x37, 0x48, 0x23, 0x48, 0x48, 0x2b,
+	0x48, 0x05, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x32,
+	0x26, 0x48, 0x48, 0x3a, 0x48, 0x34, 0x3c, 0x48,
+	0x48, 0x11, 0x15, 0x48, 0x13, 0x4a, 0x48, 0x4b,
+	0x17, 0x48, 0x4a, 0x4b, 0x48, 0x4b, 0x4b, 0x48,
+	0x49, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x1e, 0x48,
+	0x48, 0x1a, 0x4b, 0x48, 0x49, 0x48, 0x48, 0x4b,
+	0x48, 0x08, 0x0d, 0x48, 0x02, 0x48, 0x48, 0x49,
+	0x03, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x4b, 0x48,
+	0x49, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x10, 0x48,
+	0x48, 0x14, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x4b,
+	0x49, 0x48, 0x48, 0x49, 0x48, 0x4b, 0x18, 0x48,
+	0x48, 0x1c, 0x4b, 0x48, 0x4b, 0x48, 0x48, 0x4b,
+	0x4a, 0x0c, 0x09, 0x48, 0x0e, 0x48, 0x48, 0x4b,
+	0x0b, 0x48, 0x48, 0x4b, 0x48, 0x4b, 0x4b, 0x4a
+};
+
+static char *syndrome_unknown = "<Unknown>";
+
+static void spitfire_log_udb_syndrome(unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long bit)
+{
+	unsigned short scode;
+	char memmod_str[64], *p;
+
+	if (udbl & bit) {
+		scode = ecc_syndrome_table[udbl & 0xff];
+		if (sprintf_dimm(scode, afar, memmod_str, sizeof(memmod_str)) < 0)
+			p = syndrome_unknown;
+		else
+			p = memmod_str;
+		printk(KERN_WARNING "CPU[%d]: UDBL Syndrome[%x] "
+		       "Memory Module \"%s\"\n",
+		       smp_processor_id(), scode, p);
+	}
+
+	if (udbh & bit) {
+		scode = ecc_syndrome_table[udbh & 0xff];
+		if (sprintf_dimm(scode, afar, memmod_str, sizeof(memmod_str)) < 0)
+			p = syndrome_unknown;
+		else
+			p = memmod_str;
+		printk(KERN_WARNING "CPU[%d]: UDBH Syndrome[%x] "
+		       "Memory Module \"%s\"\n",
+		       smp_processor_id(), scode, p);
+	}
+}
+
+static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, int tl1, struct pt_regs *regs)
+{
+	printk(KERN_WARNING "CPU[%d]: Correctable ECC Error "
+	       "AFSR[%lx] AFAR[%016lx] UDBL[%lx] UDBH[%lx] TL>1[%d]\n",
+	       smp_processor_id(), afsr, afar, udbl, udbh, tl1);
+
+	spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_CE);
+
+	/* We always log it, even if someone is listening for this
+	 * trap.
+	 */
+	notify_die(DIE_TRAP, "Correctable ECC Error", regs,
+		   0, TRAP_TYPE_CEE, SIGTRAP);
+
+	/* The Correctable ECC Error trap does not disable I/D caches.  So
+	 * we only have to restore the ESTATE Error Enable register.
+	 */
+	spitfire_enable_estate_errors();
+}
+
+static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs)
+{
+	printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] "
+	       "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n",
+	       smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1);
+
+	/* XXX add more human friendly logging of the error status
+	 * XXX as is implemented for cheetah
+	 */
+
+	spitfire_log_udb_syndrome(afar, udbh, udbl, UDBE_UE);
+
+	/* We always log it, even if someone is listening for this
+	 * trap.
+	 */
+	notify_die(DIE_TRAP, "Uncorrectable Error", regs,
+		   0, tt, SIGTRAP);
+
+	if (regs->tstate & TSTATE_PRIV) {
+		if (tl1)
+			dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+		die_if_kernel("UE", regs);
+	}
+
+	/* XXX need more intelligent processing here, such as is implemented
+	 * XXX for cheetah errors, in fact if the E-cache still holds the
+	 * XXX line with bad parity this will loop
+	 */
+
+	spitfire_clean_and_reenable_l1_caches();
+	spitfire_enable_estate_errors();
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current);
+}
+
+void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar)
+{
+	unsigned long afsr, tt, udbh, udbl;
+	int tl1;
+
+	afsr = (status_encoded & SFSTAT_AFSR_MASK) >> SFSTAT_AFSR_SHIFT;
+	tt = (status_encoded & SFSTAT_TRAP_TYPE) >> SFSTAT_TRAP_TYPE_SHIFT;
+	tl1 = (status_encoded & SFSTAT_TL_GT_ONE) ? 1 : 0;
+	udbl = (status_encoded & SFSTAT_UDBL_MASK) >> SFSTAT_UDBL_SHIFT;
+	udbh = (status_encoded & SFSTAT_UDBH_MASK) >> SFSTAT_UDBH_SHIFT;
+
+#ifdef CONFIG_PCI
+	if (tt == TRAP_TYPE_DAE &&
+	    pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
+		spitfire_clean_and_reenable_l1_caches();
+		spitfire_enable_estate_errors();
+
+		pci_poke_faulted = 1;
+		regs->tnpc = regs->tpc + 4;
+		return;
+	}
+#endif
+
+	if (afsr & SFAFSR_UE)
+		spitfire_ue_log(afsr, afar, udbh, udbl, tt, tl1, regs);
+
+	if (tt == TRAP_TYPE_CEE) {
+		/* Handle the case where we took a CEE trap, but ACK'd
+		 * only the UE state in the UDB error registers.
+		 */
+		if (afsr & SFAFSR_UE) {
+			if (udbh & UDBE_CE) {
+				__asm__ __volatile__(
+					"stxa	%0, [%1] %2\n\t"
+					"membar	#Sync"
+					: /* no outputs */
+					: "r" (udbh & UDBE_CE),
+					  "r" (0x0), "i" (ASI_UDB_ERROR_W));
+			}
+			if (udbl & UDBE_CE) {
+				__asm__ __volatile__(
+					"stxa	%0, [%1] %2\n\t"
+					"membar	#Sync"
+					: /* no outputs */
+					: "r" (udbl & UDBE_CE),
+					  "r" (0x18), "i" (ASI_UDB_ERROR_W));
+			}
+		}
+
+		spitfire_cee_log(afsr, afar, udbh, udbl, tl1, regs);
+	}
+}
+
+int cheetah_pcache_forced_on;
+
+void cheetah_enable_pcache(void)
+{
+	unsigned long dcr;
+
+	printk("CHEETAH: Enabling P-Cache on cpu %d.\n",
+	       smp_processor_id());
+
+	__asm__ __volatile__("ldxa [%%g0] %1, %0"
+			     : "=r" (dcr)
+			     : "i" (ASI_DCU_CONTROL_REG));
+	dcr |= (DCU_PE | DCU_HPE | DCU_SPE | DCU_SL);
+	__asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (dcr), "i" (ASI_DCU_CONTROL_REG));
+}
+
+/* Cheetah error trap handling. */
+static unsigned long ecache_flush_physbase;
+static unsigned long ecache_flush_linesize;
+static unsigned long ecache_flush_size;
+
+/* This table is ordered in priority of errors and matches the
+ * AFAR overwrite policy as well.
+ */
+
+struct afsr_error_table {
+	unsigned long mask;
+	const char *name;
+};
+
+static const char CHAFSR_PERR_msg[] =
+	"System interface protocol error";
+static const char CHAFSR_IERR_msg[] =
+	"Internal processor error";
+static const char CHAFSR_ISAP_msg[] =
+	"System request parity error on incoming address";
+static const char CHAFSR_UCU_msg[] =
+	"Uncorrectable E-cache ECC error for ifetch/data";
+static const char CHAFSR_UCC_msg[] =
+	"SW Correctable E-cache ECC error for ifetch/data";
+static const char CHAFSR_UE_msg[] =
+	"Uncorrectable system bus data ECC error for read";
+static const char CHAFSR_EDU_msg[] =
+	"Uncorrectable E-cache ECC error for stmerge/blkld";
+static const char CHAFSR_EMU_msg[] =
+	"Uncorrectable system bus MTAG error";
+static const char CHAFSR_WDU_msg[] =
+	"Uncorrectable E-cache ECC error for writeback";
+static const char CHAFSR_CPU_msg[] =
+	"Uncorrectable ECC error for copyout";
+static const char CHAFSR_CE_msg[] =
+	"HW corrected system bus data ECC error for read";
+static const char CHAFSR_EDC_msg[] =
+	"HW corrected E-cache ECC error for stmerge/blkld";
+static const char CHAFSR_EMC_msg[] =
+	"HW corrected system bus MTAG ECC error";
+static const char CHAFSR_WDC_msg[] =
+	"HW corrected E-cache ECC error for writeback";
+static const char CHAFSR_CPC_msg[] =
+	"HW corrected ECC error for copyout";
+static const char CHAFSR_TO_msg[] =
+	"Unmapped error from system bus";
+static const char CHAFSR_BERR_msg[] =
+	"Bus error response from system bus";
+static const char CHAFSR_IVC_msg[] =
+	"HW corrected system bus data ECC error for ivec read";
+static const char CHAFSR_IVU_msg[] =
+	"Uncorrectable system bus data ECC error for ivec read";
+static struct afsr_error_table __cheetah_error_table[] = {
+	{	CHAFSR_PERR,	CHAFSR_PERR_msg		},
+	{	CHAFSR_IERR,	CHAFSR_IERR_msg		},
+	{	CHAFSR_ISAP,	CHAFSR_ISAP_msg		},
+	{	CHAFSR_UCU,	CHAFSR_UCU_msg		},
+	{	CHAFSR_UCC,	CHAFSR_UCC_msg		},
+	{	CHAFSR_UE,	CHAFSR_UE_msg		},
+	{	CHAFSR_EDU,	CHAFSR_EDU_msg		},
+	{	CHAFSR_EMU,	CHAFSR_EMU_msg		},
+	{	CHAFSR_WDU,	CHAFSR_WDU_msg		},
+	{	CHAFSR_CPU,	CHAFSR_CPU_msg		},
+	{	CHAFSR_CE,	CHAFSR_CE_msg		},
+	{	CHAFSR_EDC,	CHAFSR_EDC_msg		},
+	{	CHAFSR_EMC,	CHAFSR_EMC_msg		},
+	{	CHAFSR_WDC,	CHAFSR_WDC_msg		},
+	{	CHAFSR_CPC,	CHAFSR_CPC_msg		},
+	{	CHAFSR_TO,	CHAFSR_TO_msg		},
+	{	CHAFSR_BERR,	CHAFSR_BERR_msg		},
+	/* These two do not update the AFAR. */
+	{	CHAFSR_IVC,	CHAFSR_IVC_msg		},
+	{	CHAFSR_IVU,	CHAFSR_IVU_msg		},
+	{	0,		NULL			},
+};
+static const char CHPAFSR_DTO_msg[] =
+	"System bus unmapped error for prefetch/storequeue-read";
+static const char CHPAFSR_DBERR_msg[] =
+	"System bus error for prefetch/storequeue-read";
+static const char CHPAFSR_THCE_msg[] =
+	"Hardware corrected E-cache Tag ECC error";
+static const char CHPAFSR_TSCE_msg[] =
+	"SW handled correctable E-cache Tag ECC error";
+static const char CHPAFSR_TUE_msg[] =
+	"Uncorrectable E-cache Tag ECC error";
+static const char CHPAFSR_DUE_msg[] =
+	"System bus uncorrectable data ECC error due to prefetch/store-fill";
+static struct afsr_error_table __cheetah_plus_error_table[] = {
+	{	CHAFSR_PERR,	CHAFSR_PERR_msg		},
+	{	CHAFSR_IERR,	CHAFSR_IERR_msg		},
+	{	CHAFSR_ISAP,	CHAFSR_ISAP_msg		},
+	{	CHAFSR_UCU,	CHAFSR_UCU_msg		},
+	{	CHAFSR_UCC,	CHAFSR_UCC_msg		},
+	{	CHAFSR_UE,	CHAFSR_UE_msg		},
+	{	CHAFSR_EDU,	CHAFSR_EDU_msg		},
+	{	CHAFSR_EMU,	CHAFSR_EMU_msg		},
+	{	CHAFSR_WDU,	CHAFSR_WDU_msg		},
+	{	CHAFSR_CPU,	CHAFSR_CPU_msg		},
+	{	CHAFSR_CE,	CHAFSR_CE_msg		},
+	{	CHAFSR_EDC,	CHAFSR_EDC_msg		},
+	{	CHAFSR_EMC,	CHAFSR_EMC_msg		},
+	{	CHAFSR_WDC,	CHAFSR_WDC_msg		},
+	{	CHAFSR_CPC,	CHAFSR_CPC_msg		},
+	{	CHAFSR_TO,	CHAFSR_TO_msg		},
+	{	CHAFSR_BERR,	CHAFSR_BERR_msg		},
+	{	CHPAFSR_DTO,	CHPAFSR_DTO_msg		},
+	{	CHPAFSR_DBERR,	CHPAFSR_DBERR_msg	},
+	{	CHPAFSR_THCE,	CHPAFSR_THCE_msg	},
+	{	CHPAFSR_TSCE,	CHPAFSR_TSCE_msg	},
+	{	CHPAFSR_TUE,	CHPAFSR_TUE_msg		},
+	{	CHPAFSR_DUE,	CHPAFSR_DUE_msg		},
+	/* These two do not update the AFAR. */
+	{	CHAFSR_IVC,	CHAFSR_IVC_msg		},
+	{	CHAFSR_IVU,	CHAFSR_IVU_msg		},
+	{	0,		NULL			},
+};
+static const char JPAFSR_JETO_msg[] =
+	"System interface protocol error, hw timeout caused";
+static const char JPAFSR_SCE_msg[] =
+	"Parity error on system snoop results";
+static const char JPAFSR_JEIC_msg[] =
+	"System interface protocol error, illegal command detected";
+static const char JPAFSR_JEIT_msg[] =
+	"System interface protocol error, illegal ADTYPE detected";
+static const char JPAFSR_OM_msg[] =
+	"Out of range memory error has occurred";
+static const char JPAFSR_ETP_msg[] =
+	"Parity error on L2 cache tag SRAM";
+static const char JPAFSR_UMS_msg[] =
+	"Error due to unsupported store";
+static const char JPAFSR_RUE_msg[] =
+	"Uncorrectable ECC error from remote cache/memory";
+static const char JPAFSR_RCE_msg[] =
+	"Correctable ECC error from remote cache/memory";
+static const char JPAFSR_BP_msg[] =
+	"JBUS parity error on returned read data";
+static const char JPAFSR_WBP_msg[] =
+	"JBUS parity error on data for writeback or block store";
+static const char JPAFSR_FRC_msg[] =
+	"Foreign read to DRAM incurring correctable ECC error";
+static const char JPAFSR_FRU_msg[] =
+	"Foreign read to DRAM incurring uncorrectable ECC error";
+static struct afsr_error_table __jalapeno_error_table[] = {
+	{	JPAFSR_JETO,	JPAFSR_JETO_msg		},
+	{	JPAFSR_SCE,	JPAFSR_SCE_msg		},
+	{	JPAFSR_JEIC,	JPAFSR_JEIC_msg		},
+	{	JPAFSR_JEIT,	JPAFSR_JEIT_msg		},
+	{	CHAFSR_PERR,	CHAFSR_PERR_msg		},
+	{	CHAFSR_IERR,	CHAFSR_IERR_msg		},
+	{	CHAFSR_ISAP,	CHAFSR_ISAP_msg		},
+	{	CHAFSR_UCU,	CHAFSR_UCU_msg		},
+	{	CHAFSR_UCC,	CHAFSR_UCC_msg		},
+	{	CHAFSR_UE,	CHAFSR_UE_msg		},
+	{	CHAFSR_EDU,	CHAFSR_EDU_msg		},
+	{	JPAFSR_OM,	JPAFSR_OM_msg		},
+	{	CHAFSR_WDU,	CHAFSR_WDU_msg		},
+	{	CHAFSR_CPU,	CHAFSR_CPU_msg		},
+	{	CHAFSR_CE,	CHAFSR_CE_msg		},
+	{	CHAFSR_EDC,	CHAFSR_EDC_msg		},
+	{	JPAFSR_ETP,	JPAFSR_ETP_msg		},
+	{	CHAFSR_WDC,	CHAFSR_WDC_msg		},
+	{	CHAFSR_CPC,	CHAFSR_CPC_msg		},
+	{	CHAFSR_TO,	CHAFSR_TO_msg		},
+	{	CHAFSR_BERR,	CHAFSR_BERR_msg		},
+	{	JPAFSR_UMS,	JPAFSR_UMS_msg		},
+	{	JPAFSR_RUE,	JPAFSR_RUE_msg		},
+	{	JPAFSR_RCE,	JPAFSR_RCE_msg		},
+	{	JPAFSR_BP,	JPAFSR_BP_msg		},
+	{	JPAFSR_WBP,	JPAFSR_WBP_msg		},
+	{	JPAFSR_FRC,	JPAFSR_FRC_msg		},
+	{	JPAFSR_FRU,	JPAFSR_FRU_msg		},
+	/* These two do not update the AFAR. */
+	{	CHAFSR_IVU,	CHAFSR_IVU_msg		},
+	{	0,		NULL			},
+};
+static struct afsr_error_table *cheetah_error_table;
+static unsigned long cheetah_afsr_errors;
+
+struct cheetah_err_info *cheetah_error_log;
+
+static inline struct cheetah_err_info *cheetah_get_error_log(unsigned long afsr)
+{
+	struct cheetah_err_info *p;
+	int cpu = smp_processor_id();
+
+	if (!cheetah_error_log)
+		return NULL;
+
+	p = cheetah_error_log + (cpu * 2);
+	if ((afsr & CHAFSR_TL1) != 0UL)
+		p++;
+
+	return p;
+}
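+
+/* Layout note: for CPU n, cheetah_error_log[2*n] holds the TL0 trap
+ * snapshot and cheetah_error_log[2*n + 1] the TL>1 (CHAFSR_TL1)
+ * snapshot, which is what the increment above selects.
+ */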
+
+extern unsigned int tl0_icpe[], tl1_icpe[];
+extern unsigned int tl0_dcpe[], tl1_dcpe[];
+extern unsigned int tl0_fecc[], tl1_fecc[];
+extern unsigned int tl0_cee[], tl1_cee[];
+extern unsigned int tl0_iae[], tl1_iae[];
+extern unsigned int tl0_dae[], tl1_dae[];
+extern unsigned int cheetah_plus_icpe_trap_vector[], cheetah_plus_icpe_trap_vector_tl1[];
+extern unsigned int cheetah_plus_dcpe_trap_vector[], cheetah_plus_dcpe_trap_vector_tl1[];
+extern unsigned int cheetah_fecc_trap_vector[], cheetah_fecc_trap_vector_tl1[];
+extern unsigned int cheetah_cee_trap_vector[], cheetah_cee_trap_vector_tl1[];
+extern unsigned int cheetah_deferred_trap_vector[], cheetah_deferred_trap_vector_tl1[];
+
+void __init cheetah_ecache_flush_init(void)
+{
+	unsigned long largest_size, smallest_linesize, order, ver;
+	int i, sz;
+
+	/* Scan all cpu device tree nodes, note two values:
+	 * 1) largest E-cache size
+	 * 2) smallest E-cache line size
+	 */
+	largest_size = 0UL;
+	smallest_linesize = ~0UL;
+
+	for (i = 0; i < NR_CPUS; i++) {
+		unsigned long val;
+
+		val = cpu_data(i).ecache_size;
+		if (!val)
+			continue;
+
+		if (val > largest_size)
+			largest_size = val;
+
+		val = cpu_data(i).ecache_line_size;
+		if (val < smallest_linesize)
+			smallest_linesize = val;
+	}
+
+	if (largest_size == 0UL || smallest_linesize == ~0UL) {
+		prom_printf("cheetah_ecache_flush_init: Cannot probe cpu E-cache "
+			    "parameters.\n");
+		prom_halt();
+	}
+
+	ecache_flush_size = (2 * largest_size);
+	ecache_flush_linesize = smallest_linesize;
+
+	ecache_flush_physbase = find_ecache_flush_span(ecache_flush_size);
+
+	if (ecache_flush_physbase == ~0UL) {
+		prom_printf("cheetah_ecache_flush_init: Cannot find %ld byte "
+			    "contiguous physical memory.\n",
+			    ecache_flush_size);
+		prom_halt();
+	}
+
+	/* Now allocate error trap reporting scoreboard. */
+	sz = NR_CPUS * (2 * sizeof(struct cheetah_err_info));
+	for (order = 0; order < MAX_ORDER; order++) {
+		if ((PAGE_SIZE << order) >= sz)
+			break;
+	}
+	cheetah_error_log = (struct cheetah_err_info *)
+		__get_free_pages(GFP_KERNEL, order);
+	if (!cheetah_error_log) {
+		prom_printf("cheetah_ecache_flush_init: Failed to allocate "
+			    "error logging scoreboard (%d bytes).\n", sz);
+		prom_halt();
+	}
+	memset(cheetah_error_log, 0, PAGE_SIZE << order);
+
+	/* Mark all AFSRs as invalid so that the trap handler will
+	 * log new information there.
+	 */
+	for (i = 0; i < 2 * NR_CPUS; i++)
+		cheetah_error_log[i].afsr = CHAFSR_INVALID;
+
+	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+	if ((ver >> 32) == __JALAPENO_ID ||
+	    (ver >> 32) == __SERRANO_ID) {
+		cheetah_error_table = &__jalapeno_error_table[0];
+		cheetah_afsr_errors = JPAFSR_ERRORS;
+	} else if ((ver >> 32) == 0x003e0015) {
+		cheetah_error_table = &__cheetah_plus_error_table[0];
+		cheetah_afsr_errors = CHPAFSR_ERRORS;
+	} else {
+		cheetah_error_table = &__cheetah_error_table[0];
+		cheetah_afsr_errors = CHAFSR_ERRORS;
+	}
+
+	/* Now patch trap tables. */
+	memcpy(tl0_fecc, cheetah_fecc_trap_vector, (8 * 4));
+	memcpy(tl1_fecc, cheetah_fecc_trap_vector_tl1, (8 * 4));
+	memcpy(tl0_cee, cheetah_cee_trap_vector, (8 * 4));
+	memcpy(tl1_cee, cheetah_cee_trap_vector_tl1, (8 * 4));
+	memcpy(tl0_iae, cheetah_deferred_trap_vector, (8 * 4));
+	memcpy(tl1_iae, cheetah_deferred_trap_vector_tl1, (8 * 4));
+	memcpy(tl0_dae, cheetah_deferred_trap_vector, (8 * 4));
+	memcpy(tl1_dae, cheetah_deferred_trap_vector_tl1, (8 * 4));
+	if (tlb_type == cheetah_plus) {
+		memcpy(tl0_dcpe, cheetah_plus_dcpe_trap_vector, (8 * 4));
+		memcpy(tl1_dcpe, cheetah_plus_dcpe_trap_vector_tl1, (8 * 4));
+		memcpy(tl0_icpe, cheetah_plus_icpe_trap_vector, (8 * 4));
+		memcpy(tl1_icpe, cheetah_plus_icpe_trap_vector_tl1, (8 * 4));
+	}
+	flushi(PAGE_OFFSET);
+}
+
+static void cheetah_flush_ecache(void)
+{
+	unsigned long flush_base = ecache_flush_physbase;
+	unsigned long flush_linesize = ecache_flush_linesize;
+	unsigned long flush_size = ecache_flush_size;
+
+	__asm__ __volatile__("1: subcc	%0, %4, %0\n\t"
+			     "   bne,pt	%%xcc, 1b\n\t"
+			     "    ldxa	[%2 + %0] %3, %%g0\n\t"
+			     : "=&r" (flush_size)
+			     : "0" (flush_size), "r" (flush_base),
+			       "i" (ASI_PHYS_USE_EC), "r" (flush_linesize));
+}
+
+static void cheetah_flush_ecache_line(unsigned long physaddr)
+{
+	unsigned long alias;
+
+	physaddr &= ~(8UL - 1UL);
+	physaddr = (ecache_flush_physbase +
+		    (physaddr & ((ecache_flush_size>>1UL) - 1UL)));
+	alias = physaddr + (ecache_flush_size >> 1UL);
+	__asm__ __volatile__("ldxa [%0] %2, %%g0\n\t"
+			     "ldxa [%1] %2, %%g0\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (physaddr), "r" (alias),
+			       "i" (ASI_PHYS_USE_EC));
+}
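+
+/* Worked example of the index math above: with a 16MB flush span
+ * (twice an 8MB E-cache), ecache_flush_size >> 1 is 0x800000, so a
+ * physical address maps to the displacement line ecache_flush_physbase
+ * + (physaddr & 0x7fffff), and its alias sits 8MB above that.  Loading
+ * both through ASI_PHYS_USE_EC evicts whatever the E-cache holds at
+ * that index.
+ */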
+
+/* Unfortunately, the diagnostic access to the I-cache tags we need to
+ * use to clear the thing interferes with I-cache coherency transactions.
+ *
+ * So we must only flush the I-cache when it is disabled.
+ */
+static void __cheetah_flush_icache(void)
+{
+	unsigned int icache_size, icache_line_size;
+	unsigned long addr;
+
+	icache_size = local_cpu_data().icache_size;
+	icache_line_size = local_cpu_data().icache_line_size;
+
+	/* Clear the valid bits in all the tags. */
+	for (addr = 0; addr < icache_size; addr += icache_line_size) {
+		__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+				     "membar #Sync"
+				     : /* no outputs */
+				     : "r" (addr | (2 << 3)),
+				       "i" (ASI_IC_TAG));
+	}
+}
+
+static void cheetah_flush_icache(void)
+{
+	unsigned long dcu_save;
+
+	/* Save current DCU, disable I-cache. */
+	__asm__ __volatile__("ldxa [%%g0] %1, %0\n\t"
+			     "or %0, %2, %%g1\n\t"
+			     "stxa %%g1, [%%g0] %1\n\t"
+			     "membar #Sync"
+			     : "=r" (dcu_save)
+			     : "i" (ASI_DCU_CONTROL_REG), "i" (DCU_IC)
+			     : "g1");
+
+	__cheetah_flush_icache();
+
+	/* Restore DCU register */
+	__asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "r" (dcu_save), "i" (ASI_DCU_CONTROL_REG));
+}
+
+static void cheetah_flush_dcache(void)
+{
+	unsigned int dcache_size, dcache_line_size;
+	unsigned long addr;
+
+	dcache_size = local_cpu_data().dcache_size;
+	dcache_line_size = local_cpu_data().dcache_line_size;
+
+	for (addr = 0; addr < dcache_size; addr += dcache_line_size) {
+		__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+				     "membar #Sync"
+				     : /* no outputs */
+				     : "r" (addr), "i" (ASI_DCACHE_TAG));
+	}
+}
+
+/* In order to make the even parity correct we must do two things.
+ * First, we clear DC_data_parity and set DC_utag to an appropriate value.
+ * Next, we clear out all 32-bytes of data for that line.  Data of
+ * all-zero + tag parity value of zero == correct parity.
+ */
+static void cheetah_plus_zap_dcache_parity(void)
+{
+	unsigned int dcache_size, dcache_line_size;
+	unsigned long addr;
+
+	dcache_size = local_cpu_data().dcache_size;
+	dcache_line_size = local_cpu_data().dcache_line_size;
+
+	for (addr = 0; addr < dcache_size; addr += dcache_line_size) {
+		unsigned long tag = (addr >> 14);
+		unsigned long line;
+
+		__asm__ __volatile__("membar	#Sync\n\t"
+				     "stxa	%0, [%1] %2\n\t"
+				     "membar	#Sync"
+				     : /* no outputs */
+				     : "r" (tag), "r" (addr),
+				       "i" (ASI_DCACHE_UTAG));
+		for (line = addr; line < addr + dcache_line_size; line += 8)
+			__asm__ __volatile__("membar	#Sync\n\t"
+					     "stxa	%%g0, [%0] %1\n\t"
+					     "membar	#Sync"
+					     : /* no outputs */
+					     : "r" (line),
+					       "i" (ASI_DCACHE_DATA));
+	}
+}
+
+/* Conversion tables used to frob Cheetah AFSR syndrome values into
+ * something palatable to the memory controller driver get_unumber
+ * routine.
+ */
+#define MT0	137
+#define MT1	138
+#define MT2	139
+#define NONE	254
+#define MTC0	140
+#define MTC1	141
+#define MTC2	142
+#define MTC3	143
+#define C0	128
+#define C1	129
+#define C2	130
+#define C3	131
+#define C4	132
+#define C5	133
+#define C6	134
+#define C7	135
+#define C8	136
+#define M2	144
+#define M3	145
+#define M4	146
+#define M	147
+static unsigned char cheetah_ecc_syntab[] = {
+/*00*/NONE, C0, C1, M2, C2, M2, M3, 47, C3, M2, M2, 53, M2, 41, 29, M,
+/*01*/C4, M, M, 50, M2, 38, 25, M2, M2, 33, 24, M2, 11, M, M2, 16,
+/*02*/C5, M, M, 46, M2, 37, 19, M2, M, 31, 32, M, 7, M2, M2, 10,
+/*03*/M2, 40, 13, M2, 59, M, M2, 66, M, M2, M2, 0, M2, 67, 71, M,
+/*04*/C6, M, M, 43, M, 36, 18, M, M2, 49, 15, M, 63, M2, M2, 6,
+/*05*/M2, 44, 28, M2, M, M2, M2, 52, 68, M2, M2, 62, M2, M3, M3, M4,
+/*06*/M2, 26, 106, M2, 64, M, M2, 2, 120, M, M2, M3, M, M3, M3, M4,
+/*07*/116, M2, M2, M3, M2, M3, M, M4, M2, 58, 54, M2, M, M4, M4, M3,
+/*08*/C7, M2, M, 42, M, 35, 17, M2, M, 45, 14, M2, 21, M2, M2, 5,
+/*09*/M, 27, M, M, 99, M, M, 3, 114, M2, M2, 20, M2, M3, M3, M,
+/*0a*/M2, 23, 113, M2, 112, M2, M, 51, 95, M, M2, M3, M2, M3, M3, M2,
+/*0b*/103, M, M2, M3, M2, M3, M3, M4, M2, 48, M, M, 73, M2, M, M3,
+/*0c*/M2, 22, 110, M2, 109, M2, M, 9, 108, M2, M, M3, M2, M3, M3, M,
+/*0d*/102, M2, M, M, M2, M3, M3, M, M2, M3, M3, M2, M, M4, M, M3,
+/*0e*/98, M, M2, M3, M2, M, M3, M4, M2, M3, M3, M4, M3, M, M, M,
+/*0f*/M2, M3, M3, M, M3, M, M, M, 56, M4, M, M3, M4, M, M, M,
+/*10*/C8, M, M2, 39, M, 34, 105, M2, M, 30, 104, M, 101, M, M, 4,
+/*11*/M, M, 100, M, 83, M, M2, 12, 87, M, M, 57, M2, M, M3, M,
+/*12*/M2, 97, 82, M2, 78, M2, M2, 1, 96, M, M, M, M, M, M3, M2,
+/*13*/94, M, M2, M3, M2, M, M3, M, M2, M, 79, M, 69, M, M4, M,
+/*14*/M2, 93, 92, M, 91, M, M2, 8, 90, M2, M2, M, M, M, M, M4,
+/*15*/89, M, M, M3, M2, M3, M3, M, M, M, M3, M2, M3, M2, M, M3,
+/*16*/86, M, M2, M3, M2, M, M3, M, M2, M, M3, M, M3, M, M, M3,
+/*17*/M, M, M3, M2, M3, M2, M4, M, 60, M, M2, M3, M4, M, M, M2,
+/*18*/M2, 88, 85, M2, 84, M, M2, 55, 81, M2, M2, M3, M2, M3, M3, M4,
+/*19*/77, M, M, M, M2, M3, M, M, M2, M3, M3, M4, M3, M2, M, M,
+/*1a*/74, M, M2, M3, M, M, M3, M, M, M, M3, M, M3, M, M4, M3,
+/*1b*/M2, 70, 107, M4, 65, M2, M2, M, 127, M, M, M, M2, M3, M3, M,
+/*1c*/80, M2, M2, 72, M, 119, 118, M, M2, 126, 76, M, 125, M, M4, M3,
+/*1d*/M2, 115, 124, M, 75, M, M, M3, 61, M, M4, M, M4, M, M, M,
+/*1e*/M, 123, 122, M4, 121, M4, M, M3, 117, M2, M2, M3, M4, M3, M, M,
+/*1f*/111, M, M, M, M4, M3, M3, M, M, M, M3, M, M3, M2, M, M
+};
+static unsigned char cheetah_mtag_syntab[] = {
+       NONE, MTC0,
+       MTC1, NONE,
+       MTC2, NONE,
+       NONE, MT0,
+       MTC3, NONE,
+       NONE, MT1,
+       NONE, MT2,
+       NONE, NONE
+};
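+
+/* In the tables above, values 0-127 name a single corrected data bit,
+ * C0-C8 the ECC check bits, MT0-MT2/MTC0-MTC3 the MTAG and MTAG check
+ * bits, and the M/M2/M3/M4 sentinels mark syndromes involving multiple
+ * bits that cannot be pinned to one position (NONE means no error).
+ * The memory controller driver's get_unumber routine consumes these
+ * codes.
+ */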
+
+/* Return the highest priority error condition mentioned. */
+static inline unsigned long cheetah_get_hipri(unsigned long afsr)
+{
+	unsigned long tmp = 0;
+	int i;
+
+	for (i = 0; cheetah_error_table[i].mask; i++) {
+		if ((tmp = (afsr & cheetah_error_table[i].mask)) != 0UL)
+			return tmp;
+	}
+	return tmp;
+}
+
+static const char *cheetah_get_string(unsigned long bit)
+{
+	int i;
+
+	for (i = 0; cheetah_error_table[i].mask; i++) {
+		if ((bit & cheetah_error_table[i].mask) != 0UL)
+			return cheetah_error_table[i].name;
+	}
+	return "???";
+}
+
+static void cheetah_log_errors(struct pt_regs *regs, struct cheetah_err_info *info,
+			       unsigned long afsr, unsigned long afar, int recoverable)
+{
+	unsigned long hipri;
+	char unum[256];
+
+	printk("%s" "ERROR(%d): Cheetah error trap taken afsr[%016lx] afar[%016lx] TL1(%d)\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       afsr, afar,
+	       (afsr & CHAFSR_TL1) ? 1 : 0);
+	printk("%s" "ERROR(%d): TPC[%lx] TNPC[%lx] O7[%lx] TSTATE[%lx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       regs->tpc, regs->tnpc, regs->u_regs[UREG_I7], regs->tstate);
+	printk("%s" "ERROR(%d): ",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id());
+	printk("TPC<%pS>\n", (void *) regs->tpc);
+	printk("%s" "ERROR(%d): M_SYND(%lx),  E_SYND(%lx)%s%s\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT,
+	       (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT,
+	       (afsr & CHAFSR_ME) ? ", Multiple Errors" : "",
+	       (afsr & CHAFSR_PRIV) ? ", Privileged" : "");
+	hipri = cheetah_get_hipri(afsr);
+	printk("%s" "ERROR(%d): Highest priority error (%016lx) \"%s\"\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       hipri, cheetah_get_string(hipri));
+
+	/* Try to get unumber if relevant. */
+#define ESYND_ERRORS	(CHAFSR_IVC | CHAFSR_IVU | \
+			 CHAFSR_CPC | CHAFSR_CPU | \
+			 CHAFSR_UE  | CHAFSR_CE  | \
+			 CHAFSR_EDC | CHAFSR_EDU  | \
+			 CHAFSR_UCC | CHAFSR_UCU  | \
+			 CHAFSR_WDU | CHAFSR_WDC)
+#define MSYND_ERRORS	(CHAFSR_EMC | CHAFSR_EMU)
+	if (afsr & ESYND_ERRORS) {
+		int syndrome;
+		int ret;
+
+		syndrome = (afsr & CHAFSR_E_SYNDROME) >> CHAFSR_E_SYNDROME_SHIFT;
+		syndrome = cheetah_ecc_syntab[syndrome];
+		ret = sprintf_dimm(syndrome, afar, unum, sizeof(unum));
+		if (ret != -1)
+			printk("%s" "ERROR(%d): AFAR E-syndrome [%s]\n",
+			       (recoverable ? KERN_WARNING : KERN_CRIT),
+			       smp_processor_id(), unum);
+	} else if (afsr & MSYND_ERRORS) {
+		int syndrome;
+		int ret;
+
+		syndrome = (afsr & CHAFSR_M_SYNDROME) >> CHAFSR_M_SYNDROME_SHIFT;
+		syndrome = cheetah_mtag_syntab[syndrome];
+		ret = sprintf_dimm(syndrome, afar, unum, sizeof(unum));
+		if (ret != -1)
+			printk("%s" "ERROR(%d): AFAR M-syndrome [%s]\n",
+			       (recoverable ? KERN_WARNING : KERN_CRIT),
+			       smp_processor_id(), unum);
+	}
+
+	/* Now dump the cache snapshots. */
+	printk("%s" "ERROR(%d): D-cache idx[%x] tag[%016llx] utag[%016llx] stag[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       (int) info->dcache_index,
+	       info->dcache_tag,
+	       info->dcache_utag,
+	       info->dcache_stag);
+	printk("%s" "ERROR(%d): D-cache data0[%016llx] data1[%016llx] data2[%016llx] data3[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       info->dcache_data[0],
+	       info->dcache_data[1],
+	       info->dcache_data[2],
+	       info->dcache_data[3]);
+	printk("%s" "ERROR(%d): I-cache idx[%x] tag[%016llx] utag[%016llx] stag[%016llx] "
+	       "u[%016llx] l[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       (int) info->icache_index,
+	       info->icache_tag,
+	       info->icache_utag,
+	       info->icache_stag,
+	       info->icache_upper,
+	       info->icache_lower);
+	printk("%s" "ERROR(%d): I-cache INSN0[%016llx] INSN1[%016llx] INSN2[%016llx] INSN3[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       info->icache_data[0],
+	       info->icache_data[1],
+	       info->icache_data[2],
+	       info->icache_data[3]);
+	printk("%s" "ERROR(%d): I-cache INSN4[%016llx] INSN5[%016llx] INSN6[%016llx] INSN7[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       info->icache_data[4],
+	       info->icache_data[5],
+	       info->icache_data[6],
+	       info->icache_data[7]);
+	printk("%s" "ERROR(%d): E-cache idx[%x] tag[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       (int) info->ecache_index, info->ecache_tag);
+	printk("%s" "ERROR(%d): E-cache data0[%016llx] data1[%016llx] data2[%016llx] data3[%016llx]\n",
+	       (recoverable ? KERN_WARNING : KERN_CRIT), smp_processor_id(),
+	       info->ecache_data[0],
+	       info->ecache_data[1],
+	       info->ecache_data[2],
+	       info->ecache_data[3]);
+
+	afsr = (afsr & ~hipri) & cheetah_afsr_errors;
+	while (afsr != 0UL) {
+		unsigned long bit = cheetah_get_hipri(afsr);
+
+		printk("%s" "ERROR: Multiple-error (%016lx) \"%s\"\n",
+		       (recoverable ? KERN_WARNING : KERN_CRIT),
+		       bit, cheetah_get_string(bit));
+
+		afsr &= ~bit;
+	}
+
+	if (!recoverable)
+		printk(KERN_CRIT "ERROR: This condition is not recoverable.\n");
+}
+
+static int cheetah_recheck_errors(struct cheetah_err_info *logp)
+{
+	unsigned long afsr, afar;
+	int ret = 0;
+
+	__asm__ __volatile__("ldxa [%%g0] %1, %0\n\t"
+			     : "=r" (afsr)
+			     : "i" (ASI_AFSR));
+	if ((afsr & cheetah_afsr_errors) != 0) {
+		if (logp != NULL) {
+			__asm__ __volatile__("ldxa [%%g0] %1, %0\n\t"
+					     : "=r" (afar)
+					     : "i" (ASI_AFAR));
+			logp->afsr = afsr;
+			logp->afar = afar;
+		}
+		ret = 1;
+	}
+	__asm__ __volatile__("stxa %0, [%%g0] %1\n\t"
+			     "membar #Sync\n\t"
+			     : : "r" (afsr), "i" (ASI_AFSR));
+
+	return ret;
+}
+
+void cheetah_fecc_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar)
+{
+	struct cheetah_err_info local_snapshot, *p;
+	int recoverable;
+
+	/* Flush E-cache */
+	cheetah_flush_ecache();
+
+	p = cheetah_get_error_log(afsr);
+	if (!p) {
+		prom_printf("ERROR: Early Fast-ECC error afsr[%016lx] afar[%016lx]\n",
+			    afsr, afar);
+		prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
+			    smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
+		prom_halt();
+	}
+
+	/* Grab snapshot of logged error. */
+	memcpy(&local_snapshot, p, sizeof(local_snapshot));
+
+	/* If the current trap snapshot does not match what the
+	 * trap handler passed along into our args, big trouble.
+	 * In such a case, mark the local copy as invalid.
+	 *
+	 * Else, it matches and we mark the afsr in the non-local
+	 * copy as invalid so we may log new error traps there.
+	 */
+	if (p->afsr != afsr || p->afar != afar)
+		local_snapshot.afsr = CHAFSR_INVALID;
+	else
+		p->afsr = CHAFSR_INVALID;
+
+	cheetah_flush_icache();
+	cheetah_flush_dcache();
+
+	/* Re-enable I-cache/D-cache */
+	__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+			     "or %%g1, %1, %%g1\n\t"
+			     "stxa %%g1, [%%g0] %0\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "i" (ASI_DCU_CONTROL_REG),
+			       "i" (DCU_DC | DCU_IC)
+			     : "g1");
+
+	/* Re-enable error reporting */
+	__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+			     "or %%g1, %1, %%g1\n\t"
+			     "stxa %%g1, [%%g0] %0\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "i" (ASI_ESTATE_ERROR_EN),
+			       "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN)
+			     : "g1");
+
+	/* Decide if we can continue after handling this trap and
+	 * logging the error.
+	 */
+	recoverable = 1;
+	if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
+		recoverable = 0;
+
+	/* Re-check AFSR/AFAR.  What we are looking for here is whether a new
+	 * error was logged while we had error reporting traps disabled.
+	 */
+	if (cheetah_recheck_errors(&local_snapshot)) {
+		unsigned long new_afsr = local_snapshot.afsr;
+
+		/* If we got a new asynchronous error, die... */
+		if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU |
+				CHAFSR_WDU | CHAFSR_CPU |
+				CHAFSR_IVU | CHAFSR_UE |
+				CHAFSR_BERR | CHAFSR_TO))
+			recoverable = 0;
+	}
+
+	/* Log errors. */
+	cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable);
+
+	if (!recoverable)
+		panic("Irrecoverable Fast-ECC error trap.\n");
+
+	/* Flush E-cache to kick the error trap handlers out. */
+	cheetah_flush_ecache();
+}
+
+/* Try to fix a correctable error by pushing the line out from
+ * the E-cache.  Recheck error reporting registers to see if the
+ * problem is intermittent.
+ */
+static int cheetah_fix_ce(unsigned long physaddr)
+{
+	unsigned long orig_estate;
+	unsigned long alias1, alias2;
+	int ret;
+
+	/* Make sure correctable error traps are disabled. */
+	__asm__ __volatile__("ldxa	[%%g0] %2, %0\n\t"
+			     "andn	%0, %1, %%g1\n\t"
+			     "stxa	%%g1, [%%g0] %2\n\t"
+			     "membar	#Sync"
+			     : "=&r" (orig_estate)
+			     : "i" (ESTATE_ERROR_CEEN),
+			       "i" (ASI_ESTATE_ERROR_EN)
+			     : "g1");
+
+	/* We calculate alias addresses that will force the
+	 * cache line in question out of the E-cache.  Then
+	 * we bring it back in with an atomic instruction so
+	 * that we get it in some modified/exclusive state,
+	 * then we displace it again to try and get proper ECC
+	 * pushed back into the system.
+	 */
+	physaddr &= ~(8UL - 1UL);
+	alias1 = (ecache_flush_physbase +
+		  (physaddr & ((ecache_flush_size >> 1) - 1)));
+	alias2 = alias1 + (ecache_flush_size >> 1);
+	__asm__ __volatile__("ldxa	[%0] %3, %%g0\n\t"
+			     "ldxa	[%1] %3, %%g0\n\t"
+			     "casxa	[%2] %3, %%g0, %%g0\n\t"
+			     "ldxa	[%0] %3, %%g0\n\t"
+			     "ldxa	[%1] %3, %%g0\n\t"
+			     "membar	#Sync"
+			     : /* no outputs */
+			     : "r" (alias1), "r" (alias2),
+			       "r" (physaddr), "i" (ASI_PHYS_USE_EC));
+
+	/* Did that trigger another error? */
+	if (cheetah_recheck_errors(NULL)) {
+		/* Try one more time. */
+		__asm__ __volatile__("ldxa [%0] %1, %%g0\n\t"
+				     "membar #Sync"
+				     : : "r" (physaddr), "i" (ASI_PHYS_USE_EC));
+		if (cheetah_recheck_errors(NULL))
+			ret = 2;
+		else
+			ret = 1;
+	} else {
+		/* No new error, intermittent problem. */
+		ret = 0;
+	}
+
+	/* Restore error enables. */
+	__asm__ __volatile__("stxa	%0, [%%g0] %1\n\t"
+			     "membar	#Sync"
+			     : : "r" (orig_estate), "i" (ASI_ESTATE_ERROR_EN));
+
+	return ret;
+}
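+
+/* Return codes from cheetah_fix_ce(): 0 means no error re-triggered
+ * (an intermittent fault), 1 means the first displacement re-triggered
+ * the error but the retry was clean, and 2 means the error persisted
+ * across the retry as well.
+ */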
+
+/* Return non-zero if PADDR is a valid physical memory address. */
+static int cheetah_check_main_memory(unsigned long paddr)
+{
+	unsigned long vaddr = PAGE_OFFSET + paddr;
+
+	if (vaddr > (unsigned long) high_memory)
+		return 0;
+
+	return kern_addr_valid(vaddr);
+}
+
+void cheetah_cee_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar)
+{
+	struct cheetah_err_info local_snapshot, *p;
+	int recoverable, is_memory;
+
+	p = cheetah_get_error_log(afsr);
+	if (!p) {
+		prom_printf("ERROR: Early CEE error afsr[%016lx] afar[%016lx]\n",
+			    afsr, afar);
+		prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
+			    smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
+		prom_halt();
+	}
+
+	/* Grab snapshot of logged error. */
+	memcpy(&local_snapshot, p, sizeof(local_snapshot));
+
+	/* If the current trap snapshot does not match what the
+	 * trap handler passed along into our args, big trouble.
+	 * In such a case, mark the local copy as invalid.
+	 *
+	 * Else, it matches and we mark the afsr in the non-local
+	 * copy as invalid so we may log new error traps there.
+	 */
+	if (p->afsr != afsr || p->afar != afar)
+		local_snapshot.afsr = CHAFSR_INVALID;
+	else
+		p->afsr = CHAFSR_INVALID;
+
+	is_memory = cheetah_check_main_memory(afar);
+
+	if (is_memory && (afsr & CHAFSR_CE) != 0UL) {
+		/* XXX Might want to log the results of this operation
+		 * XXX somewhere... -DaveM
+		 */
+		cheetah_fix_ce(afar);
+	}
+
+	{
+		int flush_all, flush_line;
+
+		flush_all = flush_line = 0;
+		if ((afsr & CHAFSR_EDC) != 0UL) {
+			if ((afsr & cheetah_afsr_errors) == CHAFSR_EDC)
+				flush_line = 1;
+			else
+				flush_all = 1;
+		} else if ((afsr & CHAFSR_CPC) != 0UL) {
+			if ((afsr & cheetah_afsr_errors) == CHAFSR_CPC)
+				flush_line = 1;
+			else
+				flush_all = 1;
+		}
+
+		/* Trap handler only disabled I-cache, flush it. */
+		cheetah_flush_icache();
+
+		/* Re-enable I-cache */
+		__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+				     "or %%g1, %1, %%g1\n\t"
+				     "stxa %%g1, [%%g0] %0\n\t"
+				     "membar #Sync"
+				     : /* no outputs */
+				     : "i" (ASI_DCU_CONTROL_REG),
+				     "i" (DCU_IC)
+				     : "g1");
+
+		if (flush_all)
+			cheetah_flush_ecache();
+		else if (flush_line)
+			cheetah_flush_ecache_line(afar);
+	}
+
+	/* Re-enable error reporting */
+	__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+			     "or %%g1, %1, %%g1\n\t"
+			     "stxa %%g1, [%%g0] %0\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "i" (ASI_ESTATE_ERROR_EN),
+			       "i" (ESTATE_ERROR_CEEN)
+			     : "g1");
+
+	/* Decide if we can continue after handling this trap and
+	 * logging the error.
+	 */
+	recoverable = 1;
+	if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
+		recoverable = 0;
+
+	/* Re-check AFSR/AFAR */
+	(void) cheetah_recheck_errors(&local_snapshot);
+
+	/* Log errors. */
+	cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable);
+
+	if (!recoverable)
+		panic("Irrecoverable Correctable-ECC error trap.\n");
+}
+
+void cheetah_deferred_handler(struct pt_regs *regs, unsigned long afsr, unsigned long afar)
+{
+	struct cheetah_err_info local_snapshot, *p;
+	int recoverable, is_memory;
+
+#ifdef CONFIG_PCI
+	/* Check for the special PCI poke sequence. */
+	if (pci_poke_in_progress && pci_poke_cpu == smp_processor_id()) {
+		cheetah_flush_icache();
+		cheetah_flush_dcache();
+
+		/* Re-enable I-cache/D-cache */
+		__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+				     "or %%g1, %1, %%g1\n\t"
+				     "stxa %%g1, [%%g0] %0\n\t"
+				     "membar #Sync"
+				     : /* no outputs */
+				     : "i" (ASI_DCU_CONTROL_REG),
+				       "i" (DCU_DC | DCU_IC)
+				     : "g1");
+
+		/* Re-enable error reporting */
+		__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+				     "or %%g1, %1, %%g1\n\t"
+				     "stxa %%g1, [%%g0] %0\n\t"
+				     "membar #Sync"
+				     : /* no outputs */
+				     : "i" (ASI_ESTATE_ERROR_EN),
+				       "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN)
+				     : "g1");
+
+		(void) cheetah_recheck_errors(NULL);
+
+		pci_poke_faulted = 1;
+		regs->tpc += 4;
+		regs->tnpc = regs->tpc + 4;
+		return;
+	}
+#endif
+
+	p = cheetah_get_error_log(afsr);
+	if (!p) {
+		prom_printf("ERROR: Early deferred error afsr[%016lx] afar[%016lx]\n",
+			    afsr, afar);
+		prom_printf("ERROR: CPU(%d) TPC[%016lx] TNPC[%016lx] TSTATE[%016lx]\n",
+			    smp_processor_id(), regs->tpc, regs->tnpc, regs->tstate);
+		prom_halt();
+	}
+
+	/* Grab snapshot of logged error. */
+	memcpy(&local_snapshot, p, sizeof(local_snapshot));
+
+	/* If the current trap snapshot does not match what the
+	 * trap handler passed along into our args, big trouble.
+	 * In such a case, mark the local copy as invalid.
+	 *
+	 * Else, it matches and we mark the afsr in the non-local
+	 * copy as invalid so we may log new error traps there.
+	 */
+	if (p->afsr != afsr || p->afar != afar)
+		local_snapshot.afsr = CHAFSR_INVALID;
+	else
+		p->afsr = CHAFSR_INVALID;
+
+	is_memory = cheetah_check_main_memory(afar);
+
+	{
+		int flush_all, flush_line;
+
+		flush_all = flush_line = 0;
+		if ((afsr & CHAFSR_EDU) != 0UL) {
+			if ((afsr & cheetah_afsr_errors) == CHAFSR_EDU)
+				flush_line = 1;
+			else
+				flush_all = 1;
+		} else if ((afsr & CHAFSR_BERR) != 0UL) {
+			if ((afsr & cheetah_afsr_errors) == CHAFSR_BERR)
+				flush_line = 1;
+			else
+				flush_all = 1;
+		}
+
+		cheetah_flush_icache();
+		cheetah_flush_dcache();
+
+		/* Re-enable I/D caches */
+		__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+				     "or %%g1, %1, %%g1\n\t"
+				     "stxa %%g1, [%%g0] %0\n\t"
+				     "membar #Sync"
+				     : /* no outputs */
+				     : "i" (ASI_DCU_CONTROL_REG),
+				     "i" (DCU_IC | DCU_DC)
+				     : "g1");
+
+		if (flush_all)
+			cheetah_flush_ecache();
+		else if (flush_line)
+			cheetah_flush_ecache_line(afar);
+	}
+
+	/* Re-enable error reporting */
+	__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+			     "or %%g1, %1, %%g1\n\t"
+			     "stxa %%g1, [%%g0] %0\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "i" (ASI_ESTATE_ERROR_EN),
+			     "i" (ESTATE_ERROR_NCEEN | ESTATE_ERROR_CEEN)
+			     : "g1");
+
+	/* Decide if we can continue after handling this trap and
+	 * logging the error.
+	 */
+	recoverable = 1;
+	if (afsr & (CHAFSR_PERR | CHAFSR_IERR | CHAFSR_ISAP))
+		recoverable = 0;
+
+	/* Re-check AFSR/AFAR.  What we are looking for here is whether a new
+	 * error was logged while we had error reporting traps disabled.
+	 */
+	if (cheetah_recheck_errors(&local_snapshot)) {
+		unsigned long new_afsr = local_snapshot.afsr;
+
+		/* If we got a new asynchronous error, die... */
+		if (new_afsr & (CHAFSR_EMU | CHAFSR_EDU |
+				CHAFSR_WDU | CHAFSR_CPU |
+				CHAFSR_IVU | CHAFSR_UE |
+				CHAFSR_BERR | CHAFSR_TO))
+			recoverable = 0;
+	}
+
+	/* Log errors. */
+	cheetah_log_errors(regs, &local_snapshot, afsr, afar, recoverable);
+
+	/* "Recoverable" here means we try to yank the page from ever
+	 * being newly used again.  This depends upon a few things:
+	 * 1) Must be main memory, and AFAR must be valid.
+	 * 2) If we trapped from user, OK.
+	 * 3) Else, if we trapped from kernel we must find exception
+	 *    table entry (ie. we have to have been accessing user
+	 *    space).
+	 *
+	 * If AFAR is not in main memory, or we trapped from kernel
+	 * and cannot find an exception table entry, it is unacceptable
+	 * to try and continue.
+	 */
+	if (recoverable && is_memory) {
+		if ((regs->tstate & TSTATE_PRIV) == 0UL) {
+			/* OK, usermode access. */
+			recoverable = 1;
+		} else {
+			const struct exception_table_entry *entry;
+
+			entry = search_exception_tables(regs->tpc);
+			if (entry) {
+				/* OK, kernel access to userspace. */
+				recoverable = 1;
+
+			} else {
+				/* BAD, privileged state is corrupted. */
+				recoverable = 0;
+			}
+
+			if (recoverable) {
+				if (pfn_valid(afar >> PAGE_SHIFT))
+					get_page(pfn_to_page(afar >> PAGE_SHIFT));
+				else
+					recoverable = 0;
+
+				/* Only perform fixup if we still have a
+				 * recoverable condition.
+				 */
+				if (recoverable) {
+					regs->tpc = entry->fixup;
+					regs->tnpc = regs->tpc + 4;
+				}
+			}
+		}
+	} else {
+		recoverable = 0;
+	}
+
+	if (!recoverable)
+		panic("Irrecoverable deferred error trap.\n");
+}
+
+/* Handle a D/I cache parity error trap.  TYPE is encoded as:
+ *
+ * Bit0:	0=dcache,1=icache
+ * Bit1:	0=recoverable,1=unrecoverable
+ *
+ * The hardware has disabled both the I-cache and D-cache in
+ * the %dcr register.
+ */
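+
+/* Example TYPE values: 0x0 is a recoverable D-cache parity error, 0x1
+ * a recoverable I-cache parity error, and 0x2/0x3 the unrecoverable
+ * variants, which panic below.
+ */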
+void cheetah_plus_parity_error(int type, struct pt_regs *regs)
+{
+	if (type & 0x1)
+		__cheetah_flush_icache();
+	else
+		cheetah_plus_zap_dcache_parity();
+	cheetah_flush_dcache();
+
+	/* Re-enable I-cache/D-cache */
+	__asm__ __volatile__("ldxa [%%g0] %0, %%g1\n\t"
+			     "or %%g1, %1, %%g1\n\t"
+			     "stxa %%g1, [%%g0] %0\n\t"
+			     "membar #Sync"
+			     : /* no outputs */
+			     : "i" (ASI_DCU_CONTROL_REG),
+			       "i" (DCU_DC | DCU_IC)
+			     : "g1");
+
+	if (type & 0x2) {
+		printk(KERN_EMERG "CPU[%d]: Cheetah+ %c-cache parity error at TPC[%016lx]\n",
+		       smp_processor_id(),
+		       (type & 0x1) ? 'I' : 'D',
+		       regs->tpc);
+		printk(KERN_EMERG "TPC<%pS>\n", (void *) regs->tpc);
+		panic("Irrecoverable Cheetah+ parity error.");
+	}
+
+	printk(KERN_WARNING "CPU[%d]: Cheetah+ %c-cache parity error at TPC[%016lx]\n",
+	       smp_processor_id(),
+	       (type & 0x1) ? 'I' : 'D',
+	       regs->tpc);
+	printk(KERN_WARNING "TPC<%pS>\n", (void *) regs->tpc);
+}
+
+struct sun4v_error_entry {
+	/* Unique error handle */
+/*0x00*/u64		err_handle;
+
+	/* %stick value at the time of the error */
+/*0x08*/u64		err_stick;
+
+/*0x10*/u8		reserved_1[3];
+
+	/* Error type */
+/*0x13*/u8		err_type;
+#define SUN4V_ERR_TYPE_UNDEFINED	0
+#define SUN4V_ERR_TYPE_UNCORRECTED_RES	1
+#define SUN4V_ERR_TYPE_PRECISE_NONRES	2
+#define SUN4V_ERR_TYPE_DEFERRED_NONRES	3
+#define SUN4V_ERR_TYPE_SHUTDOWN_RQST	4
+#define SUN4V_ERR_TYPE_DUMP_CORE	5
+#define SUN4V_ERR_TYPE_SP_STATE_CHANGE	6
+#define SUN4V_ERR_TYPE_NUM		7
+
+	/* Error attributes */
+/*0x14*/u32		err_attrs;
+#define SUN4V_ERR_ATTRS_PROCESSOR	0x00000001
+#define SUN4V_ERR_ATTRS_MEMORY		0x00000002
+#define SUN4V_ERR_ATTRS_PIO		0x00000004
+#define SUN4V_ERR_ATTRS_INT_REGISTERS	0x00000008
+#define SUN4V_ERR_ATTRS_FPU_REGISTERS	0x00000010
+#define SUN4V_ERR_ATTRS_SHUTDOWN_RQST	0x00000020
+#define SUN4V_ERR_ATTRS_ASR		0x00000040
+#define SUN4V_ERR_ATTRS_ASI		0x00000080
+#define SUN4V_ERR_ATTRS_PRIV_REG	0x00000100
+#define SUN4V_ERR_ATTRS_SPSTATE_MSK	0x00000600
+#define SUN4V_ERR_ATTRS_MCD		0x00000800
+#define SUN4V_ERR_ATTRS_SPSTATE_SHFT	9
+#define SUN4V_ERR_ATTRS_MODE_MSK	0x03000000
+#define SUN4V_ERR_ATTRS_MODE_SHFT	24
+#define SUN4V_ERR_ATTRS_RES_QUEUE_FULL	0x80000000
+
+#define SUN4V_ERR_SPSTATE_FAULTED	0
+#define SUN4V_ERR_SPSTATE_AVAILABLE	1
+#define SUN4V_ERR_SPSTATE_NOT_PRESENT	2
+
+#define SUN4V_ERR_MODE_USER		1
+#define SUN4V_ERR_MODE_PRIV		2
+
+	/* Real address of the memory region or PIO transaction */
+/*0x18*/u64		err_raddr;
+
+	/* Size of the operation triggering the error, in bytes */
+/*0x20*/u32		err_size;
+
+	/* ID of the CPU */
+/*0x24*/u16		err_cpu;
+
+	/* Grace period for shutdown, in seconds */
+/*0x26*/u16		err_secs;
+
+	/* Value of the %asi register */
+/*0x28*/u8		err_asi;
+
+/*0x29*/u8		reserved_2;
+
+	/* Value of the ASR register number */
+/*0x2a*/u16		err_asr;
+#define SUN4V_ERR_ASR_VALID		0x8000
+
+/*0x2c*/u32		reserved_3;
+/*0x30*/u64		reserved_4;
+/*0x38*/u64		reserved_5;
+};
+
+static atomic_t sun4v_resum_oflow_cnt = ATOMIC_INIT(0);
+static atomic_t sun4v_nonresum_oflow_cnt = ATOMIC_INIT(0);
+
+static const char *sun4v_err_type_to_str(u8 type)
+{
+	static const char *types[SUN4V_ERR_TYPE_NUM] = {
+		"undefined",
+		"uncorrected resumable",
+		"precise nonresumable",
+		"deferred nonresumable",
+		"shutdown request",
+		"dump core",
+		"SP state change",
+	};
+
+	if (type < SUN4V_ERR_TYPE_NUM)
+		return types[type];
+
+	return "unknown";
+}
+
+static void sun4v_emit_err_attr_strings(u32 attrs)
+{
+	static const char *attr_names[] = {
+		"processor",
+		"memory",
+		"PIO",
+		"int-registers",
+		"fpu-registers",
+		"shutdown-request",
+		"ASR",
+		"ASI",
+		"priv-reg",
+	};
+	static const char *sp_states[] = {
+		"sp-faulted",
+		"sp-available",
+		"sp-not-present",
+		"sp-state-reserved",
+	};
+	static const char *modes[] = {
+		"mode-reserved0",
+		"user",
+		"priv",
+		"mode-reserved1",
+	};
+	u32 sp_state, mode;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(attr_names); i++) {
+		if (attrs & (1U << i)) {
+			const char *s = attr_names[i];
+
+			pr_cont("%s ", s);
+		}
+	}
+
+	sp_state = ((attrs & SUN4V_ERR_ATTRS_SPSTATE_MSK) >>
+		    SUN4V_ERR_ATTRS_SPSTATE_SHFT);
+	pr_cont("%s ", sp_states[sp_state]);
+
+	mode = ((attrs & SUN4V_ERR_ATTRS_MODE_MSK) >>
+		SUN4V_ERR_ATTRS_MODE_SHFT);
+	pr_cont("%s ", modes[mode]);
+
+	if (attrs & SUN4V_ERR_ATTRS_RES_QUEUE_FULL)
+		pr_cont("res-queue-full ");
+}
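+
+/* Example: attrs of 0x82000002 (memory, priv mode, res-queue-full,
+ * sp-state field of zero) emits "memory sp-faulted priv
+ * res-queue-full ".
+ */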
+
+/* When the report contains a real-address of "-1" it means that the
+ * hardware did not provide the address.  So we compute the effective
+ * address of the load or store instruction at regs->tpc and report
+ * that.  Usually when this happens it's a PIO and in such a case we
+ * are using physical addresses with bypass ASIs anyways, so what we
+ * report here is exactly what we want.
+ */
+static void sun4v_report_real_raddr(const char *pfx, struct pt_regs *regs)
+{
+	unsigned int insn;
+	u64 addr;
+
+	if (!(regs->tstate & TSTATE_PRIV))
+		return;
+
+	insn = *(unsigned int *) regs->tpc;
+
+	addr = compute_effective_address(regs, insn, 0);
+
+	printk("%s: insn effective address [0x%016llx]\n",
+	       pfx, addr);
+}
+
+static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent,
+			    int cpu, const char *pfx, atomic_t *ocnt)
+{
+	u64 *raw_ptr = (u64 *) ent;
+	u32 attrs;
+	int cnt;
+
+	printk("%s: Reporting on cpu %d\n", pfx, cpu);
+	printk("%s: TPC [0x%016lx] <%pS>\n",
+	       pfx, regs->tpc, (void *) regs->tpc);
+
+	printk("%s: RAW [%016llx:%016llx:%016llx:%016llx\n",
+	       pfx, raw_ptr[0], raw_ptr[1], raw_ptr[2], raw_ptr[3]);
+	printk("%s:      %016llx:%016llx:%016llx:%016llx]\n",
+	       pfx, raw_ptr[4], raw_ptr[5], raw_ptr[6], raw_ptr[7]);
+
+	printk("%s: handle [0x%016llx] stick [0x%016llx]\n",
+	       pfx, ent->err_handle, ent->err_stick);
+
+	printk("%s: type [%s]\n", pfx, sun4v_err_type_to_str(ent->err_type));
+
+	attrs = ent->err_attrs;
+	printk("%s: attrs [0x%08x] < ", pfx, attrs);
+	sun4v_emit_err_attr_strings(attrs);
+	pr_cont(">\n");
+
+	/* Various fields in the error report are only valid if
+	 * certain attribute bits are set.
+	 */
+	if (attrs & (SUN4V_ERR_ATTRS_MEMORY |
+		     SUN4V_ERR_ATTRS_PIO |
+		     SUN4V_ERR_ATTRS_ASI)) {
+		printk("%s: raddr [0x%016llx]\n", pfx, ent->err_raddr);
+
+		if (ent->err_raddr == ~(u64)0)
+			sun4v_report_real_raddr(pfx, regs);
+	}
+
+	if (attrs & (SUN4V_ERR_ATTRS_MEMORY | SUN4V_ERR_ATTRS_ASI))
+		printk("%s: size [0x%x]\n", pfx, ent->err_size);
+
+	if (attrs & (SUN4V_ERR_ATTRS_PROCESSOR |
+		     SUN4V_ERR_ATTRS_INT_REGISTERS |
+		     SUN4V_ERR_ATTRS_FPU_REGISTERS |
+		     SUN4V_ERR_ATTRS_PRIV_REG))
+		printk("%s: cpu[%u]\n", pfx, ent->err_cpu);
+
+	if (attrs & SUN4V_ERR_ATTRS_ASI)
+		printk("%s: asi [0x%02x]\n", pfx, ent->err_asi);
+
+	if ((attrs & (SUN4V_ERR_ATTRS_INT_REGISTERS |
+		      SUN4V_ERR_ATTRS_FPU_REGISTERS |
+		      SUN4V_ERR_ATTRS_PRIV_REG)) &&
+	    (ent->err_asr & SUN4V_ERR_ASR_VALID) != 0)
+		printk("%s: reg [0x%04x]\n",
+		       pfx, ent->err_asr & ~SUN4V_ERR_ASR_VALID);
+
+	show_regs(regs);
+
+	if ((cnt = atomic_read(ocnt)) != 0) {
+		atomic_set(ocnt, 0);
+		wmb();
+		printk("%s: Queue overflowed %d times.\n",
+		       pfx, cnt);
+	}
+}
+
+/* Handle a memory corruption detected (MCD) error vectored in
+ * through the resumable error trap.
+ */
+void do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent)
+{
+	if (notify_die(DIE_TRAP, "MCD error", regs, 0, 0x34,
+		       SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		/* MCD exception could happen because the task was
+		 * running a system call with MCD enabled and passed a
+		 * non-versioned pointer or pointer with bad version
+		 * tag to the system call. In such cases, hypervisor
+		 * places the address of offending instruction in the
+		 * resumable error report. This is a deferred error,
+		 * so the read/write that caused the trap was potentially
+		 * retired a long time ago and we may have no choice
+		 * but to send SIGSEGV to the process.
+		 */
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			/* Looks like a bad syscall parameter */
+#ifdef DEBUG_EXCEPTIONS
+			pr_emerg("Exception: PC<%016lx> faddr<UNKNOWN>\n",
+				 regs->tpc);
+			pr_emerg("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
+				 ent.err_raddr, entry->fixup);
+#endif
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+	}
+
+	/* Send SIGSEGV to the userspace process with the right signal
+	 * code
+	 */
+	force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr,
+			0, current);
+}
+
+/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
+ * Log the event and clear the first word of the entry.
+ */
+void sun4v_resum_error(struct pt_regs *regs, unsigned long offset)
+{
+	enum ctx_state prev_state = exception_enter();
+	struct sun4v_error_entry *ent, local_copy;
+	struct trap_per_cpu *tb;
+	unsigned long paddr;
+	int cpu;
+
+	cpu = get_cpu();
+
+	tb = &trap_block[cpu];
+	paddr = tb->resum_kernel_buf_pa + offset;
+	ent = __va(paddr);
+
+	memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry));
+
+	/* We have a local copy now, so release the entry.  */
+	ent->err_handle = 0;
+	wmb();
+
+	put_cpu();
+
+	if (local_copy.err_type == SUN4V_ERR_TYPE_SHUTDOWN_RQST) {
+		/* We should really take the seconds field of
+		 * the error report and use it for the shutdown
+		 * invocation, but for now do the same thing we
+		 * do for a DS shutdown request.
+		 */
+		pr_info("Shutdown request, %u seconds...\n",
+			local_copy.err_secs);
+		orderly_poweroff(true);
+		goto out;
+	}
+
+	/* If this is a memory corruption detected error vectored in
+	 * by HV through the resumable error trap, call the handler.
+	 */
+	if (local_copy.err_attrs & SUN4V_ERR_ATTRS_MCD) {
+		do_mcd_err(regs, local_copy);
+		goto out;
+	}
+
+	sun4v_log_error(regs, &local_copy, cpu,
+			KERN_ERR "RESUMABLE ERROR",
+			&sun4v_resum_oflow_cnt);
+out:
+	exception_exit(prev_state);
+}
+
+/* If we try to printk() we'll probably make matters worse, by trying
+ * to retake locks this cpu already holds or causing more errors. So
+ * just bump a counter, and we'll report these counter bumps above.
+ */
+void sun4v_resum_overflow(struct pt_regs *regs)
+{
+	atomic_inc(&sun4v_resum_oflow_cnt);
+}
+
+/* Given a set of registers, get the virtual address that was being
+ * accessed by the faulting instruction at tpc.
+ */
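+/* Worked example (illustrative): a format-3 load such as
+ *	ldx	[%o0 + 0x10], %g1
+ * encodes as 0xc25a2010; bits 29:25 give rd == 1 (%g1), so the call
+ * below becomes compute_effective_address(regs, insn, 1) and returns
+ * the value of %o0 plus the immediate 0x10.
+ */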
+static unsigned long sun4v_get_vaddr(struct pt_regs *regs)
+{
+	unsigned int insn;
+
+	if (!copy_from_user(&insn, (void __user *)regs->tpc, 4)) {
+		return compute_effective_address(regs, insn,
+						 (insn >> 25) & 0x1f);
+	}
+	return 0;
+}
+
+/* Attempt to handle non-resumable errors generated from userspace.
+ * Returns true if the signal was handled, false otherwise.
+ */
+bool sun4v_nonresum_error_user_handled(struct pt_regs *regs,
+				       struct sun4v_error_entry *ent)
+{
+	unsigned int attrs = ent->err_attrs;
+
+	if (attrs & SUN4V_ERR_ATTRS_MEMORY) {
+		unsigned long addr = ent->err_raddr;
+
+		if (addr == ~(u64)0) {
+			/* This seems highly unlikely to ever occur */
+			pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory error detected in unknown location!\n");
+		} else {
+			unsigned long page_cnt = DIV_ROUND_UP(ent->err_size,
+							      PAGE_SIZE);
+
+			/* Break the unfortunate news. */
+			pr_emerg("SUN4V NON-RECOVERABLE ERROR: Memory failed at %016lX\n",
+				 addr);
+			pr_emerg("SUN4V NON-RECOVERABLE ERROR:   Claiming %lu ages.\n",
+				 page_cnt);
+
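+			/* Pin each affected page so the failed memory
+			 * is never handed back out.
+			 */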
+			while (page_cnt-- > 0) {
+				if (pfn_valid(addr >> PAGE_SHIFT))
+					get_page(pfn_to_page(addr >> PAGE_SHIFT));
+				addr += PAGE_SIZE;
+			}
+		}
+		force_sig(SIGKILL, current);
+
+		return true;
+	}
+	if (attrs & SUN4V_ERR_ATTRS_PIO) {
+		force_sig_fault(SIGBUS, BUS_ADRERR,
+				(void __user *)sun4v_get_vaddr(regs), 0, current);
+		return true;
+	}
+
+	/* Default to doing nothing */
+	return false;
+}
+
+/* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate.
+ * Log the event, clear the first word of the entry, and die.
+ */
+void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset)
+{
+	struct sun4v_error_entry *ent, local_copy;
+	struct trap_per_cpu *tb;
+	unsigned long paddr;
+	int cpu;
+
+	cpu = get_cpu();
+
+	tb = &trap_block[cpu];
+	paddr = tb->nonresum_kernel_buf_pa + offset;
+	ent = __va(paddr);
+
+	memcpy(&local_copy, ent, sizeof(struct sun4v_error_entry));
+
+	/* We have a local copy now, so release the entry.  */
+	ent->err_handle = 0;
+	wmb();
+
+	put_cpu();
+
+	if (!(regs->tstate & TSTATE_PRIV) &&
+	    sun4v_nonresum_error_user_handled(regs, &local_copy)) {
+		/* DON'T PANIC: This userspace error was handled. */
+		return;
+	}
+
+#ifdef CONFIG_PCI
+	/* Check for the special PCI poke sequence. */
+	if (pci_poke_in_progress && pci_poke_cpu == cpu) {
+		pci_poke_faulted = 1;
+		regs->tpc += 4;
+		regs->tnpc = regs->tpc + 4;
+		return;
+	}
+#endif
+
+	sun4v_log_error(regs, &local_copy, cpu,
+			KERN_EMERG "NON-RESUMABLE ERROR",
+			&sun4v_nonresum_oflow_cnt);
+
+	panic("Non-resumable error.");
+}
+
+/* If we try to printk() we'll probably make matters worse, by trying
+ * to retake locks this cpu already holds or causing more errors. So
+ * just bump a counter, and we'll report these counter bumps above.
+ */
+void sun4v_nonresum_overflow(struct pt_regs *regs)
+{
+	/* XXX Actually even this may not make much sense.  Perhaps
+	 * XXX we should just pull the plug and panic directly from here?
+	 */
+	atomic_inc(&sun4v_nonresum_oflow_cnt);
+}
+
+static void sun4v_tlb_error(struct pt_regs *regs)
+{
+	die_if_kernel("TLB/TSB error", regs);
+}
+
+unsigned long sun4v_err_itlb_vaddr;
+unsigned long sun4v_err_itlb_ctx;
+unsigned long sun4v_err_itlb_pte;
+unsigned long sun4v_err_itlb_error;
+
+void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
+{
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+	printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
+	       regs->tpc, tl);
+	printk(KERN_EMERG "SUN4V-ITLB: TPC<%pS>\n", (void *) regs->tpc);
+	printk(KERN_EMERG "SUN4V-ITLB: O7[%lx]\n", regs->u_regs[UREG_I7]);
+	printk(KERN_EMERG "SUN4V-ITLB: O7<%pS>\n",
+	       (void *) regs->u_regs[UREG_I7]);
+	printk(KERN_EMERG "SUN4V-ITLB: vaddr[%lx] ctx[%lx] "
+	       "pte[%lx] error[%lx]\n",
+	       sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
+	       sun4v_err_itlb_pte, sun4v_err_itlb_error);
+
+	sun4v_tlb_error(regs);
+}
+
+unsigned long sun4v_err_dtlb_vaddr;
+unsigned long sun4v_err_dtlb_ctx;
+unsigned long sun4v_err_dtlb_pte;
+unsigned long sun4v_err_dtlb_error;
+
+void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
+{
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+
+	printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
+	       regs->tpc, tl);
+	printk(KERN_EMERG "SUN4V-DTLB: TPC<%pS>\n", (void *) regs->tpc);
+	printk(KERN_EMERG "SUN4V-DTLB: O7[%lx]\n", regs->u_regs[UREG_I7]);
+	printk(KERN_EMERG "SUN4V-DTLB: O7<%pS>\n",
+	       (void *) regs->u_regs[UREG_I7]);
+	printk(KERN_EMERG "SUN4V-DTLB: vaddr[%lx] ctx[%lx] "
+	       "pte[%lx] error[%lx]\n",
+	       sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
+	       sun4v_err_dtlb_pte, sun4v_err_dtlb_error);
+
+	sun4v_tlb_error(regs);
+}
+
+void hypervisor_tlbop_error(unsigned long err, unsigned long op)
+{
+	printk(KERN_CRIT "SUN4V: TLB hv call error %lu for op %lu\n",
+	       err, op);
+}
+
+void hypervisor_tlbop_error_xcall(unsigned long err, unsigned long op)
+{
+	printk(KERN_CRIT "SUN4V: XCALL TLB hv call error %lu for op %lu\n",
+	       err, op);
+}
+
+static void do_fpe_common(struct pt_regs *regs)
+{
+	if (regs->tstate & TSTATE_PRIV) {
+		regs->tpc = regs->tnpc;
+		regs->tnpc += 4;
+	} else {
+		unsigned long fsr = current_thread_info()->xfsr[0];
+		int code;
+
+		if (test_thread_flag(TIF_32BIT)) {
+			regs->tpc &= 0xffffffff;
+			regs->tnpc &= 0xffffffff;
+		}
+		code = FPE_FLTUNK;
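+		/* %fsr.ftt (bits 16:14) == 1 is IEEE_754_exception; the
+		 * cexc field (bits 4:0) then identifies it: nvc=0x10,
+		 * ofc=0x08, ufc=0x04, dzc=0x02, nxc=0x01.
+		 */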
+		if ((fsr & 0x1c000) == (1 << 14)) {
+			if (fsr & 0x10)
+				code = FPE_FLTINV;
+			else if (fsr & 0x08)
+				code = FPE_FLTOVF;
+			else if (fsr & 0x04)
+				code = FPE_FLTUND;
+			else if (fsr & 0x02)
+				code = FPE_FLTDIV;
+			else if (fsr & 0x01)
+				code = FPE_FLTRES;
+		}
+		force_sig_fault(SIGFPE, code,
+				(void __user *)regs->tpc, 0, current);
+	}
+}
+
+void do_fpieee(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "fpu exception ieee", regs,
+		       0, 0x24, SIGFPE) == NOTIFY_STOP)
+		goto out;
+
+	do_fpe_common(regs);
+out:
+	exception_exit(prev_state);
+}
+
+void do_fpother(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+	struct fpustate *f = FPUSTATE;
+	int ret = 0;
+
+	if (notify_die(DIE_TRAP, "fpu exception other", regs,
+		       0, 0x25, SIGFPE) == NOTIFY_STOP)
+		goto out;
+
+	switch ((current_thread_info()->xfsr[0] & 0x1c000)) {
+	case (2 << 14): /* unfinished_FPop */
+	case (3 << 14): /* unimplemented_FPop */
+		ret = do_mathemu(regs, f, false);
+		break;
+	}
+	if (ret)
+		goto out;
+	do_fpe_common(regs);
+out:
+	exception_exit(prev_state);
+}
+
+void do_tof(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs,
+		       0, 0x26, SIGEMT) == NOTIFY_STOP)
+		goto out;
+
+	if (regs->tstate & TSTATE_PRIV)
+		die_if_kernel("Penguin overflow trap from kernel mode", regs);
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGEMT, EMT_TAGOVF,
+			(void __user *)regs->tpc, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+void do_div0(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "integer division by zero", regs,
+		       0, 0x28, SIGFPE) == NOTIFY_STOP)
+		goto out;
+
+	if (regs->tstate & TSTATE_PRIV)
+		die_if_kernel("TL0: Kernel divide by zero.", regs);
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGFPE, FPE_INTDIV,
+			(void __user *)regs->tpc, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+static void instruction_dump(unsigned int *pc)
+{
+	int i;
+
+	if ((((unsigned long) pc) & 3))
+		return;
+
+	printk("Instruction DUMP:");
+	for (i = -3; i < 6; i++)
+		printk("%c%08x%c",i?' ':'<',pc[i],i?' ':'>');
+	printk("\n");
+}
+
+static void user_instruction_dump(unsigned int __user *pc)
+{
+	int i;
+	unsigned int buf[9];
+	
+	if ((((unsigned long) pc) & 3))
+		return;
+		
+	if (copy_from_user(buf, pc - 3, sizeof(buf)))
+		return;
+
+	printk("Instruction DUMP:");
+	for (i = 0; i < 9; i++)
+		printk("%c%08x%c",i==3?' ':'<',buf[i],i==3?' ':'>');
+	printk("\n");
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *_ksp)
+{
+	unsigned long fp, ksp;
+	struct thread_info *tp;
+	int count = 0;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	int graph = 0;
+#endif
+
+	ksp = (unsigned long) _ksp;
+	if (!tsk)
+		tsk = current;
+	tp = task_thread_info(tsk);
+	if (ksp == 0UL) {
+		if (tsk == current)
+			asm("mov %%fp, %0" : "=r" (ksp));
+		else
+			ksp = tp->ksp;
+	}
+	if (tp == current_thread_info())
+		flushw_all();
+
+	fp = ksp + STACK_BIAS;
+
+	printk("Call Trace:\n");
+	do {
+		struct sparc_stackf *sf;
+		struct pt_regs *regs;
+		unsigned long pc;
+
+		if (!kstack_valid(tp, fp))
+			break;
+		sf = (struct sparc_stackf *) fp;
+		regs = (struct pt_regs *) (sf + 1);
+
+		if (kstack_is_trap_frame(tp, regs)) {
+			if (!(regs->tstate & TSTATE_PRIV))
+				break;
+			pc = regs->tpc;
+			fp = regs->u_regs[UREG_I6] + STACK_BIAS;
+		} else {
+			pc = sf->callers_pc;
+			fp = (unsigned long)sf->fp + STACK_BIAS;
+		}
+
+		printk(" [%016lx] %pS\n", pc, (void *) pc);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+		if ((pc + 8UL) == (unsigned long) &return_to_handler) {
+			int index = tsk->curr_ret_stack;
+			if (tsk->ret_stack && index >= graph) {
+				pc = tsk->ret_stack[index - graph].ret;
+				printk(" [%016lx] %pS\n", pc, (void *) pc);
+				graph++;
+			}
+		}
+#endif
+	} while (++count < 16);
+}
+
+static inline struct reg_window *kernel_stack_up(struct reg_window *rw)
+{
+	unsigned long fp = rw->ins[6];
+
+	if (!fp)
+		return NULL;
+
+	return (struct reg_window *) (fp + STACK_BIAS);
+}
+
+void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
+{
+	static int die_counter;
+	int count = 0;
+	
+	/* Amuse the user. */
+	printk(
+"              \\|/ ____ \\|/\n"
+"              \"@'/ .. \\`@\"\n"
+"              /_| \\__/ |_\\\n"
+"                 \\__U_/\n");
+
+	printk("%s(%d): %s [#%d]\n", current->comm, task_pid_nr(current), str, ++die_counter);
+	notify_die(DIE_OOPS, str, regs, 0, 255, SIGSEGV);
+	__asm__ __volatile__("flushw");
+	show_regs(regs);
+	add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+	if (regs->tstate & TSTATE_PRIV) {
+		struct thread_info *tp = current_thread_info();
+		struct reg_window *rw = (struct reg_window *)
+			(regs->u_regs[UREG_FP] + STACK_BIAS);
+
+		/* Stop the back trace when we hit userland or we
+		 * find some badly aligned kernel stack.
+		 */
+		while (rw &&
+		       count++ < 30 &&
+		       kstack_valid(tp, (unsigned long) rw)) {
+			printk("Caller[%016lx]: %pS\n", rw->ins[7],
+			       (void *) rw->ins[7]);
+
+			rw = kernel_stack_up(rw);
+		}
+		instruction_dump ((unsigned int *) regs->tpc);
+	} else {
+		if (test_thread_flag(TIF_32BIT)) {
+			regs->tpc &= 0xffffffff;
+			regs->tnpc &= 0xffffffff;
+		}
+		user_instruction_dump ((unsigned int __user *) regs->tpc);
+	}
+	if (panic_on_oops)
+		panic("Fatal exception");
+	if (regs->tstate & TSTATE_PRIV)
+		do_exit(SIGKILL);
+	do_exit(SIGSEGV);
+}
+EXPORT_SYMBOL(die_if_kernel);
+
+#define VIS_OPCODE_MASK	((0x3 << 30) | (0x3f << 19))
+#define VIS_OPCODE_VAL	((0x2 << 30) | (0x36 << 19))
+
+void do_illegal_instruction(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+	unsigned long pc = regs->tpc;
+	unsigned long tstate = regs->tstate;
+	u32 insn;
+
+	if (notify_die(DIE_TRAP, "illegal instruction", regs,
+		       0, 0x10, SIGILL) == NOTIFY_STOP)
+		goto out;
+
+	if (tstate & TSTATE_PRIV)
+		die_if_kernel("Kernel illegal instruction", regs);
+	if (test_thread_flag(TIF_32BIT))
+		pc = (u32)pc;
+	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
+		if ((insn & 0xc1ffc000) == 0x81700000) /* POPC */ {
+			if (handle_popc(insn, regs))
+				goto out;
+		} else if ((insn & 0xc1580000) == 0xc1100000) /* LDQ/STQ */ {
+			if (handle_ldf_stq(insn, regs))
+				goto out;
+		} else if (tlb_type == hypervisor) {
+			if ((insn & VIS_OPCODE_MASK) == VIS_OPCODE_VAL) {
+				if (!vis_emul(regs, insn))
+					goto out;
+			} else {
+				struct fpustate *f = FPUSTATE;
+
+				/* On UltraSPARC T2 and later, FPU insns which
+				 * are not implemented in HW signal an illegal
+				 * instruction trap and do not set the FP Trap
+				 * Type field in the %fsr to unimplemented_FPop.
+				 */
+				if (do_mathemu(regs, f, true))
+					goto out;
+			}
+		}
+	}
+	force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "memory address unaligned", regs,
+		       0, 0x34, SIGSEGV) == NOTIFY_STOP)
+		goto out;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
+		goto out;
+	}
+	if (is_no_fault_exception(regs))
+		goto out;
+
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx)
+{
+	if (notify_die(DIE_TRAP, "memory address unaligned", regs,
+		       0, 0x34, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		kernel_unaligned_trap(regs, *((unsigned int *)regs->tpc));
+		return;
+	}
+	if (is_no_fault_exception(regs))
+		return;
+
+	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current);
+}
+
+/* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI
+ * tag mismatch.
+ *
+ * ADI version tag mismatch on a load from memory always results in a
+ * precise exception. Tag mismatch on a store to memory will result in
+ * precise exception if MCDPER or PMCDPER is set to 1.
+ */
+void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr,
+				      unsigned long context)
+{
+	if (notify_die(DIE_TRAP, "memory corruption precise exception", regs,
+		       0, 0x8, SIGSEGV) == NOTIFY_STOP)
+		return;
+
+	if (regs->tstate & TSTATE_PRIV) {
+		/* MCD exception could happen because the task was running
+		 * a system call with MCD enabled and passed a non-versioned
+		 * pointer or a pointer with a bad version tag to the system
+		 * call.
+		 */
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			/* Looks like a bad syscall parameter */
+#ifdef DEBUG_EXCEPTIONS
+			pr_emerg("Exception: PC<%016lx> faddr<UNKNOWN>\n",
+				 regs->tpc);
+			pr_emerg("EX_TABLE: insn<%016lx> fixup<%016lx>\n",
+				 regs->tpc, entry->fixup);
+#endif
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+		pr_emerg("%s: ADDR[%016lx] CTX[%lx], going.\n",
+			 __func__, addr, context);
+		die_if_kernel("MCD precise", regs);
+	}
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0, current);
+}
+
+void do_privop(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+
+	if (notify_die(DIE_TRAP, "privileged operation", regs,
+		       0, 0x11, SIGILL) == NOTIFY_STOP)
+		goto out;
+
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+	force_sig_fault(SIGILL, ILL_PRVOPC,
+			(void __user *)regs->tpc, 0, current);
+out:
+	exception_exit(prev_state);
+}
+
+void do_privact(struct pt_regs *regs)
+{
+	do_privop(regs);
+}
+
+/* Trap level 1 stuff or other traps we should never see... */
+void do_cee(struct pt_regs *regs)
+{
+	exception_enter();
+	die_if_kernel("TL0: Cache Error Exception", regs);
+}
+
+void do_div0_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: DIV0 Exception", regs);
+}
+
+void do_fpieee_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: FPU IEEE Exception", regs);
+}
+
+void do_fpother_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: FPU Other Exception", regs);
+}
+
+void do_ill_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: Illegal Instruction Exception", regs);
+}
+
+void do_irq_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: IRQ Exception", regs);
+}
+
+void do_lddfmna_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: LDDF Exception", regs);
+}
+
+void do_stdfmna_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: STDF Exception", regs);
+}
+
+void do_paw(struct pt_regs *regs)
+{
+	exception_enter();
+	die_if_kernel("TL0: Phys Watchpoint Exception", regs);
+}
+
+void do_paw_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: Phys Watchpoint Exception", regs);
+}
+
+void do_vaw(struct pt_regs *regs)
+{
+	exception_enter();
+	die_if_kernel("TL0: Virt Watchpoint Exception", regs);
+}
+
+void do_vaw_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: Virt Watchpoint Exception", regs);
+}
+
+void do_tof_tl1(struct pt_regs *regs)
+{
+	exception_enter();
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	die_if_kernel("TL1: Tag Overflow Exception", regs);
+}
+
+void do_getpsr(struct pt_regs *regs)
+{
+	regs->u_regs[UREG_I0] = tstate_to_psr(regs->tstate);
+	regs->tpc   = regs->tnpc;
+	regs->tnpc += 4;
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+}
+
+u64 cpu_mondo_counter[NR_CPUS] = {0};
+struct trap_per_cpu trap_block[NR_CPUS];
+EXPORT_SYMBOL(trap_block);
+
+/* This can get invoked before sched_init() so play it super safe
+ * and use hard_smp_processor_id().
+ */
+void notrace init_cur_cpu_trap(struct thread_info *t)
+{
+	int cpu = hard_smp_processor_id();
+	struct trap_per_cpu *p = &trap_block[cpu];
+
+	p->thread = t;
+	p->pgd_paddr = 0;
+}
+
+extern void thread_info_offsets_are_bolixed_dave(void);
+extern void trap_per_cpu_offsets_are_bolixed_dave(void);
+extern void tsb_config_offsets_are_bolixed_dave(void);
+
+/* Only invoked on boot processor. */
+void __init trap_init(void)
+{
+	/* Compile time sanity check. */
+	BUILD_BUG_ON(TI_TASK != offsetof(struct thread_info, task) ||
+		     TI_FLAGS != offsetof(struct thread_info, flags) ||
+		     TI_CPU != offsetof(struct thread_info, cpu) ||
+		     TI_FPSAVED != offsetof(struct thread_info, fpsaved) ||
+		     TI_KSP != offsetof(struct thread_info, ksp) ||
+		     TI_FAULT_ADDR != offsetof(struct thread_info,
+					       fault_address) ||
+		     TI_KREGS != offsetof(struct thread_info, kregs) ||
+		     TI_UTRAPS != offsetof(struct thread_info, utraps) ||
+		     TI_REG_WINDOW != offsetof(struct thread_info,
+					       reg_window) ||
+		     TI_RWIN_SPTRS != offsetof(struct thread_info,
+					       rwbuf_stkptrs) ||
+		     TI_GSR != offsetof(struct thread_info, gsr) ||
+		     TI_XFSR != offsetof(struct thread_info, xfsr) ||
+		     TI_PRE_COUNT != offsetof(struct thread_info,
+					      preempt_count) ||
+		     TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
+		     TI_CURRENT_DS != offsetof(struct thread_info,
+						current_ds) ||
+		     TI_KUNA_REGS != offsetof(struct thread_info,
+					      kern_una_regs) ||
+		     TI_KUNA_INSN != offsetof(struct thread_info,
+					      kern_una_insn) ||
+		     TI_FPREGS != offsetof(struct thread_info, fpregs) ||
+		     (TI_FPREGS & (64 - 1)));
+
+	BUILD_BUG_ON(TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu,
+						     thread) ||
+		     (TRAP_PER_CPU_PGD_PADDR !=
+		      offsetof(struct trap_per_cpu, pgd_paddr)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_pa)) ||
+		     (TRAP_PER_CPU_DEV_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, dev_mondo_pa)) ||
+		     (TRAP_PER_CPU_RESUM_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, resum_mondo_pa)) ||
+		     (TRAP_PER_CPU_RESUM_KBUF_PA !=
+		      offsetof(struct trap_per_cpu, resum_kernel_buf_pa)) ||
+		     (TRAP_PER_CPU_NONRESUM_MONDO_PA !=
+		      offsetof(struct trap_per_cpu, nonresum_mondo_pa)) ||
+		     (TRAP_PER_CPU_NONRESUM_KBUF_PA !=
+		      offsetof(struct trap_per_cpu, nonresum_kernel_buf_pa)) ||
+		     (TRAP_PER_CPU_FAULT_INFO !=
+		      offsetof(struct trap_per_cpu, fault_info)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_BLOCK_PA !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_block_pa)) ||
+		     (TRAP_PER_CPU_CPU_LIST_PA !=
+		      offsetof(struct trap_per_cpu, cpu_list_pa)) ||
+		     (TRAP_PER_CPU_TSB_HUGE !=
+		      offsetof(struct trap_per_cpu, tsb_huge)) ||
+		     (TRAP_PER_CPU_TSB_HUGE_TEMP !=
+		      offsetof(struct trap_per_cpu, tsb_huge_temp)) ||
+		     (TRAP_PER_CPU_IRQ_WORKLIST_PA !=
+		      offsetof(struct trap_per_cpu, irq_worklist_pa)) ||
+		     (TRAP_PER_CPU_CPU_MONDO_QMASK !=
+		      offsetof(struct trap_per_cpu, cpu_mondo_qmask)) ||
+		     (TRAP_PER_CPU_DEV_MONDO_QMASK !=
+		      offsetof(struct trap_per_cpu, dev_mondo_qmask)) ||
+		     (TRAP_PER_CPU_RESUM_QMASK !=
+		      offsetof(struct trap_per_cpu, resum_qmask)) ||
+		     (TRAP_PER_CPU_NONRESUM_QMASK !=
+		      offsetof(struct trap_per_cpu, nonresum_qmask)) ||
+		     (TRAP_PER_CPU_PER_CPU_BASE !=
+		      offsetof(struct trap_per_cpu, __per_cpu_base)));
+
+	BUILD_BUG_ON((TSB_CONFIG_TSB !=
+		      offsetof(struct tsb_config, tsb)) ||
+		     (TSB_CONFIG_RSS_LIMIT !=
+		      offsetof(struct tsb_config, tsb_rss_limit)) ||
+		     (TSB_CONFIG_NENTRIES !=
+		      offsetof(struct tsb_config, tsb_nentries)) ||
+		     (TSB_CONFIG_REG_VAL !=
+		      offsetof(struct tsb_config, tsb_reg_val)) ||
+		     (TSB_CONFIG_MAP_VADDR !=
+		      offsetof(struct tsb_config, tsb_map_vaddr)) ||
+		     (TSB_CONFIG_MAP_PTE !=
+		      offsetof(struct tsb_config, tsb_map_pte)));
+
+	/* Attach to the address space of init_task.  On SMP we
+	 * do this in smp.c:smp_callin for other cpus.
+	 */
+	mmgrab(&init_mm);
+	current->active_mm = &init_mm;
+}
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
new file mode 100644
index 0000000..eaed39c
--- /dev/null
+++ b/arch/sparc/kernel/tsb.S
@@ -0,0 +1,591 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* tsb.S: Sparc64 TSB table handling.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/tsb.h>
+#include <asm/hypervisor.h>
+#include <asm/page.h>
+#include <asm/cpudata.h>
+#include <asm/mmu.h>
+
+	.text
+	.align	32
+
+	/* Invoked from TLB miss handler, we are in the
+	 * MMU global registers and they are setup like
+	 * this:
+	 *
+	 * %g1: TSB entry pointer
+	 * %g2:	available temporary
+	 * %g3:	FAULT_CODE_{D,I}TLB
+	 * %g4:	available temporary
+	 * %g5:	available temporary
+	 * %g6: TAG TARGET
+	 * %g7:	available temporary, will be loaded by us with
+	 *      the physical address base of the linux page
+	 *      tables for the current address space
+	 */
+tsb_miss_dtlb:
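+	/* TLB_TAG_ACCESS holds VA | context; the srlx/sllx pair below
+	 * strips the context ID from the low PAGE_SHIFT bits, leaving
+	 * the page-aligned missing vaddr in %g4.
+	 */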
+	mov		TLB_TAG_ACCESS, %g4
+	ldxa		[%g4] ASI_DMMU, %g4
+	srlx		%g4, PAGE_SHIFT, %g4
+	ba,pt		%xcc, tsb_miss_page_table_walk
+	 sllx		%g4, PAGE_SHIFT, %g4
+
+tsb_miss_itlb:
+	mov		TLB_TAG_ACCESS, %g4
+	ldxa		[%g4] ASI_IMMU, %g4
+	srlx		%g4, PAGE_SHIFT, %g4
+	ba,pt		%xcc, tsb_miss_page_table_walk
+	 sllx		%g4, PAGE_SHIFT, %g4
+
+	/* At this point we have:
+	 * %g1 --	PAGE_SIZE TSB entry address
+	 * %g3 --	FAULT_CODE_{D,I}TLB
+	 * %g4 --	missing virtual address
+	 * %g6 --	TAG TARGET (vaddr >> 22)
+	 */
+tsb_miss_page_table_walk:
+	TRAP_LOAD_TRAP_BLOCK(%g7, %g5)
+
+	/* Before committing to a full page table walk,
+	 * check the huge page TSB.
+	 */
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+
+661:	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE], %g5
+	nop
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	mov		SCRATCHPAD_UTSBREG2, %g5
+	ldxa		[%g5] ASI_SCRATCHPAD, %g5
+	.previous
+
+	cmp		%g5, -1
+	be,pt		%xcc, 80f
+	 nop
+
+	/* We need an aligned pair of registers containing 2 values
+	 * which can be easily rematerialized.  %g6 and %g7 fit the
+	 * bill just nicely.  We'll save %g6 away into %g2 for the
+	 * huge page TSB TAG comparison.
+	 *
+	 * Perform a huge page TSB lookup.
+	 */
+	mov		%g6, %g2
+	and		%g5, 0x7, %g6
+	mov		512, %g7
+	andn		%g5, 0x7, %g5
+	sllx		%g7, %g6, %g7
+	srlx		%g4, REAL_HPAGE_SHIFT, %g6
+	sub		%g7, 1, %g7
+	and		%g6, %g7, %g6
+	sllx		%g6, 4, %g6
+	add		%g5, %g6, %g5
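+	/* Illustratively: a size field of 1 in the low bits of the TSB
+	 * register means 512 << 1 == 1024 entries; (vaddr >>
+	 * REAL_HPAGE_SHIFT) & 1023 picks the entry, and the << 4
+	 * scales that index to a 16-byte TAG/TTE pair offset.
+	 */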
+
+	TSB_LOAD_QUAD(%g5, %g6)
+	cmp		%g6, %g2
+	be,a,pt		%xcc, tsb_tlb_reload
+	 mov		%g7, %g5
+
+	/* No match, remember the huge page TSB entry address,
+	 * and restore %g6 and %g7.
+	 */
+	TRAP_LOAD_TRAP_BLOCK(%g7, %g6)
+	srlx		%g4, 22, %g6
+80:	stx		%g5, [%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP]
+
+#endif
+
+	ldx		[%g7 + TRAP_PER_CPU_PGD_PADDR], %g7
+
+	/* At this point we have:
+	 * %g1 --	TSB entry address
+	 * %g3 --	FAULT_CODE_{D,I}TLB
+	 * %g4 --	missing virtual address
+	 * %g6 --	TAG TARGET (vaddr >> 22)
+	 * %g7 --	page table physical address
+	 *
+	 * We know that both the base PAGE_SIZE TSB and the HPAGE_SIZE
+	 * TSB lack a matching entry.
+	 */
+tsb_miss_page_table_walk_sun4v_fastpath:
+	USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
+
+	/* Valid PTE is now in %g5.  */
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	sethi		%uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
+	sllx		%g7, 32, %g7
+
+	andcc		%g5, %g7, %g0
+	be,pt		%xcc, 60f
+	 nop
+
+	/* It is a huge page, use huge page TSB entry address we
+	 * calculated above.  If the huge page TSB has not been
+	 * allocated, setup a trap stack and call hugetlb_setup()
+	 * to do so, then return from the trap to replay the TLB
+	 * miss.
+	 *
+	 * This is necessary to handle the case of transparent huge
+	 * pages where we don't really have a non-atomic context
+	 * in which to allocate the hugepage TSB hash table.  When
+	 * the 'mm' faults in the hugepage for the first time, we
+	 * thus handle it here.  This also makes sure that we can
+	 * allocate the TSB hash table on the correct NUMA node.
+	 */
+	TRAP_LOAD_TRAP_BLOCK(%g7, %g2)
+	ldx		[%g7 + TRAP_PER_CPU_TSB_HUGE_TEMP], %g1
+	cmp		%g1, -1
+	bne,pt		%xcc, 60f
+	 nop
+
+661:	rdpr		%pstate, %g5
+	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	SET_GL(1)
+	nop
+	.previous
+
+	rdpr	%tl, %g7
+	cmp	%g7, 1
+	bne,pn	%xcc, winfix_trampoline
+	 mov	%g3, %g4
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	call	hugetlb_setup
+	 add	%sp, PTREGS_OFF, %o0
+	ba,pt	%xcc, rtrap
+	 nop
+
+60:
+#endif
+
+	/* At this point we have:
+	 * %g1 --	TSB entry address
+	 * %g3 --	FAULT_CODE_{D,I}TLB
+	 * %g5 --	valid PTE
+	 * %g6 --	TAG TARGET (vaddr >> 22)
+	 */
+tsb_reload:
+	TSB_LOCK_TAG(%g1, %g2, %g7)
+	TSB_WRITE(%g1, %g5, %g6)
+
+	/* Finally, load TLB and return from trap.  */
+tsb_tlb_reload:
+	cmp		%g3, FAULT_CODE_DTLB
+	bne,pn		%xcc, tsb_itlb_load
+	 nop
+
+tsb_dtlb_load:
+
+661:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN
+	retry
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	/* For sun4v the ASI_DTLB_DATA_IN store and the retry
+	 * instruction get nop'd out and we get here to branch
+	 * to the sun4v tlb load code.  The registers are setup
+	 * as follows:
+	 *
+	 * %g4: vaddr
+	 * %g5: PTE
+	 * %g6:	TAG
+	 *
+	 * The sun4v TLB load wants the PTE in %g3 so we fix that
+	 * up here.
+	 */
+	ba,pt		%xcc, sun4v_dtlb_load
+	 mov		%g5, %g3
+
+tsb_itlb_load:
+	/* Executable bit must be set.  */
+661:	sethi		%hi(_PAGE_EXEC_4U), %g4
+	andcc		%g5, %g4, %g0
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	andcc		%g5, _PAGE_EXEC_4V, %g0
+	nop
+	.previous
+
+	be,pn		%xcc, tsb_do_fault
+	 nop
+
+661:	stxa		%g5, [%g0] ASI_ITLB_DATA_IN
+	retry
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	/* For sun4v the ASI_ITLB_DATA_IN store and the retry
+	 * instruction get nop'd out and we get here to branch
+	 * to the sun4v tlb load code.  The registers are setup
+	 * as follows:
+	 *
+	 * %g4: vaddr
+	 * %g5: PTE
+	 * %g6:	TAG
+	 *
+	 * The sun4v TLB load wants the PTE in %g3 so we fix that
+	 * up here.
+	 */
+	ba,pt		%xcc, sun4v_itlb_load
+	 mov		%g5, %g3
+
+	/* No valid entry in the page tables, do full fault
+	 * processing.
+	 */
+
+	.globl		tsb_do_fault
+tsb_do_fault:
+	cmp		%g3, FAULT_CODE_DTLB
+
+661:	rdpr		%pstate, %g5
+	wrpr		%g5, PSTATE_AG | PSTATE_MG, %pstate
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	SET_GL(1)
+	ldxa		[%g0] ASI_SCRATCHPAD, %g4
+	.previous
+
+	bne,pn		%xcc, tsb_do_itlb_fault
+	 nop
+
+tsb_do_dtlb_fault:
+	rdpr	%tl, %g3
+	cmp	%g3, 1
+
+661:	mov	TLB_TAG_ACCESS, %g4
+	ldxa	[%g4] ASI_DMMU, %g5
+	.section .sun4v_2insn_patch, "ax"
+	.word	661b
+	ldx	[%g4 + HV_FAULT_D_ADDR_OFFSET], %g5
+	nop
+	.previous
+
+	/* Clear context ID bits.  */
+	srlx		%g5, PAGE_SHIFT, %g5
+	sllx		%g5, PAGE_SHIFT, %g5
+
+	be,pt	%xcc, sparc64_realfault_common
+	 mov	FAULT_CODE_DTLB, %g4
+	ba,pt	%xcc, winfix_trampoline
+	 nop
+
+tsb_do_itlb_fault:
+	rdpr	%tpc, %g5
+	ba,pt	%xcc, sparc64_realfault_common
+	 mov	FAULT_CODE_ITLB, %g4
+
+	.globl	sparc64_realfault_common
+sparc64_realfault_common:
+	/* fault code in %g4, fault address in %g5, etrap will
+	 * preserve these two values in %l4 and %l5 respectively
+	 */
+	ba,pt	%xcc, etrap			! Save trap state
+1:	 rd	%pc, %g7			! ...
+	stb	%l4, [%g6 + TI_FAULT_CODE]	! Save fault code
+	stx	%l5, [%g6 + TI_FAULT_ADDR]	! Save fault address
+	call	do_sparc64_fault		! Call fault handler
+	 add	%sp, PTREGS_OFF, %o0		! Compute pt_regs arg
+	ba,pt	%xcc, rtrap			! Restore cpu state
+	 nop					! Delay slot (fill me)
+
+winfix_trampoline:
+	rdpr	%tpc, %g3			! Prepare winfixup TNPC
+	or	%g3, 0x7c, %g3			! Compute branch offset
+	wrpr	%g3, %tnpc			! Write it into TNPC
+	done					! Trap return
+
+	/* Insert an entry into the TSB.
+	 *
+	 * %o0: TSB entry pointer (virt or phys address)
+	 * %o1: tag
+	 * %o2:	pte
+	 */
+	.align	32
+	.globl	__tsb_insert
+__tsb_insert:
+	rdpr	%pstate, %o5
+	wrpr	%o5, PSTATE_IE, %pstate
+	TSB_LOCK_TAG(%o0, %g2, %g3)
+	TSB_WRITE(%o0, %o2, %o1)
+	wrpr	%o5, %pstate
+	retl
+	 nop
+	.size	__tsb_insert, .-__tsb_insert
+
+	/* Flush the given TSB entry if it has the matching
+	 * tag.
+	 *
+	 * %o0: TSB entry pointer (virt or phys address)
+	 * %o1:	tag
+	 */
+	.align	32
+	.globl	tsb_flush
+	.type	tsb_flush,#function
+tsb_flush:
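+	/* Spin while the entry's lock bit is set, bail if the tag no
+	 * longer matches %o1, otherwise CAS the tag to the invalid
+	 * pattern, retrying if the entry changed underneath us.
+	 */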
+	sethi	%hi(TSB_TAG_LOCK_HIGH), %g2
+1:	TSB_LOAD_TAG(%o0, %g1)
+	srlx	%g1, 32, %o3
+	andcc	%o3, %g2, %g0
+	bne,pn	%icc, 1b
+	 nop
+	cmp	%g1, %o1
+	mov	1, %o3
+	bne,pt	%xcc, 2f
+	 sllx	%o3, TSB_TAG_INVALID_BIT, %o3
+	TSB_CAS_TAG(%o0, %g1, %o3)
+	cmp	%g1, %o3
+	bne,pn	%xcc, 1b
+	 nop
+2:	retl
+	 nop
+	.size	tsb_flush, .-tsb_flush
+
+	/* Reload MMU related context switch state at
+	 * schedule() time.
+	 *
+	 * %o0: page table physical address
+	 * %o1:	TSB base config pointer
+	 * %o2:	TSB huge config pointer, or NULL if none
+	 * %o3:	Hypervisor TSB descriptor physical address
+	 * %o4: Secondary context to load, if non-zero
+	 *
+	 * We have to run this whole thing with interrupts
+	 * disabled so that the current cpu doesn't change
+	 * due to preemption.
+	 */
+	.align	32
+	.globl	__tsb_context_switch
+	.type	__tsb_context_switch,#function
+__tsb_context_switch:
+	rdpr	%pstate, %g1
+	wrpr	%g1, PSTATE_IE, %pstate
+
+	brz,pn	%o4, 1f
+	 mov	SECONDARY_CONTEXT, %o5
+
+661:	stxa	%o4, [%o5] ASI_DMMU
+	.section .sun4v_1insn_patch, "ax"
+	.word	661b
+	stxa	%o4, [%o5] ASI_MMU
+	.previous
+	flush	%g6
+
+1:
+	TRAP_LOAD_TRAP_BLOCK(%g2, %g3)
+
+	stx	%o0, [%g2 + TRAP_PER_CPU_PGD_PADDR]
+
+	ldx	[%o1 + TSB_CONFIG_REG_VAL], %o0
+	brz,pt	%o2, 1f
+	 mov	-1, %g3
+
+	ldx	[%o2 + TSB_CONFIG_REG_VAL], %g3
+
+1:	stx	%g3, [%g2 + TRAP_PER_CPU_TSB_HUGE]
+
+	sethi	%hi(tlb_type), %g2
+	lduw	[%g2 + %lo(tlb_type)], %g2
+	cmp	%g2, 3
+	bne,pt	%icc, 50f
+	 nop
+
+	/* Hypervisor TSB switch. */
+	mov	SCRATCHPAD_UTSBREG1, %o5
+	stxa	%o0, [%o5] ASI_SCRATCHPAD
+	mov	SCRATCHPAD_UTSBREG2, %o5
+	stxa	%g3, [%o5] ASI_SCRATCHPAD
+
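+	/* TSB count for the hypervisor call: two descriptors when a
+	 * huge page TSB is configured, one (conditional move on
+	 * %g3 == -1) when it is not.
+	 */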
+	mov	2, %o0
+	cmp	%g3, -1
+	move	%xcc, 1, %o0
+
+	mov	HV_FAST_MMU_TSB_CTXNON0, %o5
+	mov	%o3, %o1
+	ta	HV_FAST_TRAP
+
+	/* Finish up.  */
+	ba,pt	%xcc, 9f
+	 nop
+
+	/* SUN4U TSB switch.  */
+50:	mov	TSB_REG, %o5
+	stxa	%o0, [%o5] ASI_DMMU
+	membar	#Sync
+	stxa	%o0, [%o5] ASI_IMMU
+	membar	#Sync
+
+2:	ldx	[%o1 + TSB_CONFIG_MAP_VADDR], %o4
+	brz	%o4, 9f
+	 ldx	[%o1 + TSB_CONFIG_MAP_PTE], %o5
+
+	sethi	%hi(sparc64_highest_unlocked_tlb_ent), %g2
+	mov	TLB_TAG_ACCESS, %g3
+	lduw	[%g2 + %lo(sparc64_highest_unlocked_tlb_ent)], %g2
+	stxa	%o4, [%g3] ASI_DMMU
+	membar	#Sync
+	sllx	%g2, 3, %g2
+	stxa	%o5, [%g2] ASI_DTLB_DATA_ACCESS
+	membar	#Sync
+
+	brz,pt	%o2, 9f
+	 nop
+
+	ldx	[%o2 + TSB_CONFIG_MAP_VADDR], %o4
+	ldx	[%o2 + TSB_CONFIG_MAP_PTE], %o5
+	mov	TLB_TAG_ACCESS, %g3
+	stxa	%o4, [%g3] ASI_DMMU
+	membar	#Sync
+	sub	%g2, (1 << 3), %g2
+	stxa	%o5, [%g2] ASI_DTLB_DATA_ACCESS
+	membar	#Sync
+
+9:
+	wrpr	%g1, %pstate
+
+	retl
+	 nop
+	.size	__tsb_context_switch, .-__tsb_context_switch
+
+#define TSB_PASS_BITS	((1 << TSB_TAG_LOCK_BIT) | \
+			 (1 << TSB_TAG_INVALID_BIT))
+
+	.align	32
+	.globl	copy_tsb
+	.type	copy_tsb,#function
+copy_tsb:		/* %o0=old_tsb_base, %o1=old_tsb_size
+			 * %o2=new_tsb_base, %o3=new_tsb_size
+			 * %o4=page_size_shift
+			 */
+	sethi		%uhi(TSB_PASS_BITS), %g7
+	srlx		%o3, 4, %o3
+	add		%o0, %o1, %o1	/* end of old tsb */
+	sllx		%g7, 32, %g7
+	sub		%o3, 1, %o3	/* %o3 == new tsb hash mask */
+
+	mov		%o4, %g1	/* page_size_shift */
+
+661:	prefetcha	[%o0] ASI_N, #one_read
+	.section	.tsb_phys_patch, "ax"
+	.word		661b
+	prefetcha	[%o0] ASI_PHYS_USE_EC, #one_read
+	.previous
+
+90:	andcc		%o0, (64 - 1), %g0
+	bne		1f
+	 add		%o0, 64, %o5
+
+661:	prefetcha	[%o5] ASI_N, #one_read
+	.section	.tsb_phys_patch, "ax"
+	.word		661b
+	prefetcha	[%o5] ASI_PHYS_USE_EC, #one_read
+	.previous
+
+1:	TSB_LOAD_QUAD(%o0, %g2)		/* %g2/%g3 == TSB entry */
+	andcc		%g2, %g7, %g0	/* LOCK or INVALID set? */
+	bne,pn		%xcc, 80f	/* Skip it */
+	 sllx		%g2, 22, %o4	/* TAG --> VADDR */
+
+	/* This can definitely be computed faster... */
+	srlx		%o0, 4, %o5	/* Build index */
+	and		%o5, 511, %o5	/* Mask index */
+	sllx		%o5, %g1, %o5	/* Put into vaddr position */
+	or		%o4, %o5, %o4	/* Full VADDR. */
+	srlx		%o4, %g1, %o4	/* Shift down to create index */
+	and		%o4, %o3, %o4	/* Mask with new_tsb_nents-1 */
+	sllx		%o4, 4, %o4	/* Shift back up into tsb ent offset */
+	TSB_STORE(%o2 + %o4, %g2)	/* Store TAG */
+	add		%o4, 0x8, %o4	/* Advance to TTE */
+	TSB_STORE(%o2 + %o4, %g3)	/* Store TTE */
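+	/* With %g1 == PAGE_SHIFT (13), for example, the "and 511"
+	 * keeps the nine vaddr bits 21:13 that sit between the tag
+	 * (vaddr >> 22) and the page offset, so OR-ing the shifted
+	 * index into the shifted tag rebuilds the full VADDR.
+	 */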
+
+80:	add		%o0, 16, %o0
+	cmp		%o0, %o1
+	bne,pt		%xcc, 90b
+	 nop
+
+	retl
+	 nop
+	.size		copy_tsb, .-copy_tsb
+
+	/* Set the invalid bit in all TSB entries.  */
+	.align		32
+	.globl		tsb_init
+	.type		tsb_init,#function
+tsb_init:		/* %o0 = TSB vaddr, %o1 = size in bytes */
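+	/* Each TSB entry is 16 bytes (8-byte tag + 8-byte TTE) and only
+	 * the tag word needs TSB_TAG_INVALID_BIT set, hence the
+	 * 0x10-stride stores: sixteen per 0x100-byte loop iteration.
+	 */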
+	prefetch	[%o0 + 0x000], #n_writes
+	mov		1, %g1
+	prefetch	[%o0 + 0x040], #n_writes
+	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
+	prefetch	[%o0 + 0x080], #n_writes
+1:	prefetch	[%o0 + 0x0c0], #n_writes
+	stx		%g1, [%o0 + 0x00]
+	stx		%g1, [%o0 + 0x10]
+	stx		%g1, [%o0 + 0x20]
+	stx		%g1, [%o0 + 0x30]
+	prefetch	[%o0 + 0x100], #n_writes
+	stx		%g1, [%o0 + 0x40]
+	stx		%g1, [%o0 + 0x50]
+	stx		%g1, [%o0 + 0x60]
+	stx		%g1, [%o0 + 0x70]
+	prefetch	[%o0 + 0x140], #n_writes
+	stx		%g1, [%o0 + 0x80]
+	stx		%g1, [%o0 + 0x90]
+	stx		%g1, [%o0 + 0xa0]
+	stx		%g1, [%o0 + 0xb0]
+	prefetch	[%o0 + 0x180], #n_writes
+	stx		%g1, [%o0 + 0xc0]
+	stx		%g1, [%o0 + 0xd0]
+	stx		%g1, [%o0 + 0xe0]
+	stx		%g1, [%o0 + 0xf0]
+	subcc		%o1, 0x100, %o1
+	bne,pt		%xcc, 1b
+	 add		%o0, 0x100, %o0
+	retl
+	 nop
+	nop
+	nop
+	.size		tsb_init, .-tsb_init
+
+	.globl		NGtsb_init
+	.type		NGtsb_init,#function
+NGtsb_init:
+	rd		%asi, %g2
+	mov		1, %g1
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	sllx		%g1, TSB_TAG_INVALID_BIT, %g1
+1:	stxa		%g1, [%o0 + 0x00] %asi
+	stxa		%g1, [%o0 + 0x10] %asi
+	stxa		%g1, [%o0 + 0x20] %asi
+	stxa		%g1, [%o0 + 0x30] %asi
+	stxa		%g1, [%o0 + 0x40] %asi
+	stxa		%g1, [%o0 + 0x50] %asi
+	stxa		%g1, [%o0 + 0x60] %asi
+	stxa		%g1, [%o0 + 0x70] %asi
+	stxa		%g1, [%o0 + 0x80] %asi
+	stxa		%g1, [%o0 + 0x90] %asi
+	stxa		%g1, [%o0 + 0xa0] %asi
+	stxa		%g1, [%o0 + 0xb0] %asi
+	stxa		%g1, [%o0 + 0xc0] %asi
+	stxa		%g1, [%o0 + 0xd0] %asi
+	stxa		%g1, [%o0 + 0xe0] %asi
+	stxa		%g1, [%o0 + 0xf0] %asi
+	subcc		%o1, 0x100, %o1
+	bne,pt		%xcc, 1b
+	 add		%o0, 0x100, %o0
+	membar		#Sync
+	retl
+	 wr		%g2, 0x0, %asi
+	.size		NGtsb_init, .-NGtsb_init
diff --git a/arch/sparc/kernel/ttable_32.S b/arch/sparc/kernel/ttable_32.S
new file mode 100644
index 0000000..e79fd78
--- /dev/null
+++ b/arch/sparc/kernel/ttable_32.S
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* The Sparc trap table, bootloader gives us control at _start. */
+        __HEAD
+
+        .globl  _start
+_start:
+
+	.globl _stext
+_stext:
+
+	.globl  trapbase
+trapbase:
+
+#ifdef CONFIG_SMP
+trapbase_cpu0:
+#endif
+/* We get control passed to us here at t_zero. */
+t_zero:	b gokernel; nop; nop; nop;
+t_tflt:	SRMMU_TFAULT                        /* Inst. Access Exception        */
+t_bins:	TRAP_ENTRY(0x2, bad_instruction)    /* Illegal Instruction           */
+t_pins:	TRAP_ENTRY(0x3, priv_instruction)   /* Privileged Instruction        */
+t_fpd:	TRAP_ENTRY(0x4, fpd_trap_handler)   /* Floating Point Disabled       */
+t_wovf:	WINDOW_SPILL                        /* Window Overflow               */
+t_wunf:	WINDOW_FILL                         /* Window Underflow              */
+t_mna:	TRAP_ENTRY(0x7, mna_handler)        /* Memory Address Not Aligned    */
+t_fpe:	TRAP_ENTRY(0x8, fpe_trap_handler)   /* Floating Point Exception      */
+t_dflt:	SRMMU_DFAULT                        /* Data Miss Exception           */
+t_tio:	TRAP_ENTRY(0xa, do_tag_overflow)    /* Tagged Instruction Ovrflw     */
+t_wpt:	TRAP_ENTRY(0xb, do_watchpoint)      /* Watchpoint Detected           */
+t_badc:	BAD_TRAP(0xc) BAD_TRAP(0xd) BAD_TRAP(0xe) BAD_TRAP(0xf) BAD_TRAP(0x10)
+t_irq1:	TRAP_ENTRY_INTERRUPT(1)             /* IRQ Software/SBUS Level 1     */
+t_irq2:	TRAP_ENTRY_INTERRUPT(2)             /* IRQ SBUS Level 2              */
+t_irq3:	TRAP_ENTRY_INTERRUPT(3)             /* IRQ SCSI/DMA/SBUS Level 3     */
+t_irq4:	TRAP_ENTRY_INTERRUPT(4)             /* IRQ Software Level 4          */
+t_irq5:	TRAP_ENTRY_INTERRUPT(5)             /* IRQ SBUS/Ethernet Level 5     */
+t_irq6:	TRAP_ENTRY_INTERRUPT(6)             /* IRQ Software Level 6          */
+t_irq7:	TRAP_ENTRY_INTERRUPT(7)             /* IRQ Video/SBUS Level 5        */
+t_irq8:	TRAP_ENTRY_INTERRUPT(8)             /* IRQ SBUS Level 6              */
+t_irq9:	TRAP_ENTRY_INTERRUPT(9)             /* IRQ SBUS Level 7              */
+t_irq10:TRAP_ENTRY_INTERRUPT(10)            /* IRQ Timer #1 (one we use)     */
+t_irq11:TRAP_ENTRY_INTERRUPT(11)            /* IRQ Floppy Intr.              */
+t_irq12:TRAP_ENTRY_INTERRUPT(12)            /* IRQ Zilog serial chip         */
+t_irq13:TRAP_ENTRY_INTERRUPT(13)            /* IRQ Audio Intr.               */
+t_irq14:TRAP_ENTRY_INTERRUPT(14)            /* IRQ Timer #2                  */
+
+	.globl	t_nmi
+t_nmi:	TRAP_ENTRY(0x1f, linux_trap_ipi15_sun4m)
+
+t_racc:	TRAP_ENTRY(0x20, do_reg_access)     /* General Register Access Error */
+t_iacce:BAD_TRAP(0x21)                      /* Instr Access Error            */
+t_bad22:BAD_TRAP(0x22)
+	BAD_TRAP(0x23)
+t_cpdis:TRAP_ENTRY(0x24, do_cp_disabled)    /* Co-Processor Disabled         */
+t_uflsh:SKIP_TRAP(0x25, unimp_flush)        /* Unimplemented FLUSH inst.     */
+t_bad26:BAD_TRAP(0x26) BAD_TRAP(0x27)
+t_cpexc:TRAP_ENTRY(0x28, do_cp_exception)   /* Co-Processor Exception        */
+t_dacce:SRMMU_DFAULT                        /* Data Access Error             */
+t_hwdz:	TRAP_ENTRY(0x2a, do_hw_divzero)     /* Division by zero, you lose... */
+t_dserr:BAD_TRAP(0x2b)                      /* Data Store Error              */
+t_daccm:BAD_TRAP(0x2c)                      /* Data Access MMU-Miss          */
+t_bad2d:BAD_TRAP(0x2d) BAD_TRAP(0x2e) BAD_TRAP(0x2f) BAD_TRAP(0x30) BAD_TRAP(0x31)
+t_bad32:BAD_TRAP(0x32) BAD_TRAP(0x33) BAD_TRAP(0x34) BAD_TRAP(0x35) BAD_TRAP(0x36)
+t_bad37:BAD_TRAP(0x37) BAD_TRAP(0x38) BAD_TRAP(0x39) BAD_TRAP(0x3a) BAD_TRAP(0x3b)
+t_iaccm:BAD_TRAP(0x3c)                      /* Instr Access MMU-Miss         */
+t_bad3d:BAD_TRAP(0x3d) BAD_TRAP(0x3e) BAD_TRAP(0x3f) BAD_TRAP(0x40) BAD_TRAP(0x41)
+t_bad42:BAD_TRAP(0x42) BAD_TRAP(0x43) BAD_TRAP(0x44) BAD_TRAP(0x45) BAD_TRAP(0x46)
+t_bad47:BAD_TRAP(0x47) BAD_TRAP(0x48) BAD_TRAP(0x49) BAD_TRAP(0x4a) BAD_TRAP(0x4b)
+t_bad4c:BAD_TRAP(0x4c) BAD_TRAP(0x4d) BAD_TRAP(0x4e) BAD_TRAP(0x4f) BAD_TRAP(0x50)
+t_bad51:BAD_TRAP(0x51) BAD_TRAP(0x52) BAD_TRAP(0x53) BAD_TRAP(0x54) BAD_TRAP(0x55)
+t_bad56:BAD_TRAP(0x56) BAD_TRAP(0x57) BAD_TRAP(0x58) BAD_TRAP(0x59) BAD_TRAP(0x5a)
+t_bad5b:BAD_TRAP(0x5b) BAD_TRAP(0x5c) BAD_TRAP(0x5d) BAD_TRAP(0x5e) BAD_TRAP(0x5f)
+t_bad60:BAD_TRAP(0x60) BAD_TRAP(0x61) BAD_TRAP(0x62) BAD_TRAP(0x63) BAD_TRAP(0x64)
+t_bad65:BAD_TRAP(0x65) BAD_TRAP(0x66) BAD_TRAP(0x67) BAD_TRAP(0x68) BAD_TRAP(0x69)
+t_bad6a:BAD_TRAP(0x6a) BAD_TRAP(0x6b) BAD_TRAP(0x6c) BAD_TRAP(0x6d) BAD_TRAP(0x6e)
+t_bad6f:BAD_TRAP(0x6f) BAD_TRAP(0x70) BAD_TRAP(0x71) BAD_TRAP(0x72) BAD_TRAP(0x73)
+t_bad74:BAD_TRAP(0x74) BAD_TRAP(0x75) BAD_TRAP(0x76) BAD_TRAP(0x77) BAD_TRAP(0x78)
+t_bad79:BAD_TRAP(0x79) BAD_TRAP(0x7a) BAD_TRAP(0x7b) BAD_TRAP(0x7c) BAD_TRAP(0x7d)
+t_bad7e:BAD_TRAP(0x7e) BAD_TRAP(0x7f)
+t_bad80:BAD_TRAP(0x80)                      /* SunOS System Call             */
+t_sbkpt:BREAKPOINT_TRAP                     /* Software Breakpoint/KGDB      */
+t_divz:	TRAP_ENTRY(0x82, do_hw_divzero)     /* Divide by zero trap           */
+t_flwin:TRAP_ENTRY(0x83, do_flush_windows)  /* Flush Windows Trap            */
+t_clwin:BAD_TRAP(0x84)                      /* Clean Windows Trap            */
+t_rchk:	BAD_TRAP(0x85)                      /* Range Check                   */
+t_funal:BAD_TRAP(0x86)                      /* Fix Unaligned Access Trap     */
+t_iovf:	BAD_TRAP(0x87)                      /* Integer Overflow Trap         */
+t_bad88:BAD_TRAP(0x88)                      /* Slowaris System Call          */
+t_bad89:BAD_TRAP(0x89)                      /* Net-B.S. System Call          */
+t_bad8a:BAD_TRAP(0x8a) BAD_TRAP(0x8b) BAD_TRAP(0x8c) BAD_TRAP(0x8d) BAD_TRAP(0x8e)
+t_bad8f:BAD_TRAP(0x8f)
+t_linux:LINUX_SYSCALL_TRAP                  /* Linux System Call             */
+t_bad91:BAD_TRAP(0x91) BAD_TRAP(0x92) BAD_TRAP(0x93) BAD_TRAP(0x94) BAD_TRAP(0x95)
+t_bad96:BAD_TRAP(0x96) BAD_TRAP(0x97) BAD_TRAP(0x98) BAD_TRAP(0x99) BAD_TRAP(0x9a)
+t_bad9b:BAD_TRAP(0x9b) BAD_TRAP(0x9c) BAD_TRAP(0x9d) BAD_TRAP(0x9e) BAD_TRAP(0x9f)
+t_getcc:GETCC_TRAP                          /* Get Condition Codes           */
+t_setcc:SETCC_TRAP                          /* Set Condition Codes           */
+t_getpsr:GETPSR_TRAP                        /* Get PSR Register              */
+t_bada3:BAD_TRAP(0xa3) BAD_TRAP(0xa4) BAD_TRAP(0xa5) BAD_TRAP(0xa6)
+t_bada7:BAD_TRAP(0xa7)
+t_bada8:BAD_TRAP(0xa8) BAD_TRAP(0xa9) BAD_TRAP(0xaa) BAD_TRAP(0xab)
+t_badac:BAD_TRAP(0xac) BAD_TRAP(0xad) BAD_TRAP(0xae) BAD_TRAP(0xaf) BAD_TRAP(0xb0)
+t_badb1:BAD_TRAP(0xb1) BAD_TRAP(0xb2) BAD_TRAP(0xb3) BAD_TRAP(0xb4) BAD_TRAP(0xb5)
+t_badb6:BAD_TRAP(0xb6) BAD_TRAP(0xb7) BAD_TRAP(0xb8) BAD_TRAP(0xb9) BAD_TRAP(0xba)
+t_badbb:BAD_TRAP(0xbb) BAD_TRAP(0xbc) BAD_TRAP(0xbd) BAD_TRAP(0xbe) BAD_TRAP(0xbf)
+t_badc0:BAD_TRAP(0xc0) BAD_TRAP(0xc1) BAD_TRAP(0xc2) BAD_TRAP(0xc3) BAD_TRAP(0xc4)
+t_badc5:BAD_TRAP(0xc5) BAD_TRAP(0xc6) BAD_TRAP(0xc7) BAD_TRAP(0xc8) BAD_TRAP(0xc9)
+t_badca:BAD_TRAP(0xca) BAD_TRAP(0xcb) BAD_TRAP(0xcc) BAD_TRAP(0xcd) BAD_TRAP(0xce)
+t_badcf:BAD_TRAP(0xcf) BAD_TRAP(0xd0) BAD_TRAP(0xd1) BAD_TRAP(0xd2) BAD_TRAP(0xd3)
+t_badd4:BAD_TRAP(0xd4) BAD_TRAP(0xd5) BAD_TRAP(0xd6) BAD_TRAP(0xd7) BAD_TRAP(0xd8)
+t_badd9:BAD_TRAP(0xd9) BAD_TRAP(0xda) BAD_TRAP(0xdb) BAD_TRAP(0xdc) BAD_TRAP(0xdd)
+t_badde:BAD_TRAP(0xde) BAD_TRAP(0xdf) BAD_TRAP(0xe0) BAD_TRAP(0xe1) BAD_TRAP(0xe2)
+t_bade3:BAD_TRAP(0xe3) BAD_TRAP(0xe4) BAD_TRAP(0xe5) BAD_TRAP(0xe6) BAD_TRAP(0xe7)
+t_bade8:BAD_TRAP(0xe8) BAD_TRAP(0xe9) BAD_TRAP(0xea) BAD_TRAP(0xeb) BAD_TRAP(0xec)
+t_baded:BAD_TRAP(0xed) BAD_TRAP(0xee) BAD_TRAP(0xef) BAD_TRAP(0xf0) BAD_TRAP(0xf1)
+t_badf2:BAD_TRAP(0xf2) BAD_TRAP(0xf3) BAD_TRAP(0xf4) BAD_TRAP(0xf5) BAD_TRAP(0xf6)
+t_badf7:BAD_TRAP(0xf7) BAD_TRAP(0xf8) BAD_TRAP(0xf9) BAD_TRAP(0xfa) BAD_TRAP(0xfb)
+t_badfc:BAD_TRAP(0xfc)
+t_kgdb:	KGDB_TRAP(0xfd)
+dbtrap:	BAD_TRAP(0xfe)                      /* Debugger/PROM breakpoint #1   */
+dbtrap2:BAD_TRAP(0xff)                      /* Debugger/PROM breakpoint #2   */
+
+	.globl	end_traptable
+end_traptable:
+
+#ifdef CONFIG_SMP
+	/* Trap tables for the other cpus. */
+	.globl	trapbase_cpu1, trapbase_cpu2, trapbase_cpu3
+trapbase_cpu1:
+	BAD_TRAP(0x0)
+	SRMMU_TFAULT
+	TRAP_ENTRY(0x2, bad_instruction)
+	TRAP_ENTRY(0x3, priv_instruction)
+	TRAP_ENTRY(0x4, fpd_trap_handler)
+	WINDOW_SPILL
+	WINDOW_FILL
+	TRAP_ENTRY(0x7, mna_handler)
+	TRAP_ENTRY(0x8, fpe_trap_handler)
+	SRMMU_DFAULT
+	TRAP_ENTRY(0xa, do_tag_overflow)
+	TRAP_ENTRY(0xb, do_watchpoint)
+	BAD_TRAP(0xc) BAD_TRAP(0xd) BAD_TRAP(0xe) BAD_TRAP(0xf) BAD_TRAP(0x10)
+	TRAP_ENTRY_INTERRUPT(1) TRAP_ENTRY_INTERRUPT(2)
+	TRAP_ENTRY_INTERRUPT(3) TRAP_ENTRY_INTERRUPT(4)
+	TRAP_ENTRY_INTERRUPT(5) TRAP_ENTRY_INTERRUPT(6)
+	TRAP_ENTRY_INTERRUPT(7)	TRAP_ENTRY_INTERRUPT(8)
+	TRAP_ENTRY_INTERRUPT(9) TRAP_ENTRY_INTERRUPT(10)
+	TRAP_ENTRY_INTERRUPT(11) TRAP_ENTRY_INTERRUPT(12)
+	TRAP_ENTRY_INTERRUPT(13) TRAP_ENTRY_INTERRUPT(14)
+	TRAP_ENTRY(0x1f, linux_trap_ipi15_sun4m)
+	TRAP_ENTRY(0x20, do_reg_access)
+	BAD_TRAP(0x21)
+	BAD_TRAP(0x22)
+	BAD_TRAP(0x23)
+	TRAP_ENTRY(0x24, do_cp_disabled)
+	SKIP_TRAP(0x25, unimp_flush)
+	BAD_TRAP(0x26)
+	BAD_TRAP(0x27)
+	TRAP_ENTRY(0x28, do_cp_exception)
+	SRMMU_DFAULT
+	TRAP_ENTRY(0x2a, do_hw_divzero)
+	BAD_TRAP(0x2b)
+	BAD_TRAP(0x2c)
+	BAD_TRAP(0x2d) BAD_TRAP(0x2e) BAD_TRAP(0x2f) BAD_TRAP(0x30) BAD_TRAP(0x31)
+	BAD_TRAP(0x32) BAD_TRAP(0x33) BAD_TRAP(0x34) BAD_TRAP(0x35) BAD_TRAP(0x36)
+	BAD_TRAP(0x37) BAD_TRAP(0x38) BAD_TRAP(0x39) BAD_TRAP(0x3a) BAD_TRAP(0x3b)
+	BAD_TRAP(0x3c) BAD_TRAP(0x3d) BAD_TRAP(0x3e) BAD_TRAP(0x3f) BAD_TRAP(0x40)
+	BAD_TRAP(0x41) BAD_TRAP(0x42) BAD_TRAP(0x43) BAD_TRAP(0x44) BAD_TRAP(0x45)
+	BAD_TRAP(0x46) BAD_TRAP(0x47) BAD_TRAP(0x48) BAD_TRAP(0x49) BAD_TRAP(0x4a)
+	BAD_TRAP(0x4b) BAD_TRAP(0x4c) BAD_TRAP(0x4d) BAD_TRAP(0x4e) BAD_TRAP(0x4f)
+	BAD_TRAP(0x50)
+	BAD_TRAP(0x51) BAD_TRAP(0x52) BAD_TRAP(0x53) BAD_TRAP(0x54) BAD_TRAP(0x55)
+	BAD_TRAP(0x56) BAD_TRAP(0x57) BAD_TRAP(0x58) BAD_TRAP(0x59) BAD_TRAP(0x5a)
+	BAD_TRAP(0x5b) BAD_TRAP(0x5c) BAD_TRAP(0x5d) BAD_TRAP(0x5e) BAD_TRAP(0x5f)
+	BAD_TRAP(0x60) BAD_TRAP(0x61) BAD_TRAP(0x62) BAD_TRAP(0x63) BAD_TRAP(0x64)
+	BAD_TRAP(0x65) BAD_TRAP(0x66) BAD_TRAP(0x67) BAD_TRAP(0x68) BAD_TRAP(0x69)
+	BAD_TRAP(0x6a) BAD_TRAP(0x6b) BAD_TRAP(0x6c) BAD_TRAP(0x6d) BAD_TRAP(0x6e)
+	BAD_TRAP(0x6f) BAD_TRAP(0x70) BAD_TRAP(0x71) BAD_TRAP(0x72) BAD_TRAP(0x73)
+	BAD_TRAP(0x74) BAD_TRAP(0x75) BAD_TRAP(0x76) BAD_TRAP(0x77) BAD_TRAP(0x78)
+	BAD_TRAP(0x79) BAD_TRAP(0x7a) BAD_TRAP(0x7b) BAD_TRAP(0x7c) BAD_TRAP(0x7d)
+	BAD_TRAP(0x7e) BAD_TRAP(0x7f)
+	BAD_TRAP(0x80)
+	BREAKPOINT_TRAP
+	TRAP_ENTRY(0x82, do_hw_divzero)
+	TRAP_ENTRY(0x83, do_flush_windows)
+	BAD_TRAP(0x84) BAD_TRAP(0x85) BAD_TRAP(0x86)
+	BAD_TRAP(0x87) BAD_TRAP(0x88) BAD_TRAP(0x89)
+	BAD_TRAP(0x8a) BAD_TRAP(0x8b) BAD_TRAP(0x8c)
+	BAD_TRAP(0x8d) BAD_TRAP(0x8e) BAD_TRAP(0x8f)
+	LINUX_SYSCALL_TRAP BAD_TRAP(0x91)
+	BAD_TRAP(0x92) BAD_TRAP(0x93) BAD_TRAP(0x94)
+	BAD_TRAP(0x95) BAD_TRAP(0x96) BAD_TRAP(0x97) BAD_TRAP(0x98) BAD_TRAP(0x99)
+	BAD_TRAP(0x9a) BAD_TRAP(0x9b) BAD_TRAP(0x9c) BAD_TRAP(0x9d) BAD_TRAP(0x9e)
+	BAD_TRAP(0x9f)
+	GETCC_TRAP
+	SETCC_TRAP
+	GETPSR_TRAP
+	BAD_TRAP(0xa3) BAD_TRAP(0xa4) BAD_TRAP(0xa5) BAD_TRAP(0xa6)
+	BAD_TRAP(0xa7) BAD_TRAP(0xa8) BAD_TRAP(0xa9) BAD_TRAP(0xaa) BAD_TRAP(0xab)
+	BAD_TRAP(0xac) BAD_TRAP(0xad) BAD_TRAP(0xae) BAD_TRAP(0xaf) BAD_TRAP(0xb0)
+	BAD_TRAP(0xb1) BAD_TRAP(0xb2) BAD_TRAP(0xb3) BAD_TRAP(0xb4) BAD_TRAP(0xb5)
+	BAD_TRAP(0xb6) BAD_TRAP(0xb7) BAD_TRAP(0xb8) BAD_TRAP(0xb9) BAD_TRAP(0xba)
+	BAD_TRAP(0xbb) BAD_TRAP(0xbc) BAD_TRAP(0xbd) BAD_TRAP(0xbe) BAD_TRAP(0xbf)
+	BAD_TRAP(0xc0) BAD_TRAP(0xc1) BAD_TRAP(0xc2) BAD_TRAP(0xc3) BAD_TRAP(0xc4)
+	BAD_TRAP(0xc5) BAD_TRAP(0xc6) BAD_TRAP(0xc7) BAD_TRAP(0xc8) BAD_TRAP(0xc9)
+	BAD_TRAP(0xca) BAD_TRAP(0xcb) BAD_TRAP(0xcc) BAD_TRAP(0xcd) BAD_TRAP(0xce)
+	BAD_TRAP(0xcf) BAD_TRAP(0xd0) BAD_TRAP(0xd1) BAD_TRAP(0xd2) BAD_TRAP(0xd3)
+	BAD_TRAP(0xd4) BAD_TRAP(0xd5) BAD_TRAP(0xd6) BAD_TRAP(0xd7) BAD_TRAP(0xd8)
+	BAD_TRAP(0xd9) BAD_TRAP(0xda) BAD_TRAP(0xdb) BAD_TRAP(0xdc) BAD_TRAP(0xdd)
+	BAD_TRAP(0xde) BAD_TRAP(0xdf) BAD_TRAP(0xe0) BAD_TRAP(0xe1) BAD_TRAP(0xe2)
+	BAD_TRAP(0xe3) BAD_TRAP(0xe4) BAD_TRAP(0xe5) BAD_TRAP(0xe6) BAD_TRAP(0xe7)
+	BAD_TRAP(0xe8) BAD_TRAP(0xe9) BAD_TRAP(0xea) BAD_TRAP(0xeb) BAD_TRAP(0xec)
+	BAD_TRAP(0xed) BAD_TRAP(0xee) BAD_TRAP(0xef) BAD_TRAP(0xf0) BAD_TRAP(0xf1)
+	BAD_TRAP(0xf2) BAD_TRAP(0xf3) BAD_TRAP(0xf4) BAD_TRAP(0xf5) BAD_TRAP(0xf6)
+	BAD_TRAP(0xf7) BAD_TRAP(0xf8) BAD_TRAP(0xf9) BAD_TRAP(0xfa) BAD_TRAP(0xfb)
+	BAD_TRAP(0xfc)
+	KGDB_TRAP(0xfd)
+	BAD_TRAP(0xfe)
+	BAD_TRAP(0xff)
+
+trapbase_cpu2:
+	BAD_TRAP(0x0)
+	SRMMU_TFAULT
+	TRAP_ENTRY(0x2, bad_instruction)
+	TRAP_ENTRY(0x3, priv_instruction)
+	TRAP_ENTRY(0x4, fpd_trap_handler)
+	WINDOW_SPILL
+	WINDOW_FILL
+	TRAP_ENTRY(0x7, mna_handler)
+	TRAP_ENTRY(0x8, fpe_trap_handler)
+	SRMMU_DFAULT
+	TRAP_ENTRY(0xa, do_tag_overflow)
+	TRAP_ENTRY(0xb, do_watchpoint)
+	BAD_TRAP(0xc) BAD_TRAP(0xd) BAD_TRAP(0xe) BAD_TRAP(0xf) BAD_TRAP(0x10)
+	TRAP_ENTRY_INTERRUPT(1)
+	TRAP_ENTRY_INTERRUPT(2)
+	TRAP_ENTRY_INTERRUPT(3)
+	TRAP_ENTRY_INTERRUPT(4)
+	TRAP_ENTRY_INTERRUPT(5)
+	TRAP_ENTRY_INTERRUPT(6)
+	TRAP_ENTRY_INTERRUPT(7)
+	TRAP_ENTRY_INTERRUPT(8)
+	TRAP_ENTRY_INTERRUPT(9)
+	TRAP_ENTRY_INTERRUPT(10)
+	TRAP_ENTRY_INTERRUPT(11)
+	TRAP_ENTRY_INTERRUPT(12)
+	TRAP_ENTRY_INTERRUPT(13)
+	TRAP_ENTRY_INTERRUPT(14)
+	TRAP_ENTRY(0x1f, linux_trap_ipi15_sun4m)
+	TRAP_ENTRY(0x20, do_reg_access)
+	BAD_TRAP(0x21)
+	BAD_TRAP(0x22)
+	BAD_TRAP(0x23)
+	TRAP_ENTRY(0x24, do_cp_disabled)
+	SKIP_TRAP(0x25, unimp_flush)
+	BAD_TRAP(0x26)
+	BAD_TRAP(0x27)
+	TRAP_ENTRY(0x28, do_cp_exception)
+	SRMMU_DFAULT
+	TRAP_ENTRY(0x2a, do_hw_divzero)
+	BAD_TRAP(0x2b)
+	BAD_TRAP(0x2c)
+	BAD_TRAP(0x2d) BAD_TRAP(0x2e) BAD_TRAP(0x2f) BAD_TRAP(0x30) BAD_TRAP(0x31)
+	BAD_TRAP(0x32) BAD_TRAP(0x33) BAD_TRAP(0x34) BAD_TRAP(0x35) BAD_TRAP(0x36)
+	BAD_TRAP(0x37) BAD_TRAP(0x38) BAD_TRAP(0x39) BAD_TRAP(0x3a) BAD_TRAP(0x3b)
+	BAD_TRAP(0x3c) BAD_TRAP(0x3d) BAD_TRAP(0x3e) BAD_TRAP(0x3f) BAD_TRAP(0x40)
+	BAD_TRAP(0x41) BAD_TRAP(0x42) BAD_TRAP(0x43) BAD_TRAP(0x44) BAD_TRAP(0x45)
+	BAD_TRAP(0x46) BAD_TRAP(0x47) BAD_TRAP(0x48) BAD_TRAP(0x49) BAD_TRAP(0x4a)
+	BAD_TRAP(0x4b) BAD_TRAP(0x4c) BAD_TRAP(0x4d) BAD_TRAP(0x4e) BAD_TRAP(0x4f)
+	BAD_TRAP(0x50)
+	BAD_TRAP(0x51) BAD_TRAP(0x52) BAD_TRAP(0x53) BAD_TRAP(0x54) BAD_TRAP(0x55)
+	BAD_TRAP(0x56) BAD_TRAP(0x57) BAD_TRAP(0x58) BAD_TRAP(0x59) BAD_TRAP(0x5a)
+	BAD_TRAP(0x5b) BAD_TRAP(0x5c) BAD_TRAP(0x5d) BAD_TRAP(0x5e) BAD_TRAP(0x5f)
+	BAD_TRAP(0x60) BAD_TRAP(0x61) BAD_TRAP(0x62) BAD_TRAP(0x63) BAD_TRAP(0x64)
+	BAD_TRAP(0x65) BAD_TRAP(0x66) BAD_TRAP(0x67) BAD_TRAP(0x68) BAD_TRAP(0x69)
+	BAD_TRAP(0x6a) BAD_TRAP(0x6b) BAD_TRAP(0x6c) BAD_TRAP(0x6d) BAD_TRAP(0x6e)
+	BAD_TRAP(0x6f) BAD_TRAP(0x70) BAD_TRAP(0x71) BAD_TRAP(0x72) BAD_TRAP(0x73)
+	BAD_TRAP(0x74) BAD_TRAP(0x75) BAD_TRAP(0x76) BAD_TRAP(0x77) BAD_TRAP(0x78)
+	BAD_TRAP(0x79) BAD_TRAP(0x7a) BAD_TRAP(0x7b) BAD_TRAP(0x7c) BAD_TRAP(0x7d)
+	BAD_TRAP(0x7e) BAD_TRAP(0x7f)
+	BAD_TRAP(0x80)
+	BREAKPOINT_TRAP
+	TRAP_ENTRY(0x82, do_hw_divzero)
+	TRAP_ENTRY(0x83, do_flush_windows)
+	BAD_TRAP(0x84)
+	BAD_TRAP(0x85)
+	BAD_TRAP(0x86) BAD_TRAP(0x87) BAD_TRAP(0x88)
+	BAD_TRAP(0x89) BAD_TRAP(0x8a) BAD_TRAP(0x8b) BAD_TRAP(0x8c)
+	BAD_TRAP(0x8d) BAD_TRAP(0x8e) BAD_TRAP(0x8f)
+	LINUX_SYSCALL_TRAP BAD_TRAP(0x91)
+	BAD_TRAP(0x92) BAD_TRAP(0x93) BAD_TRAP(0x94)
+	BAD_TRAP(0x95) BAD_TRAP(0x96) BAD_TRAP(0x97) BAD_TRAP(0x98) BAD_TRAP(0x99)
+	BAD_TRAP(0x9a) BAD_TRAP(0x9b) BAD_TRAP(0x9c) BAD_TRAP(0x9d) BAD_TRAP(0x9e)
+	BAD_TRAP(0x9f)
+	GETCC_TRAP
+	SETCC_TRAP
+	GETPSR_TRAP
+	BAD_TRAP(0xa3) BAD_TRAP(0xa4) BAD_TRAP(0xa5) BAD_TRAP(0xa6)
+	BAD_TRAP(0xa7) BAD_TRAP(0xa8) BAD_TRAP(0xa9) BAD_TRAP(0xaa) BAD_TRAP(0xab)
+	BAD_TRAP(0xac) BAD_TRAP(0xad) BAD_TRAP(0xae) BAD_TRAP(0xaf) BAD_TRAP(0xb0)
+	BAD_TRAP(0xb1) BAD_TRAP(0xb2) BAD_TRAP(0xb3) BAD_TRAP(0xb4) BAD_TRAP(0xb5)
+	BAD_TRAP(0xb6) BAD_TRAP(0xb7) BAD_TRAP(0xb8) BAD_TRAP(0xb9) BAD_TRAP(0xba)
+	BAD_TRAP(0xbb) BAD_TRAP(0xbc) BAD_TRAP(0xbd) BAD_TRAP(0xbe) BAD_TRAP(0xbf)
+	BAD_TRAP(0xc0) BAD_TRAP(0xc1) BAD_TRAP(0xc2) BAD_TRAP(0xc3) BAD_TRAP(0xc4)
+	BAD_TRAP(0xc5) BAD_TRAP(0xc6) BAD_TRAP(0xc7) BAD_TRAP(0xc8) BAD_TRAP(0xc9)
+	BAD_TRAP(0xca) BAD_TRAP(0xcb) BAD_TRAP(0xcc) BAD_TRAP(0xcd) BAD_TRAP(0xce)
+	BAD_TRAP(0xcf) BAD_TRAP(0xd0) BAD_TRAP(0xd1) BAD_TRAP(0xd2) BAD_TRAP(0xd3)
+	BAD_TRAP(0xd4) BAD_TRAP(0xd5) BAD_TRAP(0xd6) BAD_TRAP(0xd7) BAD_TRAP(0xd8)
+	BAD_TRAP(0xd9) BAD_TRAP(0xda) BAD_TRAP(0xdb) BAD_TRAP(0xdc) BAD_TRAP(0xdd)
+	BAD_TRAP(0xde) BAD_TRAP(0xdf) BAD_TRAP(0xe0) BAD_TRAP(0xe1) BAD_TRAP(0xe2)
+	BAD_TRAP(0xe3) BAD_TRAP(0xe4) BAD_TRAP(0xe5) BAD_TRAP(0xe6) BAD_TRAP(0xe7)
+	BAD_TRAP(0xe8) BAD_TRAP(0xe9) BAD_TRAP(0xea) BAD_TRAP(0xeb) BAD_TRAP(0xec)
+	BAD_TRAP(0xed) BAD_TRAP(0xee) BAD_TRAP(0xef) BAD_TRAP(0xf0) BAD_TRAP(0xf1)
+	BAD_TRAP(0xf2) BAD_TRAP(0xf3) BAD_TRAP(0xf4) BAD_TRAP(0xf5) BAD_TRAP(0xf6)
+	BAD_TRAP(0xf7) BAD_TRAP(0xf8) BAD_TRAP(0xf9) BAD_TRAP(0xfa) BAD_TRAP(0xfb)
+	BAD_TRAP(0xfc)
+	KGDB_TRAP(0xfd)
+	BAD_TRAP(0xfe)
+	BAD_TRAP(0xff)
+
+trapbase_cpu3:
+	BAD_TRAP(0x0)
+	SRMMU_TFAULT
+	TRAP_ENTRY(0x2, bad_instruction)
+	TRAP_ENTRY(0x3, priv_instruction)
+	TRAP_ENTRY(0x4, fpd_trap_handler)
+	WINDOW_SPILL
+	WINDOW_FILL
+	TRAP_ENTRY(0x7, mna_handler)
+	TRAP_ENTRY(0x8, fpe_trap_handler)
+	SRMMU_DFAULT
+	TRAP_ENTRY(0xa, do_tag_overflow)
+	TRAP_ENTRY(0xb, do_watchpoint)
+	BAD_TRAP(0xc) BAD_TRAP(0xd) BAD_TRAP(0xe) BAD_TRAP(0xf) BAD_TRAP(0x10)
+	TRAP_ENTRY_INTERRUPT(1)
+	TRAP_ENTRY_INTERRUPT(2)
+	TRAP_ENTRY_INTERRUPT(3)
+	TRAP_ENTRY_INTERRUPT(4)
+	TRAP_ENTRY_INTERRUPT(5)
+	TRAP_ENTRY_INTERRUPT(6)
+	TRAP_ENTRY_INTERRUPT(7)
+	TRAP_ENTRY_INTERRUPT(8)
+	TRAP_ENTRY_INTERRUPT(9)
+	TRAP_ENTRY_INTERRUPT(10)
+	TRAP_ENTRY_INTERRUPT(11)
+	TRAP_ENTRY_INTERRUPT(12)
+	TRAP_ENTRY_INTERRUPT(13)
+	TRAP_ENTRY_INTERRUPT(14)
+	TRAP_ENTRY(0x1f, linux_trap_ipi15_sun4m)
+	TRAP_ENTRY(0x20, do_reg_access)
+	BAD_TRAP(0x21)
+	BAD_TRAP(0x22)
+	BAD_TRAP(0x23)
+	TRAP_ENTRY(0x24, do_cp_disabled)
+	SKIP_TRAP(0x25, unimp_flush)
+	BAD_TRAP(0x26)
+	BAD_TRAP(0x27)
+	TRAP_ENTRY(0x28, do_cp_exception)
+	SRMMU_DFAULT
+	TRAP_ENTRY(0x2a, do_hw_divzero)
+	BAD_TRAP(0x2b) BAD_TRAP(0x2c)
+	BAD_TRAP(0x2d) BAD_TRAP(0x2e) BAD_TRAP(0x2f) BAD_TRAP(0x30) BAD_TRAP(0x31)
+	BAD_TRAP(0x32) BAD_TRAP(0x33) BAD_TRAP(0x34) BAD_TRAP(0x35) BAD_TRAP(0x36)
+	BAD_TRAP(0x37) BAD_TRAP(0x38) BAD_TRAP(0x39) BAD_TRAP(0x3a) BAD_TRAP(0x3b)
+	BAD_TRAP(0x3c) BAD_TRAP(0x3d) BAD_TRAP(0x3e) BAD_TRAP(0x3f) BAD_TRAP(0x40)
+	BAD_TRAP(0x41) BAD_TRAP(0x42) BAD_TRAP(0x43) BAD_TRAP(0x44) BAD_TRAP(0x45)
+	BAD_TRAP(0x46) BAD_TRAP(0x47) BAD_TRAP(0x48) BAD_TRAP(0x49) BAD_TRAP(0x4a)
+	BAD_TRAP(0x4b) BAD_TRAP(0x4c) BAD_TRAP(0x4d) BAD_TRAP(0x4e) BAD_TRAP(0x4f)
+	BAD_TRAP(0x50)
+	BAD_TRAP(0x51) BAD_TRAP(0x52) BAD_TRAP(0x53) BAD_TRAP(0x54) BAD_TRAP(0x55)
+	BAD_TRAP(0x56) BAD_TRAP(0x57) BAD_TRAP(0x58) BAD_TRAP(0x59) BAD_TRAP(0x5a)
+	BAD_TRAP(0x5b) BAD_TRAP(0x5c) BAD_TRAP(0x5d) BAD_TRAP(0x5e) BAD_TRAP(0x5f)
+	BAD_TRAP(0x60) BAD_TRAP(0x61) BAD_TRAP(0x62) BAD_TRAP(0x63) BAD_TRAP(0x64)
+	BAD_TRAP(0x65) BAD_TRAP(0x66) BAD_TRAP(0x67) BAD_TRAP(0x68) BAD_TRAP(0x69)
+	BAD_TRAP(0x6a) BAD_TRAP(0x6b) BAD_TRAP(0x6c) BAD_TRAP(0x6d) BAD_TRAP(0x6e)
+	BAD_TRAP(0x6f) BAD_TRAP(0x70) BAD_TRAP(0x71) BAD_TRAP(0x72) BAD_TRAP(0x73)
+	BAD_TRAP(0x74) BAD_TRAP(0x75) BAD_TRAP(0x76) BAD_TRAP(0x77) BAD_TRAP(0x78)
+	BAD_TRAP(0x79) BAD_TRAP(0x7a) BAD_TRAP(0x7b) BAD_TRAP(0x7c) BAD_TRAP(0x7d)
+	BAD_TRAP(0x7e) BAD_TRAP(0x7f)
+	BAD_TRAP(0x80)
+	BREAKPOINT_TRAP
+	TRAP_ENTRY(0x82, do_hw_divzero)
+	TRAP_ENTRY(0x83, do_flush_windows)
+	BAD_TRAP(0x84) BAD_TRAP(0x85)
+	BAD_TRAP(0x86) BAD_TRAP(0x87) BAD_TRAP(0x88)
+	BAD_TRAP(0x89) BAD_TRAP(0x8a) BAD_TRAP(0x8b) BAD_TRAP(0x8c)
+	BAD_TRAP(0x8d) BAD_TRAP(0x8e) BAD_TRAP(0x8f)
+	LINUX_SYSCALL_TRAP
+	BAD_TRAP(0x91) BAD_TRAP(0x92) BAD_TRAP(0x93) BAD_TRAP(0x94)
+	BAD_TRAP(0x95) BAD_TRAP(0x96) BAD_TRAP(0x97) BAD_TRAP(0x98) BAD_TRAP(0x99)
+	BAD_TRAP(0x9a) BAD_TRAP(0x9b) BAD_TRAP(0x9c) BAD_TRAP(0x9d) BAD_TRAP(0x9e)
+	BAD_TRAP(0x9f)
+	GETCC_TRAP
+	SETCC_TRAP
+	GETPSR_TRAP
+	BAD_TRAP(0xa3) BAD_TRAP(0xa4) BAD_TRAP(0xa5) BAD_TRAP(0xa6)
+	BAD_TRAP(0xa7) BAD_TRAP(0xa8) BAD_TRAP(0xa9) BAD_TRAP(0xaa) BAD_TRAP(0xab)
+	BAD_TRAP(0xac) BAD_TRAP(0xad) BAD_TRAP(0xae) BAD_TRAP(0xaf) BAD_TRAP(0xb0)
+	BAD_TRAP(0xb1) BAD_TRAP(0xb2) BAD_TRAP(0xb3) BAD_TRAP(0xb4) BAD_TRAP(0xb5)
+	BAD_TRAP(0xb6) BAD_TRAP(0xb7) BAD_TRAP(0xb8) BAD_TRAP(0xb9) BAD_TRAP(0xba)
+	BAD_TRAP(0xbb) BAD_TRAP(0xbc) BAD_TRAP(0xbd) BAD_TRAP(0xbe) BAD_TRAP(0xbf)
+	BAD_TRAP(0xc0) BAD_TRAP(0xc1) BAD_TRAP(0xc2) BAD_TRAP(0xc3) BAD_TRAP(0xc4)
+	BAD_TRAP(0xc5) BAD_TRAP(0xc6) BAD_TRAP(0xc7) BAD_TRAP(0xc8) BAD_TRAP(0xc9)
+	BAD_TRAP(0xca) BAD_TRAP(0xcb) BAD_TRAP(0xcc) BAD_TRAP(0xcd) BAD_TRAP(0xce)
+	BAD_TRAP(0xcf) BAD_TRAP(0xd0) BAD_TRAP(0xd1) BAD_TRAP(0xd2) BAD_TRAP(0xd3)
+	BAD_TRAP(0xd4) BAD_TRAP(0xd5) BAD_TRAP(0xd6) BAD_TRAP(0xd7) BAD_TRAP(0xd8)
+	BAD_TRAP(0xd9) BAD_TRAP(0xda) BAD_TRAP(0xdb) BAD_TRAP(0xdc) BAD_TRAP(0xdd)
+	BAD_TRAP(0xde) BAD_TRAP(0xdf) BAD_TRAP(0xe0) BAD_TRAP(0xe1) BAD_TRAP(0xe2)
+	BAD_TRAP(0xe3) BAD_TRAP(0xe4) BAD_TRAP(0xe5) BAD_TRAP(0xe6) BAD_TRAP(0xe7)
+	BAD_TRAP(0xe8) BAD_TRAP(0xe9) BAD_TRAP(0xea) BAD_TRAP(0xeb) BAD_TRAP(0xec)
+	BAD_TRAP(0xed) BAD_TRAP(0xee) BAD_TRAP(0xef) BAD_TRAP(0xf0) BAD_TRAP(0xf1)
+	BAD_TRAP(0xf2) BAD_TRAP(0xf3) BAD_TRAP(0xf4) BAD_TRAP(0xf5) BAD_TRAP(0xf6)
+	BAD_TRAP(0xf7) BAD_TRAP(0xf8) BAD_TRAP(0xf9) BAD_TRAP(0xfa) BAD_TRAP(0xfb)
+	BAD_TRAP(0xfc)
+	KGDB_TRAP(0xfd)
+	BAD_TRAP(0xfe)
+	BAD_TRAP(0xff)
+
+#endif
diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S
new file mode 100644
index 0000000..86e737e
--- /dev/null
+++ b/arch/sparc/kernel/ttable_64.S
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ttable.S: Sparc V9 Trap Table(s) with SpitFire/Cheetah/SUN4V extensions.
+ *
+ * Copyright (C) 1996, 2001, 2006 David S. Miller (davem@davemloft.net)
+ */
+
+
+	.globl	sparc64_ttable_tl0, sparc64_ttable_tl1
+	.globl	tl0_icpe, tl1_icpe
+	.globl	tl0_dcpe, tl1_dcpe
+	.globl	tl0_fecc, tl1_fecc
+	.globl	tl0_cee, tl1_cee
+	.globl	tl0_iae, tl1_iae
+	.globl	tl0_dae, tl1_dae
+
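+/* Layout note: each trap-table slot at trap level 0 holds eight
+ * instructions (32 bytes); the window spill/fill vectors get four
+ * consecutive slots.  That budget is why handlers needing a membar
+ * pair it with TRAP_NOSAVE_7INSNS -- one membar plus seven
+ * instructions exactly fills the entry.
+ */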
+sparc64_ttable_tl0:
+tl0_resv000:	BOOT_KERNEL BTRAP(0x1) BTRAP(0x2) BTRAP(0x3)
+tl0_resv004:	BTRAP(0x4)  BTRAP(0x5) BTRAP(0x6) BTRAP(0x7)
+tl0_iax:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_insn_access_exception)
+tl0_itsb_4v:	SUN4V_ITSB_MISS
+tl0_iae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
+tl0_resv00b:	BTRAP(0xb) BTRAP(0xc) BTRAP(0xd) BTRAP(0xe) BTRAP(0xf)
+tl0_ill:	membar #Sync
+		TRAP_7INSNS(do_illegal_instruction)
+tl0_privop:	TRAP(do_privop)
+tl0_resv012:	BTRAP(0x12) BTRAP(0x13) BTRAP(0x14) BTRAP(0x15) BTRAP(0x16) BTRAP(0x17)
+tl0_resv018:	BTRAP(0x18) BTRAP(0x19)
+tl0_mcd:	SUN4V_MCD_PRECISE
+tl0_resv01b:	BTRAP(0x1b)
+tl0_resv01c:	BTRAP(0x1c) BTRAP(0x1d)	BTRAP(0x1e) BTRAP(0x1f)
+tl0_fpdis:	TRAP_NOSAVE(do_fpdis)
+tl0_fpieee:	TRAP_SAVEFPU(do_fpieee)
+tl0_fpother:	TRAP_NOSAVE(do_fpother_check_fitos)
+tl0_tof:	TRAP(do_tof)
+tl0_cwin:	CLEAN_WINDOW
+tl0_div0:	TRAP(do_div0)
+tl0_resv029:	BTRAP(0x29) BTRAP(0x2a) BTRAP(0x2b) BTRAP(0x2c) BTRAP(0x2d) BTRAP(0x2e)
+tl0_resv02f:	BTRAP(0x2f)
+tl0_dax:	TRAP_NOSAVE(__spitfire_data_access_exception)
+tl0_dtsb_4v:	SUN4V_DTSB_MISS
+tl0_dae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
+tl0_resv033:	BTRAP(0x33)
+tl0_mna:	TRAP_NOSAVE(do_mna)
+tl0_lddfmna:	TRAP_NOSAVE(do_lddfmna)
+tl0_stdfmna:	TRAP_NOSAVE(do_stdfmna)
+tl0_privact:	TRAP_NOSAVE(__do_privact)
+tl0_resv038:	BTRAP(0x38) BTRAP(0x39) BTRAP(0x3a) BTRAP(0x3b) BTRAP(0x3c) BTRAP(0x3d)
+tl0_resv03e:	BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40)
+#ifdef CONFIG_SMP
+tl0_irq1:	TRAP_IRQ(smp_call_function_client, 1)
+tl0_irq2:	TRAP_IRQ(smp_receive_signal_client, 2)
+tl0_irq3:	TRAP_IRQ(smp_penguin_jailcell, 3)
+tl0_irq4:	BTRAP(0x44)
+#else
+tl0_irq1:	BTRAP(0x41)
+tl0_irq2:	BTRAP(0x42)
+tl0_irq3:	BTRAP(0x43)
+tl0_irq4:	BTRAP(0x44)
+#endif
+tl0_irq5:	TRAP_IRQ(handler_irq, 5)
+#ifdef CONFIG_SMP
+tl0_irq6:	TRAP_IRQ(smp_call_function_single_client, 6)
+#else
+tl0_irq6:	BTRAP(0x46)
+#endif
+tl0_irq7:	TRAP_IRQ(deferred_pcr_work_irq, 7)
+#if defined(CONFIG_KGDB) && defined(CONFIG_SMP)
+tl0_irq8:	TRAP_IRQ(smp_kgdb_capture_client, 8)
+#else
+tl0_irq8:	BTRAP(0x48)
+#endif
+tl0_irq9:	BTRAP(0x49)
+tl0_irq10:	BTRAP(0x4a) BTRAP(0x4b) BTRAP(0x4c) BTRAP(0x4d)
+tl0_irq14:	TRAP_IRQ(timer_interrupt, 14)
+tl0_irq15:	TRAP_NMI_IRQ(perfctr_irq, 15)
+tl0_resv050:	BTRAP(0x50) BTRAP(0x51) BTRAP(0x52) BTRAP(0x53) BTRAP(0x54) BTRAP(0x55)
+tl0_resv056:	BTRAP(0x56) BTRAP(0x57) BTRAP(0x58) BTRAP(0x59) BTRAP(0x5a) BTRAP(0x5b)
+tl0_resv05c:	BTRAP(0x5c) BTRAP(0x5d) BTRAP(0x5e) BTRAP(0x5f)
+tl0_ivec:	TRAP_IVEC
+tl0_paw:	TRAP(do_paw)
+tl0_vaw:	TRAP(do_vaw)
+tl0_cee:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_cee_trap)
+tl0_iamiss:
+#include	"itlb_miss.S"
+tl0_damiss:
+#include	"dtlb_miss.S"
+tl0_daprot:
+#include	"dtlb_prot.S"
+tl0_fecc:	BTRAP(0x70)	/* Fast-ECC on Cheetah */
+tl0_dcpe:	BTRAP(0x71)	/* D-cache Parity Error on Cheetah+ */
+tl0_icpe:	BTRAP(0x72)	/* I-cache Parity Error on Cheetah+ */
+tl0_resv073:	BTRAP(0x73) BTRAP(0x74) BTRAP(0x75)
+tl0_resv076:	BTRAP(0x76) BTRAP(0x77) BTRAP(0x78) BTRAP(0x79) BTRAP(0x7a) BTRAP(0x7b)
+tl0_cpu_mondo:	TRAP_NOSAVE(sun4v_cpu_mondo)
+tl0_dev_mondo:	TRAP_NOSAVE(sun4v_dev_mondo)
+tl0_res_mondo:	TRAP_NOSAVE(sun4v_res_mondo)
+tl0_nres_mondo:	TRAP_NOSAVE(sun4v_nonres_mondo)
+tl0_s0n:	SPILL_0_NORMAL
+tl0_s1n:	SPILL_1_NORMAL
+tl0_s2n:	SPILL_2_NORMAL
+tl0_s3n:	SPILL_0_NORMAL_ETRAP
+tl0_s4n:	SPILL_1_GENERIC_ETRAP
+tl0_s5n:	SPILL_1_GENERIC_ETRAP_FIXUP
+tl0_s6n:	SPILL_2_GENERIC_ETRAP
+tl0_s7n:	SPILL_2_GENERIC_ETRAP_FIXUP
+tl0_s0o:	SPILL_0_OTHER
+tl0_s1o:	SPILL_1_OTHER
+tl0_s2o:	SPILL_2_OTHER
+tl0_s3o:	SPILL_3_OTHER
+tl0_s4o:	SPILL_4_OTHER
+tl0_s5o:	SPILL_5_OTHER
+tl0_s6o:	SPILL_6_OTHER
+tl0_s7o:	SPILL_7_OTHER
+tl0_f0n:	FILL_0_NORMAL
+tl0_f1n:	FILL_1_NORMAL
+tl0_f2n:	FILL_2_NORMAL
+tl0_f3n:	FILL_3_NORMAL
+tl0_f4n:	FILL_4_NORMAL
+tl0_f5n:	FILL_0_NORMAL_RTRAP
+tl0_f6n:	FILL_1_GENERIC_RTRAP
+tl0_f7n:	FILL_2_GENERIC_RTRAP
+tl0_f0o:	FILL_0_OTHER
+tl0_f1o:	FILL_1_OTHER
+tl0_f2o:	FILL_2_OTHER
+tl0_f3o:	FILL_3_OTHER
+tl0_f4o:	FILL_4_OTHER
+tl0_f5o:	FILL_5_OTHER
+tl0_f6o:	FILL_6_OTHER
+tl0_f7o:	FILL_7_OTHER
+tl0_resv100:	BTRAP(0x100)
+tl0_bkpt:	BREAKPOINT_TRAP
+tl0_divz:	TRAP(do_div0)
+tl0_flushw:	FLUSH_WINDOW_TRAP
+tl0_resv104:	BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107) BTRAP(0x108)
+tl0_resv109:	BTRAP(0x109) BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d)
+tl0_resv10e:	BTRAP(0x10e) BTRAP(0x10f)
+tl0_linux32:	LINUX_32BIT_SYSCALL_TRAP
+tl0_oldlinux64:	LINUX_64BIT_SYSCALL_TRAP
+tl0_resv112:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_18,0x112) TRAP_UTRAP(UT_TRAP_INSTRUCTION_19,0x113)
+tl0_resv114:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_20,0x114) TRAP_UTRAP(UT_TRAP_INSTRUCTION_21,0x115)
+tl0_resv116:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_22,0x116) TRAP_UTRAP(UT_TRAP_INSTRUCTION_23,0x117)
+tl0_resv118:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_24,0x118) TRAP_UTRAP(UT_TRAP_INSTRUCTION_25,0x119)
+tl0_resv11a:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_26,0x11a) TRAP_UTRAP(UT_TRAP_INSTRUCTION_27,0x11b)
+tl0_resv11c:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_28,0x11c) TRAP_UTRAP(UT_TRAP_INSTRUCTION_29,0x11d)
+tl0_resv11e:	TRAP_UTRAP(UT_TRAP_INSTRUCTION_30,0x11e) TRAP_UTRAP(UT_TRAP_INSTRUCTION_31,0x11f)
+tl0_getcc:	GETCC_TRAP
+tl0_setcc:	SETCC_TRAP
+tl0_getpsr:	TRAP(do_getpsr)
+tl0_resv123:	BTRAP(0x123) BTRAP(0x124) BTRAP(0x125) BTRAP(0x126) BTRAP(0x127)
+tl0_resv128:	BTRAP(0x128) BTRAP(0x129) BTRAP(0x12a) BTRAP(0x12b) BTRAP(0x12c)
+tl0_resv12d:	BTRAP(0x12d) BTRAP(0x12e) BTRAP(0x12f) BTRAP(0x130) BTRAP(0x131)
+tl0_resv132:	BTRAP(0x132) BTRAP(0x133) BTRAP(0x134) BTRAP(0x135) BTRAP(0x136)
+tl0_resv137:	BTRAP(0x137) BTRAP(0x138) BTRAP(0x139) BTRAP(0x13a) BTRAP(0x13b)
+tl0_resv13c:	BTRAP(0x13c) BTRAP(0x13d) BTRAP(0x13e) BTRAP(0x13f) BTRAP(0x140)
+tl0_resv141:	BTRAP(0x141) BTRAP(0x142) BTRAP(0x143) BTRAP(0x144) BTRAP(0x145)
+tl0_resv146:	BTRAP(0x146) BTRAP(0x147) BTRAP(0x148) BTRAP(0x149) BTRAP(0x14a)
+tl0_resv14b:	BTRAP(0x14b) BTRAP(0x14c) BTRAP(0x14d) BTRAP(0x14e) BTRAP(0x14f)
+tl0_resv150:	BTRAP(0x150) BTRAP(0x151) BTRAP(0x152) BTRAP(0x153) BTRAP(0x154)
+tl0_resv155:	BTRAP(0x155) BTRAP(0x156) BTRAP(0x157) BTRAP(0x158) BTRAP(0x159)
+tl0_resv15a:	BTRAP(0x15a) BTRAP(0x15b) BTRAP(0x15c) BTRAP(0x15d) BTRAP(0x15e)
+tl0_resv15f:	BTRAP(0x15f) BTRAP(0x160) BTRAP(0x161) BTRAP(0x162) BTRAP(0x163)
+tl0_resv164:	BTRAP(0x164) BTRAP(0x165) BTRAP(0x166) BTRAP(0x167) BTRAP(0x168)
+tl0_resv169:	BTRAP(0x169) BTRAP(0x16a) BTRAP(0x16b) BTRAP(0x16c)
+tl0_linux64:	LINUX_64BIT_SYSCALL_TRAP
+tl0_gsctx:	TRAP(sparc64_get_context) TRAP(sparc64_set_context)
+tl0_resv170:	KPROBES_TRAP(0x170) KPROBES_TRAP(0x171) KGDB_TRAP(0x172)
+tl0_resv173:	UPROBES_TRAP(0x173) UPROBES_TRAP(0x174) BTRAP(0x175) BTRAP(0x176) BTRAP(0x177)
+tl0_resv178:	BTRAP(0x178) BTRAP(0x179) BTRAP(0x17a) BTRAP(0x17b) BTRAP(0x17c)
+tl0_resv17d:	BTRAP(0x17d) BTRAP(0x17e) BTRAP(0x17f)
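+/* BTRAPS(x) below packs eight consecutive BTRAP entries, so each of
+ * the following tl0_resv lines covers sixteen reserved vectors.
+ */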
+#define BTRAPS(x) BTRAP(x) BTRAP(x+1) BTRAP(x+2) BTRAP(x+3) BTRAP(x+4) BTRAP(x+5) BTRAP(x+6) BTRAP(x+7)
+tl0_resv180:	BTRAPS(0x180) BTRAPS(0x188)
+tl0_resv190:	BTRAPS(0x190) BTRAPS(0x198)
+tl0_resv1a0:	BTRAPS(0x1a0) BTRAPS(0x1a8)
+tl0_resv1b0:	BTRAPS(0x1b0) BTRAPS(0x1b8)
+tl0_resv1c0:	BTRAPS(0x1c0) BTRAPS(0x1c8)
+tl0_resv1d0:	BTRAPS(0x1d0) BTRAPS(0x1d8)
+tl0_resv1e0:	BTRAPS(0x1e0) BTRAPS(0x1e8)
+tl0_resv1f0:	BTRAPS(0x1f0) BTRAPS(0x1f8)
+
+sparc64_ttable_tl1:
+tl1_resv000:	BOOT_KERNEL    BTRAPTL1(0x1) BTRAPTL1(0x2) BTRAPTL1(0x3)
+tl1_resv004:	BTRAPTL1(0x4)  BTRAPTL1(0x5) BTRAPTL1(0x6) BTRAPTL1(0x7)
+tl1_iax:	TRAP_NOSAVE(__spitfire_insn_access_exception_tl1)
+tl1_itsb_4v:	SUN4V_ITSB_MISS
+tl1_iae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
+tl1_resv00b:	BTRAPTL1(0xb) BTRAPTL1(0xc) BTRAPTL1(0xd) BTRAPTL1(0xe) BTRAPTL1(0xf)
+tl1_ill:	TRAPTL1(do_ill_tl1)
+tl1_privop:	BTRAPTL1(0x11)
+tl1_resv012:	BTRAPTL1(0x12) BTRAPTL1(0x13) BTRAPTL1(0x14) BTRAPTL1(0x15)
+tl1_resv016:	BTRAPTL1(0x16) BTRAPTL1(0x17) BTRAPTL1(0x18) BTRAPTL1(0x19)
+tl1_resv01a:	BTRAPTL1(0x1a) BTRAPTL1(0x1b) BTRAPTL1(0x1c) BTRAPTL1(0x1d)
+tl1_resv01e:	BTRAPTL1(0x1e) BTRAPTL1(0x1f)
+tl1_fpdis:	TRAP_NOSAVE(do_fpdis)
+tl1_fpieee:	TRAPTL1(do_fpieee_tl1)
+tl1_fpother:	TRAPTL1(do_fpother_tl1)
+tl1_tof:	TRAPTL1(do_tof_tl1)
+tl1_cwin:	CLEAN_WINDOW
+tl1_div0:	TRAPTL1(do_div0_tl1)
+tl1_resv029:	BTRAPTL1(0x29) BTRAPTL1(0x2a) BTRAPTL1(0x2b) BTRAPTL1(0x2c)
+tl1_resv02d:	BTRAPTL1(0x2d) BTRAPTL1(0x2e) BTRAPTL1(0x2f)
+tl1_dax:	TRAP_NOSAVE(__spitfire_data_access_exception_tl1)
+tl1_dtsb_4v:	SUN4V_DTSB_MISS
+tl1_dae:	membar #Sync
+		TRAP_NOSAVE_7INSNS(__spitfire_access_error)
+tl1_resv033:	BTRAPTL1(0x33)
+tl1_mna:	TRAP_NOSAVE(do_mna)
+tl1_lddfmna:	TRAPTL1(do_lddfmna_tl1)
+tl1_stdfmna:	TRAPTL1(do_stdfmna_tl1)
+tl1_privact:	BTRAPTL1(0x37)
+tl1_resv038:	BTRAPTL1(0x38) BTRAPTL1(0x39) BTRAPTL1(0x3a) BTRAPTL1(0x3b)
+tl1_resv03c:	BTRAPTL1(0x3c) BTRAPTL1(0x3d) BTRAPTL1(0x3e) BTRAPTL1(0x3f)
+tl1_resv040:	BTRAPTL1(0x40)
+tl1_irq1:	TRAP_IRQ(do_irq_tl1, 1)  TRAP_IRQ(do_irq_tl1, 2)  TRAP_IRQ(do_irq_tl1, 3)
+tl1_irq4:	TRAP_IRQ(do_irq_tl1, 4)  TRAP_IRQ(do_irq_tl1, 5)  TRAP_IRQ(do_irq_tl1, 6)
+tl1_irq7:	TRAP_IRQ(do_irq_tl1, 7)  TRAP_IRQ(do_irq_tl1, 8)  TRAP_IRQ(do_irq_tl1, 9)
+tl1_irq10:	TRAP_IRQ(do_irq_tl1, 10) TRAP_IRQ(do_irq_tl1, 11)
+tl1_irq12:	TRAP_IRQ(do_irq_tl1, 12) TRAP_IRQ(do_irq_tl1, 13)
+tl1_irq14:	TRAP_IRQ(do_irq_tl1, 14) TRAP_IRQ(do_irq_tl1, 15)
+tl1_resv050:	BTRAPTL1(0x50) BTRAPTL1(0x51) BTRAPTL1(0x52) BTRAPTL1(0x53)
+tl1_resv054:	BTRAPTL1(0x54) BTRAPTL1(0x55) BTRAPTL1(0x56) BTRAPTL1(0x57)
+tl1_resv058:	BTRAPTL1(0x58) BTRAPTL1(0x59) BTRAPTL1(0x5a) BTRAPTL1(0x5b)
+tl1_resv05c:	BTRAPTL1(0x5c) BTRAPTL1(0x5d) BTRAPTL1(0x5e) BTRAPTL1(0x5f)
+tl1_ivec:	TRAP_IVEC
+tl1_paw:	TRAPTL1(do_paw_tl1)
+tl1_vaw:	TRAPTL1(do_vaw_tl1)
+tl1_cee:	BTRAPTL1(0x63)
+tl1_iamiss:	BTRAPTL1(0x64) BTRAPTL1(0x65) BTRAPTL1(0x66) BTRAPTL1(0x67)
+tl1_damiss:
+#include	"dtlb_miss.S"
+tl1_daprot:
+#include	"dtlb_prot.S"
+tl1_fecc:	BTRAPTL1(0x70)	/* Fast-ECC on Cheetah */
+tl1_dcpe:	BTRAPTL1(0x71)	/* D-cache Parity Error on Cheetah+ */
+tl1_icpe:	BTRAPTL1(0x72)	/* I-cache Parity Error on Cheetah+ */
+tl1_resv073:	BTRAPTL1(0x73)
+tl1_resv074:	BTRAPTL1(0x74) BTRAPTL1(0x75) BTRAPTL1(0x76) BTRAPTL1(0x77)
+tl1_resv078:	BTRAPTL1(0x78) BTRAPTL1(0x79) BTRAPTL1(0x7a) BTRAPTL1(0x7b)
+tl1_resv07c:	BTRAPTL1(0x7c) BTRAPTL1(0x7d) BTRAPTL1(0x7e) BTRAPTL1(0x7f)
+tl1_s0n:	SPILL_0_NORMAL
+tl1_s1n:	SPILL_1_NORMAL
+tl1_s2n:	SPILL_2_NORMAL
+tl1_s3n:	SPILL_3_NORMAL
+tl1_s4n:	SPILL_4_NORMAL
+tl1_s5n:	SPILL_5_NORMAL
+tl1_s6n:	SPILL_6_NORMAL
+tl1_s7n:	SPILL_7_NORMAL
+tl1_s0o:	SPILL_0_OTHER
+tl1_s1o:	SPILL_1_OTHER
+tl1_s2o:	SPILL_2_OTHER
+tl1_s3o:	SPILL_3_OTHER
+tl1_s4o:	SPILL_4_OTHER
+tl1_s5o:	SPILL_5_OTHER
+tl1_s6o:	SPILL_6_OTHER
+tl1_s7o:	SPILL_7_OTHER
+tl1_f0n:	FILL_0_NORMAL
+tl1_f1n:	FILL_1_NORMAL
+tl1_f2n:	FILL_2_NORMAL
+tl1_f3n:	FILL_3_NORMAL
+tl1_f4n:	FILL_4_NORMAL
+tl1_f5n:	FILL_5_NORMAL
+tl1_f6n:	FILL_6_NORMAL
+tl1_f7n:	FILL_7_NORMAL
+tl1_f0o:	FILL_0_OTHER
+tl1_f1o:	FILL_1_OTHER
+tl1_f2o:	FILL_2_OTHER
+tl1_f3o:	FILL_3_OTHER
+tl1_f4o:	FILL_4_OTHER
+tl1_f5o:	FILL_5_OTHER
+tl1_f6o:	FILL_6_OTHER
+tl1_f7o:	FILL_7_OTHER
diff --git a/arch/sparc/kernel/una_asm_32.S b/arch/sparc/kernel/una_asm_32.S
new file mode 100644
index 0000000..f8bf839
--- /dev/null
+++ b/arch/sparc/kernel/una_asm_32.S
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* una_asm.S: Kernel unaligned trap assembler helpers.
+ *
+ * Copyright (C) 1996,2005,2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/errno.h>
+
+	.text
+
+retl_efault:
+	retl
+	 mov	-EFAULT, %o0
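+	/* The mov above sits in retl's delay slot, so %o0 is loaded
+	 * with -EFAULT after the return is issued but before the
+	 * caller resumes.  Every numbered load/store label in this
+	 * file is routed here through the __ex_table entries below
+	 * should the access fault.
+	 */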
+
+	/* int __do_int_store(unsigned long *dst_addr, int size,
+	 *                    unsigned long *src_val)
+	 *
+	 * %o0 = dest_addr
+	 * %o1 = size
+	 * %o2 = src_val
+	 *
+	 * Return '0' on success, -EFAULT on failure.
+	 */
+	.globl	__do_int_store
+__do_int_store:
+	ld	[%o2], %g1
+	cmp	%o1, 2
+	be	2f
+	 cmp	%o1, 4
+	be	1f
+	 srl	%g1, 24, %g2
+	srl	%g1, 16, %g7
+4:	stb	%g2, [%o0]
+	srl	%g1, 8, %g2
+5:	stb	%g7, [%o0 + 1]
+	ld	[%o2 + 4], %g7
+6:	stb	%g2, [%o0 + 2]
+	srl	%g7, 24, %g2
+7:	stb	%g1, [%o0 + 3]
+	srl	%g7, 16, %g1
+8:	stb	%g2, [%o0 + 4]
+	srl	%g7, 8, %g2
+9:	stb	%g1, [%o0 + 5]
+10:	stb	%g2, [%o0 + 6]
+	b	0f
+11:	 stb	%g7, [%o0 + 7]
+1:	srl	%g1, 16, %g7
+12:	stb	%g2, [%o0]
+	srl	%g1, 8, %g2
+13:	stb	%g7, [%o0 + 1]
+14:	stb	%g2, [%o0 + 2]
+	b	0f
+15:	 stb	%g1, [%o0 + 3]
+2:	srl	%g1, 8, %g2
+16:	stb	%g2, [%o0]
+17:	stb	%g1, [%o0 + 1]
+0:	retl
+	 mov	0, %o0
+
+	.section __ex_table,#alloc
+	.word	4b, retl_efault
+	.word	5b, retl_efault
+	.word	6b, retl_efault
+	.word	7b, retl_efault
+	.word	8b, retl_efault
+	.word	9b, retl_efault
+	.word	10b, retl_efault
+	.word	11b, retl_efault
+	.word	12b, retl_efault
+	.word	13b, retl_efault
+	.word	14b, retl_efault
+	.word	15b, retl_efault
+	.word	16b, retl_efault
+	.word	17b, retl_efault
+	.previous
+
+	/* int do_int_load(unsigned long *dest_reg, int size,
+	 *                 unsigned long *saddr, int is_signed)
+	 *
+	 * %o0 = dest_reg
+	 * %o1 = size
+	 * %o2 = saddr
+	 * %o3 = is_signed
+	 *
+	 * Return '0' on success, -EFAULT on failure.
+	 */
+	.globl	do_int_load
+do_int_load:
+	cmp	%o1, 8
+	be	9f
+	 cmp	%o1, 4
+	be	6f
+4:	 ldub	[%o2], %g1
+5:	ldub	[%o2 + 1], %g2
+	sll	%g1, 8, %g1
+	tst	%o3
+	be	3f
+	 or	%g1, %g2, %g1
+	sll	%g1, 16, %g1
+	sra	%g1, 16, %g1
+3:	b	0f
+	 st	%g1, [%o0]
+6:	ldub	[%o2 + 1], %g2
+	sll	%g1, 24, %g1
+7:	ldub	[%o2 + 2], %g7
+	sll	%g2, 16, %g2
+8:	ldub	[%o2 + 3], %g3
+	sll	%g7, 8, %g7
+	or	%g3, %g2, %g3
+	or	%g7, %g3, %g7
+	or	%g1, %g7, %g1
+	b	0f
+	 st	%g1, [%o0]
+9:	ldub	[%o2], %g1
+10:	ldub	[%o2 + 1], %g2
+	sll	%g1, 24, %g1
+11:	ldub	[%o2 + 2], %g7
+	sll	%g2, 16, %g2
+12:	ldub	[%o2 + 3], %g3
+	sll	%g7, 8, %g7
+	or	%g1, %g2, %g1
+	or	%g7, %g3, %g7
+	or	%g1, %g7, %g7
+13:	ldub	[%o2 + 4], %g1
+	st	%g7, [%o0]
+14:	ldub	[%o2 + 5], %g2
+	sll	%g1, 24, %g1
+15:	ldub	[%o2 + 6], %g7
+	sll	%g2, 16, %g2
+16:	ldub	[%o2 + 7], %g3
+	sll	%g7, 8, %g7
+	or	%g1, %g2, %g1
+	or	%g7, %g3, %g7
+	or	%g1, %g7, %g7
+	st	%g7, [%o0 + 4]
+0:	retl
+	 mov	0, %o0
+
+	.section __ex_table,#alloc
+	.word	4b, retl_efault
+	.word	5b, retl_efault
+	.word	6b, retl_efault
+	.word	7b, retl_efault
+	.word	8b, retl_efault
+	.word	9b, retl_efault
+	.word	10b, retl_efault
+	.word	11b, retl_efault
+	.word	12b, retl_efault
+	.word	13b, retl_efault
+	.word	14b, retl_efault
+	.word	15b, retl_efault
+	.word	16b, retl_efault
+	.previous
diff --git a/arch/sparc/kernel/una_asm_64.S b/arch/sparc/kernel/una_asm_64.S
new file mode 100644
index 0000000..e256f39
--- /dev/null
+++ b/arch/sparc/kernel/una_asm_64.S
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* una_asm.S: Kernel unaligned trap assembler helpers.
+ *
+ * Copyright (C) 1996,2005 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+	.text
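+
+	/* Unlike the 32-bit helpers, both routines below take the ASI
+	 * of the trapping instruction as their final argument: it is
+	 * installed into %asi on entry and the caller's ASI restored
+	 * on every exit path, so the lduba/stba alternate-space
+	 * accesses hit the address space the faulting instruction
+	 * named.
+	 */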
+
+	.globl	__do_int_store
+__do_int_store:
+	rd	%asi, %o4
+	wr	%o3, 0, %asi
+	mov	%o2, %g3
+	cmp	%o1, 2
+	be,pn	%icc, 2f
+	 cmp	%o1, 4
+	be,pt	%icc, 1f
+	 srlx	%g3, 24, %g2
+	srlx	%g3, 56, %g1
+	srlx	%g3, 48, %g7
+4:	stba	%g1, [%o0] %asi
+	srlx	%g3, 40, %g1
+5:	stba	%g7, [%o0 + 1] %asi
+	srlx	%g3, 32, %g7
+6:	stba	%g1, [%o0 + 2] %asi
+7:	stba	%g7, [%o0 + 3] %asi
+	srlx	%g3, 16, %g1
+8:	stba	%g2, [%o0 + 4] %asi
+	srlx	%g3, 8, %g7
+9:	stba	%g1, [%o0 + 5] %asi
+10:	stba	%g7, [%o0 + 6] %asi
+	ba,pt	%xcc, 0f
+11:	 stba	%g3, [%o0 + 7] %asi
+1:	srl	%g3, 16, %g7
+12:	stba	%g2, [%o0] %asi
+	srl	%g3, 8, %g2
+13:	stba	%g7, [%o0 + 1] %asi
+14:	stba	%g2, [%o0 + 2] %asi
+	ba,pt	%xcc, 0f
+15:	 stba	%g3, [%o0 + 3] %asi
+2:	srl	%g3, 8, %g2
+16:	stba	%g2, [%o0] %asi
+17:	stba	%g3, [%o0 + 1] %asi
+0:
+	wr	%o4, 0x0, %asi
+	retl
+	 mov	0, %o0
+	.size	__do_int_store, .-__do_int_store
+
+	.section	__ex_table,"a"
+	.word		4b, __retl_efault
+	.word		5b, __retl_efault
+	.word		6b, __retl_efault
+	.word		7b, __retl_efault
+	.word		8b, __retl_efault
+	.word		9b, __retl_efault
+	.word		10b, __retl_efault
+	.word		11b, __retl_efault
+	.word		12b, __retl_efault
+	.word		13b, __retl_efault
+	.word		14b, __retl_efault
+	.word		15b, __retl_efault
+	.word		16b, __retl_efault
+	.word		17b, __retl_efault
+	.previous
+
+	.globl	do_int_load
+do_int_load:
+	rd	%asi, %o5
+	wr	%o4, 0, %asi
+	cmp	%o1, 8
+	bge,pn	%icc, 9f
+	 cmp	%o1, 4
+	be,pt	%icc, 6f
+4:	 lduba	[%o2] %asi, %g2
+5:	lduba	[%o2 + 1] %asi, %g3
+	sll	%g2, 8, %g2
+	brz,pt	%o3, 3f
+	 add	%g2, %g3, %g2
+	sllx	%g2, 48, %g2
+	srax	%g2, 48, %g2
+3:	ba,pt	%xcc, 0f
+	 stx	%g2, [%o0]
+6:	lduba	[%o2 + 1] %asi, %g3
+	sll	%g2, 24, %g2
+7:	lduba	[%o2 + 2] %asi, %g7
+	sll	%g3, 16, %g3
+8:	lduba	[%o2 + 3] %asi, %g1
+	sll	%g7, 8, %g7
+	or	%g2, %g3, %g2
+	or	%g7, %g1, %g7
+	or	%g2, %g7, %g2
+	brnz,a,pt %o3, 3f
+	 sra	%g2, 0, %g2
+3:	ba,pt	%xcc, 0f
+	 stx	%g2, [%o0]
+9:	lduba	[%o2] %asi, %g2
+10:	lduba	[%o2 + 1] %asi, %g3
+	sllx	%g2, 56, %g2
+11:	lduba	[%o2 + 2] %asi, %g7
+	sllx	%g3, 48, %g3
+12:	lduba	[%o2 + 3] %asi, %g1
+	sllx	%g7, 40, %g7
+	sllx	%g1, 32, %g1
+	or	%g2, %g3, %g2
+	or	%g7, %g1, %g7
+13:	lduba	[%o2 + 4] %asi, %g3
+	or	%g2, %g7, %g7
+14:	lduba	[%o2 + 5] %asi, %g1
+	sllx	%g3, 24, %g3
+15:	lduba	[%o2 + 6] %asi, %g2
+	sllx	%g1, 16, %g1
+	or	%g7, %g3, %g7
+16:	lduba	[%o2 + 7] %asi, %g3
+	sllx	%g2, 8, %g2
+	or	%g7, %g1, %g7
+	or	%g2, %g3, %g2
+	or	%g7, %g2, %g7
+	cmp	%o1, 8
+	be,a,pt %icc, 0f
+	 stx	%g7, [%o0]
+	srlx	%g7, 32, %g2
+	sra	%g7, 0, %g7
+	stx	%g2, [%o0]
+	stx	%g7, [%o0 + 8]
+0:
+	wr	%o5, 0x0, %asi
+	retl
+	 mov	0, %o0
+	.size	do_int_load, .-do_int_load
+
+	.section	__ex_table,"a"
+	.word		4b, __retl_efault
+	.word		5b, __retl_efault
+	.word		6b, __retl_efault
+	.word		7b, __retl_efault
+	.word		8b, __retl_efault
+	.word		9b, __retl_efault
+	.word		10b, __retl_efault
+	.word		11b, __retl_efault
+	.word		12b, __retl_efault
+	.word		13b, __retl_efault
+	.word		14b, __retl_efault
+	.word		15b, __retl_efault
+	.word		16b, __retl_efault
+	.previous
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
new file mode 100644
index 0000000..64ac8c0
--- /dev/null
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * unaligned.c: Unaligned load/store trap handling with special
+ *              cases for the kernel to do them more quickly.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/sched/signal.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <linux/smp.h>
+#include <linux/perf_event.h>
+
+#include <asm/setup.h>
+
+#include "kernel.h"
+
+enum direction {
+	load,    /* ld, ldd, ldh, ldsh */
+	store,   /* st, std, sth, stsh */
+	both,    /* Swap, ldstub, etc. */
+	fpload,
+	fpstore,
+	invalid,
+};
+
+static inline enum direction decode_direction(unsigned int insn)
+{
+	unsigned long tmp = (insn >> 21) & 1;
+
+	if(!tmp)
+		return load;
+	else {
+		if(((insn>>19)&0x3f) == 15)
+			return both;
+		else
+			return store;
+	}
+}
+
+/* 8 = double-word, 4 = word, 2 = half-word */
+static inline int decode_access_size(unsigned int insn)
+{
+	unsigned int tmp = (insn >> 19) & 3;
+
+	if (!tmp)
+		return 4;
+	else if (tmp == 3)
+		return 8;
+	else if (tmp == 2)
+		return 2;
+	else {
+		printk("Impossible unaligned trap. insn=%08x\n", insn);
+		die_if_kernel("Byte sized unaligned access?!?!", current->thread.kregs);
+		return 4; /* just to keep gcc happy. */
+	}
+}
+
+/* 0x400000 = signed, 0 = unsigned */
+static inline int decode_signedness(unsigned int insn)
+{
+	return (insn & 0x400000);
+}
+
+static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
+				       unsigned int rd)
+{
+	if(rs2 >= 16 || rs1 >= 16 || rd >= 16) {
+		/* Wheee... */
+		__asm__ __volatile__("save %sp, -0x40, %sp\n\t"
+				     "save %sp, -0x40, %sp\n\t"
+				     "save %sp, -0x40, %sp\n\t"
+				     "save %sp, -0x40, %sp\n\t"
+				     "save %sp, -0x40, %sp\n\t"
+				     "save %sp, -0x40, %sp\n\t"
+				     "save %sp, -0x40, %sp\n\t"
+				     "restore; restore; restore; restore;\n\t"
+				     "restore; restore; restore;\n\t");
+	}
+}
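+
+/* About the save/restore ladder above: the 32-bit CPUs supported
+ * here typically implement eight register windows, so seven nested
+ * saves force every other window out to the stack.  Only then can a
+ * windowed register (reg >= 16) be read reliably from the saved
+ * reg_window32 that %fp points at.
+ */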
+
+static inline int sign_extend_imm13(int imm)
+{
+	return imm << 19 >> 19;
+}
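+
+/* Worked example: the signed 13-bit immediate sits in bits 12:0, and
+ * 32 - 13 = 19, so the shift pair replicates bit 12 across the upper
+ * bits.  sign_extend_imm13(0x1fff) is -1, sign_extend_imm13(0xfff)
+ * is 4095.
+ */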
+
+static inline unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
+{
+	struct reg_window32 *win;
+
+	if(reg < 16)
+		return (!reg ? 0 : regs->u_regs[reg]);
+
+	/* Ho hum, the slightly complicated case. */
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
+	return win->locals[reg - 16]; /* yes, I know what this does... */
+}
+
+static inline unsigned long safe_fetch_reg(unsigned int reg, struct pt_regs *regs)
+{
+	struct reg_window32 __user *win;
+	unsigned long ret;
+
+	if (reg < 16)
+		return (!reg ? 0 : regs->u_regs[reg]);
+
+	/* Ho hum, the slightly complicated case. */
+	win = (struct reg_window32 __user *) regs->u_regs[UREG_FP];
+
+	if ((unsigned long)win & 3)
+		return -1;
+
+	if (get_user(ret, &win->locals[reg - 16]))
+		return -1;
+
+	return ret;
+}
+
+static inline unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
+{
+	struct reg_window32 *win;
+
+	if(reg < 16)
+		return &regs->u_regs[reg];
+	win = (struct reg_window32 *) regs->u_regs[UREG_FP];
+	return &win->locals[reg - 16];
+}
+
+static unsigned long compute_effective_address(struct pt_regs *regs,
+					       unsigned int insn)
+{
+	unsigned int rs1 = (insn >> 14) & 0x1f;
+	unsigned int rs2 = insn & 0x1f;
+	unsigned int rd = (insn >> 25) & 0x1f;
+
+	if(insn & 0x2000) {
+		maybe_flush_windows(rs1, 0, rd);
+		return (fetch_reg(rs1, regs) + sign_extend_imm13(insn));
+	} else {
+		maybe_flush_windows(rs1, rs2, rd);
+		return (fetch_reg(rs1, regs) + fetch_reg(rs2, regs));
+	}
+}
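+
+/* Both effective-address forms mirror the hardware: bit 13 (0x2000)
+ * is the 'i' bit selecting "rs1 + simm13" over "rs1 + rs2", with rs1
+ * in bits 18:14, rs2 in bits 4:0 and rd in bits 29:25 of the
+ * instruction word.
+ */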
+
+unsigned long safe_compute_effective_address(struct pt_regs *regs,
+					     unsigned int insn)
+{
+	unsigned int rs1 = (insn >> 14) & 0x1f;
+	unsigned int rs2 = insn & 0x1f;
+	unsigned int rd = (insn >> 25) & 0x1f;
+
+	if(insn & 0x2000) {
+		maybe_flush_windows(rs1, 0, rd);
+		return (safe_fetch_reg(rs1, regs) + sign_extend_imm13(insn));
+	} else {
+		maybe_flush_windows(rs1, rs2, rd);
+		return (safe_fetch_reg(rs1, regs) + safe_fetch_reg(rs2, regs));
+	}
+}
+
+/* This is just to make gcc think panic does return... */
+static void unaligned_panic(char *str)
+{
+	panic("%s", str);
+}
+
+/* una_asm.S */
+extern int do_int_load(unsigned long *dest_reg, int size,
+		       unsigned long *saddr, int is_signed);
+extern int __do_int_store(unsigned long *dst_addr, int size,
+			  unsigned long *src_val);
+
+static int do_int_store(int reg_num, int size, unsigned long *dst_addr,
+			struct pt_regs *regs)
+{
+	unsigned long zero[2] = { 0, 0 };
+	unsigned long *src_val;
+
+	if (reg_num)
+		src_val = fetch_reg_addr(reg_num, regs);
+	else {
+		src_val = &zero[0];
+		if (size == 8)
+			zero[1] = fetch_reg(1, regs);
+	}
+	return __do_int_store(dst_addr, size, src_val);
+}
+
+extern void smp_capture(void);
+extern void smp_release(void);
+
+static inline void advance(struct pt_regs *regs)
+{
+	regs->pc   = regs->npc;
+	regs->npc += 4;
+}
+
+static inline int floating_point_load_or_store_p(unsigned int insn)
+{
+	return (insn >> 24) & 1;
+}
+
+static inline int ok_for_kernel(unsigned int insn)
+{
+	return !floating_point_load_or_store_p(insn);
+}
+
+static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
+{
+	unsigned long g2 = regs->u_regs [UREG_G2];
+	unsigned long fixup = search_extables_range(regs->pc, &g2);
+
+	if (!fixup) {
+		unsigned long address = compute_effective_address(regs, insn);
+		if (address < PAGE_SIZE) {
+			printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler");
+		} else
+			printk(KERN_ALERT "Unable to handle kernel paging request in mna handler");
+		printk(KERN_ALERT " at virtual address %08lx\n", address);
+		printk(KERN_ALERT "current->{mm,active_mm}->context = %08lx\n",
+			(current->mm ? current->mm->context :
+			current->active_mm->context));
+		printk(KERN_ALERT "current->{mm,active_mm}->pgd = %08lx\n",
+			(current->mm ? (unsigned long) current->mm->pgd :
+			(unsigned long) current->active_mm->pgd));
+		die_if_kernel("Oops", regs);
+		/* Not reached */
+	}
+	regs->pc = fixup;
+	regs->npc = regs->pc + 4;
+	regs->u_regs [UREG_G2] = g2;
+}
+
+asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
+{
+	enum direction dir = decode_direction(insn);
+	int size = decode_access_size(insn);
+
+	if(!ok_for_kernel(insn) || dir == both) {
+		printk("Unsupported unaligned load/store trap for kernel at <%08lx>.\n",
+		       regs->pc);
+		unaligned_panic("Wheee. Kernel does fpu/atomic unaligned load/store.");
+	} else {
+		unsigned long addr = compute_effective_address(regs, insn);
+		int err;
+
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+		switch (dir) {
+		case load:
+			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
+							 regs),
+					  size, (unsigned long *) addr,
+					  decode_signedness(insn));
+			break;
+
+		case store:
+			err = do_int_store(((insn>>25)&0x1f), size,
+					   (unsigned long *) addr, regs);
+			break;
+		default:
+			panic("Impossible kernel unaligned trap.");
+			/* Not reached... */
+		}
+		if (err)
+			kernel_mna_trap_fault(regs, insn);
+		else
+			advance(regs);
+	}
+}
+
+static inline int ok_for_user(struct pt_regs *regs, unsigned int insn,
+			      enum direction dir)
+{
+	unsigned int reg;
+	int check = (dir == load) ? VERIFY_READ : VERIFY_WRITE;
+	int size = ((insn >> 19) & 3) == 3 ? 8 : 4;
+
+	if ((regs->pc | regs->npc) & 3)
+		return 0;
+
+	/* Must access_ok() in all the necessary places. */
+#define WINREG_ADDR(regnum) \
+	((void __user *)(((unsigned long *)regs->u_regs[UREG_FP])+(regnum)))
+
+	reg = (insn >> 25) & 0x1f;
+	if (reg >= 16) {
+		if (!access_ok(check, WINREG_ADDR(reg - 16), size))
+			return -EFAULT;
+	}
+	reg = (insn >> 14) & 0x1f;
+	if (reg >= 16) {
+		if (!access_ok(check, WINREG_ADDR(reg - 16), size))
+			return -EFAULT;
+	}
+	if (!(insn & 0x2000)) {
+		reg = (insn & 0x1f);
+		if (reg >= 16) {
+			if (!access_ok(check, WINREG_ADDR(reg - 16), size))
+				return -EFAULT;
+		}
+	}
+#undef WINREG_ADDR
+	return 0;
+}
+
+static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
+{
+	send_sig_fault(SIGBUS, BUS_ADRALN,
+		       (void __user *)safe_compute_effective_address(regs, insn),
+		       0, current);
+}
+
+asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
+{
+	enum direction dir;
+
+	if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) ||
+	   (((insn >> 30) & 3) != 3))
+		goto kill_user;
+	dir = decode_direction(insn);
+	if (ok_for_user(regs, insn, dir)) {
+		goto kill_user;
+	} else {
+		int err, size = decode_access_size(insn);
+		unsigned long addr;
+
+		if(floating_point_load_or_store_p(insn)) {
+			printk("User FPU load/store unaligned unsupported.\n");
+			goto kill_user;
+		}
+
+		addr = compute_effective_address(regs, insn);
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+		switch(dir) {
+		case load:
+			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
+							 regs),
+					  size, (unsigned long *) addr,
+					  decode_signedness(insn));
+			break;
+
+		case store:
+			err = do_int_store(((insn>>25)&0x1f), size,
+					   (unsigned long *) addr, regs);
+			break;
+
+		case both:
+			/*
+			 * This was supported in 2.4. However, we question
+			 * the value of the SWAP instruction across word
+			 * boundaries.
+			 */
+			printk("Unaligned SWAP unsupported.\n");
+			err = -EFAULT;
+			break;
+
+		default:
+			unaligned_panic("Impossible user unaligned trap.");
+			goto out;
+		}
+		if (err)
+			goto kill_user;
+		else
+			advance(regs);
+		goto out;
+	}
+
+kill_user:
+	user_mna_trap_fault(regs, insn);
+out:
+	;
+}
diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c
new file mode 100644
index 0000000..23db2ef
--- /dev/null
+++ b/arch/sparc/kernel/unaligned_64.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * unaligned.c: Unaligned load/store trap handling with special
+ *              cases for the kernel to do them more quickly.
+ *
+ * Copyright (C) 1996,2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/extable.h>
+#include <asm/asi.h>
+#include <asm/ptrace.h>
+#include <asm/pstate.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <linux/smp.h>
+#include <linux/bitops.h>
+#include <linux/perf_event.h>
+#include <linux/ratelimit.h>
+#include <linux/context_tracking.h>
+#include <asm/fpumacro.h>
+#include <asm/cacheflush.h>
+#include <asm/setup.h>
+
+#include "entry.h"
+#include "kernel.h"
+
+enum direction {
+	load,    /* ld, ldd, ldh, ldsh */
+	store,   /* st, std, sth, stsh */
+	both,    /* Swap, ldstub, cas, ... */
+	fpld,
+	fpst,
+	invalid,
+};
+
+static inline enum direction decode_direction(unsigned int insn)
+{
+	unsigned long tmp = (insn >> 21) & 1;
+
+	if (!tmp)
+		return load;
+	else {
+		switch ((insn>>19)&0xf) {
+		case 15: /* swap* */
+			return both;
+		default:
+			return store;
+		}
+	}
+}
+
+/* 16 = double-word, 8 = extra-word, 4 = word, 2 = half-word */
+static inline int decode_access_size(struct pt_regs *regs, unsigned int insn)
+{
+	unsigned int tmp;
+
+	tmp = ((insn >> 19) & 0xf);
+	if (tmp == 11 || tmp == 14) /* ldx/stx */
+		return 8;
+	tmp &= 3;
+	if (!tmp)
+		return 4;
+	else if (tmp == 3)
+		return 16;	/* ldd/std - Although it is actually 8 */
+	else if (tmp == 2)
+		return 2;
+	else {
+		printk("Impossible unaligned trap. insn=%08x\n", insn);
+		die_if_kernel("Byte sized unaligned access?!?!", regs);
+
+		/* GCC should never warn that control reaches the end
+		 * of this function without returning a value because
+		 * die_if_kernel() is marked with attribute 'noreturn'.
+		 * Alas, some versions do...
+		 */
+
+		return 0;
+	}
+}
+
+static inline int decode_asi(unsigned int insn, struct pt_regs *regs)
+{
+	if (insn & 0x800000) {
+		if (insn & 0x2000)
+			return (unsigned char)(regs->tstate >> 24);	/* %asi */
+		else
+			return (unsigned char)(insn >> 5);		/* imm_asi */
+	} else
+		return ASI_P;
+}
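+
+/* ASI decoding: bit 23 (0x800000) marks the alternate-space
+ * load/store formats.  In the immediate form (i bit set) the ASI is
+ * taken from the %asi register, kept in bits 31:24 of TSTATE; in the
+ * register form it is the imm_asi field in bits 12:5 of the
+ * instruction.  Ordinary accesses default to the primary ASI.
+ */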
+
+/* 0x400000 = signed, 0 = unsigned */
+static inline int decode_signedness(unsigned int insn)
+{
+	return (insn & 0x400000);
+}
+
+static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
+				       unsigned int rd, int from_kernel)
+{
+	if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
+		if (from_kernel != 0)
+			__asm__ __volatile__("flushw");
+		else
+			flushw_user();
+	}
+}
+
+static inline long sign_extend_imm13(long imm)
+{
+	return imm << 51 >> 51;
+}
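+
+/* Same shift trick as the 32-bit handler, widened to long:
+ * 64 - 13 = 51, so bit 12 of the immediate is replicated across the
+ * upper 51 bits.
+ */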
+
+static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
+{
+	unsigned long value, fp;
+	
+	if (reg < 16)
+		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
+	if (regs->tstate & TSTATE_PRIV) {
+		struct reg_window *win;
+		win = (struct reg_window *)(fp + STACK_BIAS);
+		value = win->locals[reg - 16];
+	} else if (!test_thread_64bit_stack(fp)) {
+		struct reg_window32 __user *win32;
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
+		get_user(value, &win32->locals[reg - 16]);
+	} else {
+		struct reg_window __user *win;
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
+		get_user(value, &win->locals[reg - 16]);
+	}
+	return value;
+}
+
+static unsigned long *fetch_reg_addr(unsigned int reg, struct pt_regs *regs)
+{
+	unsigned long fp;
+
+	if (reg < 16)
+		return &regs->u_regs[reg];
+
+	fp = regs->u_regs[UREG_FP];
+
+	if (regs->tstate & TSTATE_PRIV) {
+		struct reg_window *win;
+		win = (struct reg_window *)(fp + STACK_BIAS);
+		return &win->locals[reg - 16];
+	} else if (!test_thread_64bit_stack(fp)) {
+		struct reg_window32 *win32;
+		win32 = (struct reg_window32 *)((unsigned long)((u32)fp));
+		return (unsigned long *)&win32->locals[reg - 16];
+	} else {
+		struct reg_window *win;
+		win = (struct reg_window *)(fp + STACK_BIAS);
+		return &win->locals[reg - 16];
+	}
+}
+
+unsigned long compute_effective_address(struct pt_regs *regs,
+					unsigned int insn, unsigned int rd)
+{
+	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
+	unsigned int rs1 = (insn >> 14) & 0x1f;
+	unsigned int rs2 = insn & 0x1f;
+	unsigned long addr;
+
+	if (insn & 0x2000) {
+		maybe_flush_windows(rs1, 0, rd, from_kernel);
+		addr = (fetch_reg(rs1, regs) + sign_extend_imm13(insn));
+	} else {
+		maybe_flush_windows(rs1, rs2, rd, from_kernel);
+		addr = (fetch_reg(rs1, regs) + fetch_reg(rs2, regs));
+	}
+
+	if (!from_kernel && test_thread_flag(TIF_32BIT))
+		addr &= 0xffffffff;
+
+	return addr;
+}
+
+/* This is just to make gcc think die_if_kernel does return... */
+static void __used unaligned_panic(char *str, struct pt_regs *regs)
+{
+	die_if_kernel(str, regs);
+}
+
+extern int do_int_load(unsigned long *dest_reg, int size,
+		       unsigned long *saddr, int is_signed, int asi);
+	
+extern int __do_int_store(unsigned long *dst_addr, int size,
+			  unsigned long src_val, int asi);
+
+static inline int do_int_store(int reg_num, int size, unsigned long *dst_addr,
+			       struct pt_regs *regs, int asi, int orig_asi)
+{
+	unsigned long zero = 0;
+	unsigned long *src_val_p = &zero;
+	unsigned long src_val;
+
+	if (size == 16) {
+		size = 8;
+		zero = (((long)(reg_num ?
+		        (unsigned int)fetch_reg(reg_num, regs) : 0)) << 32) |
+			(unsigned int)fetch_reg(reg_num + 1, regs);
+	} else if (reg_num) {
+		src_val_p = fetch_reg_addr(reg_num, regs);
+	}
+	src_val = *src_val_p;
+	if (unlikely(asi != orig_asi)) {
+		switch (size) {
+		case 2:
+			src_val = swab16(src_val);
+			break;
+		case 4:
+			src_val = swab32(src_val);
+			break;
+		case 8:
+			src_val = swab64(src_val);
+			break;
+		case 16:
+		default:
+			BUG();
+			break;
+		}
+	}
+	return __do_int_store(dst_addr, size, src_val, asi);
+}
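+
+/* For std (decoded as size 16) the even/odd register pair is packed
+ * into one 64-bit value and written with a single 8-byte store.  The
+ * swab cases cover trapping instructions that used a little-endian
+ * ASI: the caller strips the little-endian bit from the ASI, and the
+ * value is byte-swapped here to compensate.
+ */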
+
+static inline void advance(struct pt_regs *regs)
+{
+	regs->tpc   = regs->tnpc;
+	regs->tnpc += 4;
+	if (test_thread_flag(TIF_32BIT)) {
+		regs->tpc &= 0xffffffff;
+		regs->tnpc &= 0xffffffff;
+	}
+}
+
+static inline int floating_point_load_or_store_p(unsigned int insn)
+{
+	return (insn >> 24) & 1;
+}
+
+static inline int ok_for_kernel(unsigned int insn)
+{
+	return !floating_point_load_or_store_p(insn);
+}
+
+static void kernel_mna_trap_fault(int fixup_tstate_asi)
+{
+	struct pt_regs *regs = current_thread_info()->kern_una_regs;
+	unsigned int insn = current_thread_info()->kern_una_insn;
+	const struct exception_table_entry *entry;
+
+	entry = search_exception_tables(regs->tpc);
+	if (!entry) {
+		unsigned long address;
+
+		address = compute_effective_address(regs, insn,
+						    ((insn >> 25) & 0x1f));
+		if (address < PAGE_SIZE) {
+			printk(KERN_ALERT "Unable to handle kernel NULL "
+			       "pointer dereference in mna handler");
+		} else
+			printk(KERN_ALERT "Unable to handle kernel paging "
+			       "request in mna handler");
+		printk(KERN_ALERT " at virtual address %016lx\n", address);
+		printk(KERN_ALERT "current->{active_,}mm->context = %016lx\n",
+			(current->mm ? CTX_HWBITS(current->mm->context) :
+			CTX_HWBITS(current->active_mm->context)));
+		printk(KERN_ALERT "current->{active_,}mm->pgd = %016lx\n",
+			(current->mm ? (unsigned long) current->mm->pgd :
+			(unsigned long) current->active_mm->pgd));
+		die_if_kernel("Oops", regs);
+		/* Not reached */
+	}
+	regs->tpc = entry->fixup;
+	regs->tnpc = regs->tpc + 4;
+
+	if (fixup_tstate_asi) {
+		regs->tstate &= ~TSTATE_ASI;
+		regs->tstate |= (ASI_AIUS << 24UL);
+	}
+}
+
+static void log_unaligned(struct pt_regs *regs)
+{
+	static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
+
+	if (__ratelimit(&ratelimit)) {
+		printk("Kernel unaligned access at TPC[%lx] %pS\n",
+		       regs->tpc, (void *) regs->tpc);
+	}
+}
+
+asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
+{
+	enum direction dir = decode_direction(insn);
+	int size = decode_access_size(regs, insn);
+	int orig_asi, asi;
+
+	current_thread_info()->kern_una_regs = regs;
+	current_thread_info()->kern_una_insn = insn;
+
+	orig_asi = asi = decode_asi(insn, regs);
+
+	/* If this is a {get,put}_user() on an unaligned userspace pointer,
+	 * just signal a fault and do not log the event.
+	 */
+	if (asi == ASI_AIUS) {
+		kernel_mna_trap_fault(0);
+		return;
+	}
+
+	log_unaligned(regs);
+
+	if (!ok_for_kernel(insn) || dir == both) {
+		printk("Unsupported unaligned load/store trap for kernel "
+		       "at <%016lx>.\n", regs->tpc);
+		unaligned_panic("Kernel does fpu/atomic "
+				"unaligned load/store.", regs);
+
+		kernel_mna_trap_fault(0);
+	} else {
+		unsigned long addr, *reg_addr;
+		int err;
+
+		addr = compute_effective_address(regs, insn,
+						 ((insn >> 25) & 0x1f));
+		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
+		switch (asi) {
+		case ASI_NL:
+		case ASI_AIUPL:
+		case ASI_AIUSL:
+		case ASI_PL:
+		case ASI_SL:
+		case ASI_PNFL:
+		case ASI_SNFL:
+			asi &= ~0x08;
+			break;
+		}
+		switch (dir) {
+		case load:
+			reg_addr = fetch_reg_addr(((insn>>25)&0x1f), regs);
+			err = do_int_load(reg_addr, size,
+					  (unsigned long *) addr,
+					  decode_signedness(insn), asi);
+			if (likely(!err) && unlikely(asi != orig_asi)) {
+				unsigned long val_in = *reg_addr;
+				switch (size) {
+				case 2:
+					val_in = swab16(val_in);
+					break;
+				case 4:
+					val_in = swab32(val_in);
+					break;
+				case 8:
+					val_in = swab64(val_in);
+					break;
+				case 16:
+				default:
+					BUG();
+					break;
+				}
+				*reg_addr = val_in;
+			}
+			break;
+
+		case store:
+			err = do_int_store(((insn>>25)&0x1f), size,
+					   (unsigned long *) addr, regs,
+					   asi, orig_asi);
+			break;
+
+		default:
+			panic("Impossible kernel unaligned trap.");
+			/* Not reached... */
+		}
+		if (unlikely(err))
+			kernel_mna_trap_fault(1);
+		else
+			advance(regs);
+	}
+}
+
+int handle_popc(u32 insn, struct pt_regs *regs)
+{
+	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
+	int ret, rd = ((insn >> 25) & 0x1f);
+	u64 value;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+	if (insn & 0x2000) {
+		maybe_flush_windows(0, 0, rd, from_kernel);
+		value = sign_extend_imm13(insn);
+	} else {
+		maybe_flush_windows(0, insn & 0x1f, rd, from_kernel);
+		value = fetch_reg(insn & 0x1f, regs);
+	}
+	ret = hweight64(value);
+	if (rd < 16) {
+		if (rd)
+			regs->u_regs[rd] = ret;
+	} else {
+		unsigned long fp = regs->u_regs[UREG_FP];
+
+		if (!test_thread_64bit_stack(fp)) {
+			struct reg_window32 __user *win32;
+			win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
+			put_user(ret, &win32->locals[rd - 16]);
+		} else {
+			struct reg_window __user *win;
+			win = (struct reg_window __user *)(fp + STACK_BIAS);
+			put_user(ret, &win->locals[rd - 16]);
+		}
+	}
+	advance(regs);
+	return 1;
+}
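+
+/* handle_popc() emulates the POPC (population count) instruction for
+ * CPUs that trap on it rather than implement it in hardware: the
+ * operand is fetched just as hardware would (register or
+ * sign-extended immediate), counted with hweight64(), and the result
+ * written back -- through put_user() when %rd names a window
+ * register living on the user stack.
+ */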
+
+extern void do_fpother(struct pt_regs *regs);
+extern void do_privact(struct pt_regs *regs);
+extern void sun4v_data_access_exception(struct pt_regs *regs,
+					unsigned long addr,
+					unsigned long type_ctx);
+
+int handle_ldf_stq(u32 insn, struct pt_regs *regs)
+{
+	unsigned long addr = compute_effective_address(regs, insn, 0);
+	int freg;
+	struct fpustate *f = FPUSTATE;
+	int asi = decode_asi(insn, regs);
+	int flag;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+	save_and_clear_fpu();
+	current_thread_info()->xfsr[0] &= ~0x1c000;
+	if (insn & 0x200000) {
+		/* STQ */
+		u64 first = 0, second = 0;
+		
+		freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+		if (freg & 3) {
+			current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
+			do_fpother(regs);
+			return 0;
+		}
+		if (current_thread_info()->fpsaved[0] & flag) {
+			first = *(u64 *)&f->regs[freg];
+			second = *(u64 *)&f->regs[freg+2];
+		}
+		if (asi < 0x80) {
+			do_privact(regs);
+			return 1;
+		}
+		switch (asi) {
+		case ASI_P:
+		case ASI_S: break;
+		case ASI_PL:
+		case ASI_SL: 
+			{
+				/* Need to convert endians */
+				u64 tmp = __swab64p(&first);
+				
+				first = __swab64p(&second);
+				second = tmp;
+				break;
+			}
+		default:
+			if (tlb_type == hypervisor)
+				sun4v_data_access_exception(regs, addr, 0);
+			else
+				spitfire_data_access_exception(regs, 0, addr);
+			return 1;
+		}
+		if (put_user (first >> 32, (u32 __user *)addr) ||
+		    __put_user ((u32)first, (u32 __user *)(addr + 4)) ||
+		    __put_user (second >> 32, (u32 __user *)(addr + 8)) ||
+		    __put_user ((u32)second, (u32 __user *)(addr + 12))) {
+			if (tlb_type == hypervisor)
+				sun4v_data_access_exception(regs, addr, 0);
+			else
+				spitfire_data_access_exception(regs, 0, addr);
+		    	return 1;
+		}
+	} else {
+		/* LDF, LDDF, LDQF */
+		u32 data[4] __attribute__ ((aligned(8)));
+		int size, i;
+		int err;
+
+		if (asi < 0x80) {
+			do_privact(regs);
+			return 1;
+		} else if (asi > ASI_SNFL) {
+			if (tlb_type == hypervisor)
+				sun4v_data_access_exception(regs, addr, 0);
+			else
+				spitfire_data_access_exception(regs, 0, addr);
+			return 1;
+		}
+		switch (insn & 0x180000) {
+		case 0x000000: size = 1; break;
+		case 0x100000: size = 4; break;
+		default: size = 2; break;
+		}
+		if (size == 1)
+			freg = (insn >> 25) & 0x1f;
+		else
+			freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+
+		for (i = 0; i < size; i++)
+			data[i] = 0;
+		
+		err = get_user (data[0], (u32 __user *) addr);
+		if (!err) {
+			for (i = 1; i < size; i++)
+				err |= __get_user (data[i], (u32 __user *)(addr + 4*i));
+		}
+		if (err && !(asi & 0x2 /* NF */)) {
+			if (tlb_type == hypervisor)
+				sun4v_data_access_exception(regs, addr, 0);
+			else
+				spitfire_data_access_exception(regs, 0, addr);
+			return 1;
+		}
+		if (asi & 0x8) /* Little */ {
+			u64 tmp;
+
+			switch (size) {
+			case 1: data[0] = le32_to_cpup(data + 0); break;
+			default:*(u64 *)(data + 0) = le64_to_cpup((u64 *)(data + 0));
+				break;
+			case 4: tmp = le64_to_cpup((u64 *)(data + 0));
+				*(u64 *)(data + 0) = le64_to_cpup((u64 *)(data + 2));
+				*(u64 *)(data + 2) = tmp;
+				break;
+			}
+		}
+		if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) {
+			current_thread_info()->fpsaved[0] = FPRS_FEF;
+			current_thread_info()->gsr[0] = 0;
+		}
+		if (!(current_thread_info()->fpsaved[0] & flag)) {
+			if (freg < 32)
+				memset(f->regs, 0, 32*sizeof(u32));
+			else
+				memset(f->regs+32, 0, 32*sizeof(u32));
+		}
+		memcpy(f->regs + freg, data, size * 4);
+		current_thread_info()->fpsaved[0] |= flag;
+	}
+	advance(regs);
+	return 1;
+}
+
+void handle_ld_nf(u32 insn, struct pt_regs *regs)
+{
+	int rd = ((insn >> 25) & 0x1f);
+	int from_kernel = (regs->tstate & TSTATE_PRIV) != 0;
+	unsigned long *reg;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+	maybe_flush_windows(0, 0, rd, from_kernel);
+	reg = fetch_reg_addr(rd, regs);
+	if (from_kernel || rd < 16) {
+		reg[0] = 0;
+		if ((insn & 0x780000) == 0x180000)
+			reg[1] = 0;
+	} else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
+		put_user(0, (int __user *) reg);
+		if ((insn & 0x780000) == 0x180000)
+			put_user(0, ((int __user *) reg) + 1);
+	} else {
+		put_user(0, (unsigned long __user *) reg);
+		if ((insn & 0x780000) == 0x180000)
+			put_user(0, (unsigned long __user *) reg + 1);
+	}
+	advance(regs);
+}
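+
+/* handle_ld_nf() completes a non-faulting load (an *_NF ASI) whose
+ * target was unmapped: the kernel supplies the defined result of
+ * zero, clearing %rd (and %rd+1 for the double-word forms matched by
+ * 0x180000) and skipping the instruction.
+ */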
+
+void handle_lddfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
+{
+	enum ctx_state prev_state = exception_enter();
+	unsigned long pc = regs->tpc;
+	unsigned long tstate = regs->tstate;
+	u32 insn;
+	u64 value;
+	u8 freg;
+	int flag;
+	struct fpustate *f = FPUSTATE;
+
+	if (tstate & TSTATE_PRIV)
+		die_if_kernel("lddfmna from kernel", regs);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
+	if (test_thread_flag(TIF_32BIT))
+		pc = (u32)pc;
+	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
+		int asi = decode_asi(insn, regs);
+		u32 first, second;
+		int err;
+
+		if ((asi > ASI_SNFL) ||
+		    (asi < ASI_P))
+			goto daex;
+		first = second = 0;
+		err = get_user(first, (u32 __user *)sfar);
+		if (!err)
+			err = get_user(second, (u32 __user *)(sfar + 4));
+		if (err) {
+			if (!(asi & 0x2))
+				goto daex;
+			first = second = 0;
+		}
+		save_and_clear_fpu();
+		freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		value = (((u64)first) << 32) | second;
+		if (asi & 0x8) /* Little */
+			value = __swab64p(&value);
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+		if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) {
+			current_thread_info()->fpsaved[0] = FPRS_FEF;
+			current_thread_info()->gsr[0] = 0;
+		}
+		if (!(current_thread_info()->fpsaved[0] & flag)) {
+			if (freg < 32)
+				memset(f->regs, 0, 32*sizeof(u32));
+			else
+				memset(f->regs+32, 0, 32*sizeof(u32));
+		}
+		*(u64 *)(f->regs + freg) = value;
+		current_thread_info()->fpsaved[0] |= flag;
+	} else {
+daex:
+		if (tlb_type == hypervisor)
+			sun4v_data_access_exception(regs, sfar, sfsr);
+		else
+			spitfire_data_access_exception(regs, sfsr, sfar);
+		goto out;
+	}
+	advance(regs);
+out:
+	exception_exit(prev_state);
+}
+
+void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr)
+{
+	enum ctx_state prev_state = exception_enter();
+	unsigned long pc = regs->tpc;
+	unsigned long tstate = regs->tstate;
+	u32 insn;
+	u64 value;
+	u8 freg;
+	int flag;
+	struct fpustate *f = FPUSTATE;
+
+	if (tstate & TSTATE_PRIV)
+		die_if_kernel("stdfmna from kernel", regs);
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, sfar);
+	if (test_thread_flag(TIF_32BIT))
+		pc = (u32)pc;
+	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
+		int asi = decode_asi(insn, regs);
+		freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20);
+		value = 0;
+		flag = (freg < 32) ? FPRS_DL : FPRS_DU;
+		if ((asi > ASI_SNFL) ||
+		    (asi < ASI_P))
+			goto daex;
+		save_and_clear_fpu();
+		if (current_thread_info()->fpsaved[0] & flag)
+			value = *(u64 *)&f->regs[freg];
+		switch (asi) {
+		case ASI_P:
+		case ASI_S: break;
+		case ASI_PL:
+		case ASI_SL: 
+			value = __swab64p(&value); break;
+		default: goto daex;
+		}
+		if (put_user (value >> 32, (u32 __user *) sfar) ||
+		    __put_user ((u32)value, (u32 __user *)(sfar + 4)))
+			goto daex;
+	} else {
+daex:
+		if (tlb_type == hypervisor)
+			sun4v_data_access_exception(regs, sfar, sfsr);
+		else
+			spitfire_data_access_exception(regs, sfsr, sfar);
+		goto out;
+	}
+	advance(regs);
+out:
+	exception_exit(prev_state);
+}
diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c
new file mode 100644
index 0000000..d852ae5
--- /dev/null
+++ b/arch/sparc/kernel/uprobes.c
@@ -0,0 +1,331 @@
+/*
+ * User-space Probes (UProbes) for sparc
+ *
+ * Copyright (C) 2013 Oracle Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *	Jose E. Marchesi <jose.marchesi@oracle.com>
+ *	Eric Saint Etienne <eric.saint.etienne@oracle.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/uprobes.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h> /* For struct task_struct */
+#include <linux/kdebug.h>
+
+#include <asm/cacheflush.h>
+
+/* Compute the address of the breakpoint instruction and return it.
+ *
+ * Note that uprobe_get_swbp_addr is defined as a weak symbol in
+ * kernel/events/uprobe.c.
+ */
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+	return instruction_pointer(regs);
+}
+
+static void copy_to_page(struct page *page, unsigned long vaddr,
+			 const void *src, int len)
+{
+	void *kaddr = kmap_atomic(page);
+
+	memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
+	kunmap_atomic(kaddr);
+}
+
+/* Fill in the xol area with the probed instruction followed by the
+ * single-step trap.  Some fixups in the copied instruction are
+ * performed at this point.
+ *
+ * Note that arch_uprobe_copy_ixol is defined as a weak symbol in
+ * kernel/events/uprobes.c.
+ */
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+			   void *src, unsigned long len)
+{
+	const u32 stp_insn = UPROBE_STP_INSN;
+	u32 insn = *(u32 *) src;
+
+	/* Branches annulling their delay slot must be fixed to not do
+	 * so.  Clearing the annul bit on these instructions we can be
+	 * sure the single-step breakpoint in the XOL slot will be
+	 * executed.
+	 */
+
+	u32 op = (insn >> 30) & 0x3;
+	u32 op2 = (insn >> 22) & 0x7;
+
+	if (op == 0 &&
+	    (op2 == 1 || op2 == 2 || op2 == 3 || op2 == 5 || op2 == 6) &&
+	    (insn & ANNUL_BIT) == ANNUL_BIT)
+		insn &= ~ANNUL_BIT;
+
+	copy_to_page(page, vaddr, &insn, len);
+	copy_to_page(page, vaddr+len, &stp_insn, 4);
+}
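
To make the annul-bit fixup above concrete, a user-space sketch of the
same decode (assuming ANNUL_BIT is bit 29, the "a" bit of the V9 branch
formats): a "bne,a" is rewritten into a plain "bne" so the breakpoint
planted after the copied instruction cannot be annulled away.

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t insn = 0x32800002;		/* bne,a  .+8          */
		uint32_t op   = (insn >> 30) & 0x3;	/* 0: branches/sethi   */
		uint32_t op2  = (insn >> 22) & 0x7;	/* 1,2,3,5,6: branches */

		if (op == 0 && (op2 == 1 || op2 == 2 || op2 == 3 ||
				op2 == 5 || op2 == 6) && (insn & (1u << 29)))
			insn &= ~(1u << 29);		/* clear the annul bit */

		printf("%08x\n", insn);			/* 12800002: bne .+8   */
		return 0;
	}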
+
+/* Instruction analysis/validity.
+ *
+ * This function returns 0 on success or a negative number on error.
+ */
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
+			     struct mm_struct *mm, unsigned long addr)
+{
+	/* Any unsupported instruction?  Then return -EINVAL  */
+	return 0;
+}
+
+/* If INSN is a relative control transfer instruction, return the
+ * corrected branch destination value.
+ *
+ * Note that regs->tpc and regs->tnpc still hold the values of the
+ * program counters at the time of the single-step trap due to the
+ * execution of the UPROBE_STP_INSN at utask->xol_vaddr + 4.
+ */
+static unsigned long relbranch_fixup(u32 insn, struct uprobe_task *utask,
+				     struct pt_regs *regs)
+{
+	/* Branch not taken, no mods necessary.  */
+	if (regs->tnpc == regs->tpc + 0x4UL)
+		return utask->autask.saved_tnpc + 0x4UL;
+
+	/* The three cases are call, branch w/prediction,
+	 * and traditional branch.
+	 */
+	if ((insn & 0xc0000000) == 0x40000000 ||
+	    (insn & 0xc1c00000) == 0x00400000 ||
+	    (insn & 0xc1c00000) == 0x00800000) {
+		unsigned long real_pc = (unsigned long) utask->vaddr;
+		unsigned long ixol_addr = utask->xol_vaddr;
+
+		/* The instruction did all the work for us
+		 * already, just apply the offset to the correct
+		 * instruction location.
+		 */
+		return (real_pc + (regs->tnpc - ixol_addr));
+	}
+
+	/* It is jmpl or some other absolute PC modification instruction,
+	 * leave NPC as-is.
+	 */
+	return regs->tnpc;
+}
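
A worked example of the rebasing arithmetic above, with made-up
addresses: for a taken branch the hardware computed tnpc relative to
the XOL slot, and the fixup carries that displacement back to the
probed location.

	#include <stdio.h>

	int main(void)
	{
		unsigned long real_pc = 0x10000;	/* probed instruction  */
		unsigned long xol     = 0xff000;	/* XOL slot            */
		unsigned long tnpc    = xol + 0x40;	/* branch taken, +0x40 */

		/* Same displacement, applied at the original location. */
		printf("fixed tnpc = %#lx\n", real_pc + (tnpc - xol));
		return 0;
	}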
+
+/* If INSN is an instruction which writes its PC location
+ * into a destination register, fix that up.
+ */
+static int retpc_fixup(struct pt_regs *regs, u32 insn,
+		       unsigned long real_pc)
+{
+	unsigned long *slot = NULL;
+	int rc = 0;
+
+	/* Simplest case is 'call', which always uses %o7 */
+	if ((insn & 0xc0000000) == 0x40000000)
+		slot = &regs->u_regs[UREG_I7];
+
+	/* 'jmpl' encodes the register inside of the opcode */
+	if ((insn & 0xc1f80000) == 0x81c00000) {
+		unsigned long rd = ((insn >> 25) & 0x1f);
+
+		if (rd <= 15) {
+			slot = &regs->u_regs[rd];
+		} else {
+			unsigned long fp = regs->u_regs[UREG_FP];
+			/* Hard case, it goes onto the stack. */
+			flushw_all();
+
+			rd -= 16;
+			if (test_thread_64bit_stack(fp)) {
+				unsigned long __user *uslot =
+			(unsigned long __user *) (fp + STACK_BIAS) + rd;
+				rc = __put_user(real_pc, uslot);
+			} else {
+				unsigned int __user *uslot = (unsigned int
+						__user *) fp + rd;
+				rc = __put_user((u32) real_pc, uslot);
+			}
+		}
+	}
+	if (slot != NULL)
+		*slot = real_pc;
+	return rc;
+}
+
+/* Single-stepping can be avoided for certain instructions: NOPs and
+ * instructions that can be emulated.  This function determines
+ * whether the instruction where the uprobe is installed falls in one
+ * of these cases and emulates it.
+ *
+ * This function returns true if the single-stepping can be skipped,
+ * false otherwise.
+ */
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	/* We currently only emulate NOP instructions
+	 * (0x01000000, i.e. "sethi %hi(0), %g0").
+	 */
+
+	if (auprobe->ixol == (1 << 24)) {
+		regs->tnpc += 4;
+		regs->tpc += 4;
+		return true;
+	}
+
+	return false;
+}
+
+/* Prepare to execute out of line.  At this point
+ * current->utask->xol_vaddr points to an allocated XOL slot properly
+ * initialized with the original instruction and the single-stepping
+ * trap instruction.
+ *
+ * This function returns 0 on success, non-zero on error.
+ */
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+	struct arch_uprobe_task *autask = &current->utask->autask;
+
+	/* Save the current program counters so they can be restored
+	 * later.
+	 */
+	autask->saved_tpc = regs->tpc;
+	autask->saved_tnpc = regs->tnpc;
+
+	/* Adjust PC and NPC so the first instruction in the XOL slot
+	 * will be executed by the user task.
+	 */
+	instruction_pointer_set(regs, utask->xol_vaddr);
+
+	return 0;
+}
+
+/* Prepare to resume execution after the single-step.  To avoid the
+ * SMP problems that would occur if we temporarily put back the
+ * original opcode in order to single-step, we single-stepped a copy
+ * of the instruction instead.
+ *
+ * This function returns 0 on success, non-zero on error.
+ */
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+	struct arch_uprobe_task *autask = &utask->autask;
+	u32 insn = auprobe->ixol;
+	int rc = 0;
+
+	if (utask->state == UTASK_SSTEP_ACK) {
+		regs->tnpc = relbranch_fixup(insn, utask, regs);
+		regs->tpc = autask->saved_tnpc;
+		rc = retpc_fixup(regs, insn, (unsigned long) utask->vaddr);
+	} else {
+		regs->tnpc = utask->vaddr + 4;
+		regs->tpc = autask->saved_tnpc + 4;
+	}
+	return rc;
+}
+
+/* Handler for uprobe traps.  This is called from the traps table and
+ * triggers the proper die notification.
+ */
+asmlinkage void uprobe_trap(struct pt_regs *regs,
+			    unsigned long trap_level)
+{
+	BUG_ON(trap_level != 0x173 && trap_level != 0x174);
+
+	/* We are only interested in user-mode code.  Uprobe traps
+	 * shall not be present in kernel code.
+	 */
+	if (!user_mode(regs)) {
+		local_irq_enable();
+		bad_trap(regs, trap_level);
+		return;
+	}
+
+	/* trap_level == 0x173 --> ta 0x73
+	 * trap_level == 0x174 --> ta 0x74
+	 */
+	if (notify_die((trap_level == 0x173) ? DIE_BPT : DIE_SSTEP,
+				(trap_level == 0x173) ? "bpt" : "sstep",
+				regs, 0, trap_level, SIGTRAP) != NOTIFY_STOP)
+		bad_trap(regs, trap_level);
+}
+
+/* Callback routine for handling die notifications.  */
+int arch_uprobe_exception_notify(struct notifier_block *self,
+				 unsigned long val, void *data)
+{
+	int ret = NOTIFY_DONE;
+	struct die_args *args = (struct die_args *)data;
+
+	/* We are only interested in userspace traps */
+	if (args->regs && !user_mode(args->regs))
+		return NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_BPT:
+		if (uprobe_pre_sstep_notifier(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+
+	case DIE_SSTEP:
+		if (uprobe_post_sstep_notifier(args->regs))
+			ret = NOTIFY_STOP;
+		break;
+
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/* This function gets called when a XOL instruction either gets
+ * trapped or the thread has a fatal signal, so reset the instruction
+ * pointer to its probed address.
+ */
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	instruction_pointer_set(regs, utask->vaddr);
+}
+
+/* If the XOL instruction itself traps and generates a signal (say,
+ * SIGILL/SIGSEGV/etc.), then detect the case where a single-stepped
+ * instruction jumps back to its own address.
+ */
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+	return false;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+				  struct pt_regs *regs)
+{
+	unsigned long orig_ret_vaddr = regs->u_regs[UREG_I7];
+
+	regs->u_regs[UREG_I7] = trampoline_vaddr-8;
+
+	return orig_ret_vaddr + 8;
+}
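
The +8/-8 pair above follows the SPARC convention that %o7 holds the
address of the call instruction itself and that the callee returns to
%o7 + 8, skipping the call and its delay slot. A sketch with made-up
addresses:

	#include <stdio.h>

	int main(void)
	{
		unsigned long o7    = 0x100000;	/* address of the call insn */
		unsigned long tramp = 0x200000;	/* return trampoline        */

		unsigned long orig_ret = o7 + 8;	/* where ret would land */
		unsigned long new_o7   = tramp - 8;	/* ret now hits tramp   */

		printf("orig %#lx -> hijacked %#lx\n", orig_ret, new_o7 + 8);
		return 0;
	}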
diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S
new file mode 100644
index 0000000..e4cee7b
--- /dev/null
+++ b/arch/sparc/kernel/urtt_fill.S
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/thread_info.h>
+#include <asm/trap_block.h>
+#include <asm/spitfire.h>
+#include <asm/ptrace.h>
+#include <asm/head.h>
+
+		.text
+		.align	8
+		.globl	user_rtt_fill_fixup_common
+user_rtt_fill_fixup_common:
+		rdpr	%cwp, %g1
+		add	%g1, 1, %g1
+		wrpr	%g1, 0x0, %cwp
+
+		rdpr	%wstate, %g2
+		sll	%g2, 3, %g2
+		wrpr	%g2, 0x0, %wstate
+
+		/* We know %canrestore and %otherwin are both zero.  */
+
+		sethi	%hi(sparc64_kern_pri_context), %g2
+		ldx	[%g2 + %lo(sparc64_kern_pri_context)], %g2
+		mov	PRIMARY_CONTEXT, %g1
+
+661:		stxa	%g2, [%g1] ASI_DMMU
+		.section .sun4v_1insn_patch, "ax"
+		.word	661b
+		stxa	%g2, [%g1] ASI_MMU
+		.previous
+
+		sethi	%hi(KERNBASE), %g1
+		flush	%g1
+
+		mov	%g4, %l4
+		mov	%g5, %l5
+		brnz,pn	%g3, 1f
+		 mov	%g3, %l3
+
+		or	%g4, FAULT_CODE_WINFIXUP, %g4
+		stb	%g4, [%g6 + TI_FAULT_CODE]
+		stx	%g5, [%g6 + TI_FAULT_ADDR]
+1:
+		mov	%g6, %l1
+		wrpr	%g0, 0x0, %tl
+
+661:		nop
+		.section		.sun4v_1insn_patch, "ax"
+		.word			661b
+		SET_GL(0)
+		.previous
+
+661:		wrpr	%g0, RTRAP_PSTATE, %pstate
+		.section		.sun_m7_1insn_patch, "ax"
+		.word			661b
+		/* Re-enable PSTATE.mcde to maintain ADI security */
+		wrpr	%g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate
+		.previous
+
+		mov	%l1, %g6
+		ldx	[%g6 + TI_TASK], %g4
+		LOAD_PER_CPU_BASE(%g5, %g6, %g1, %g2, %g3)
+
+		brnz,pn	%l3, 1f
+		 nop
+
+		call	do_sparc64_fault
+		 add	%sp, PTREGS_OFF, %o0
+		ba,pt	%xcc, rtrap
+		 nop
+
+1:		cmp	%g3, 2
+		bne,pn	%xcc, 2f
+		 nop
+
+		sethi	%hi(tlb_type), %g1
+		lduw	[%g1 + %lo(tlb_type)], %g1
+		cmp	%g1, 3
+		bne,pt	%icc, 1f
+		 add	%sp, PTREGS_OFF, %o0
+		mov	%l4, %o2
+		call	sun4v_do_mna
+		 mov	%l5, %o1
+		ba,a,pt	%xcc, rtrap
+1:		mov	%l4, %o1
+		mov	%l5, %o2
+		call	mem_address_unaligned
+		 nop
+		ba,a,pt	%xcc, rtrap
+
+2:		sethi	%hi(tlb_type), %g1
+		mov	%l4, %o1
+		lduw	[%g1 + %lo(tlb_type)], %g1
+		mov	%l5, %o2
+		cmp	%g1, 3
+		bne,pt	%icc, 1f
+		 add	%sp, PTREGS_OFF, %o0
+		call	sun4v_data_access_exception
+		 nop
+		ba,a,pt	%xcc, rtrap
+		 nop
+
+1:		call	spitfire_data_access_exception
+		 nop
+		ba,a,pt	%xcc, rtrap
diff --git a/arch/sparc/kernel/utrap.S b/arch/sparc/kernel/utrap.S
new file mode 100644
index 0000000..7a2d9a9
--- /dev/null
+++ b/arch/sparc/kernel/utrap.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+	.globl		utrap_trap
+	.type		utrap_trap,#function
+utrap_trap:		/* %g3=handler,%g4=level */
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	ldx		[%g6 + TI_UTRAPS], %g1
+	brnz,pt		%g1, invoke_utrap
+	 nop
+
+	ba,pt		%xcc, etrap
+	 rd		%pc, %g7
+	mov		%l4, %o1
+	call		bad_trap
+	 add		%sp, PTREGS_OFF, %o0
+	ba,a,pt		%xcc, rtrap
+
+invoke_utrap:
+	sllx		%g3, 3, %g3
+	ldx		[%g1 + %g3], %g1
+	save		%sp, -128, %sp
+	rdpr		%tstate, %l6
+	rdpr		%cwp, %l7
+	andn		%l6, TSTATE_CWP, %l6
+	wrpr		%l6, %l7, %tstate
+	rdpr		%tpc, %l6
+	rdpr		%tnpc, %l7
+	wrpr		%g1, 0, %tnpc
+	done
+	.size		utrap_trap,.-utrap_trap
diff --git a/arch/sparc/kernel/vdso.c b/arch/sparc/kernel/vdso.c
new file mode 100644
index 0000000..5888066
--- /dev/null
+++ b/arch/sparc/kernel/vdso.c
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ *  Copyright 2003 Andi Kleen, SuSE Labs.
+ *
+ *  Thanks to hpa@transmeta.com for some useful hints.
+ *  Special thanks to Ingo Molnar for his early experience with
+ *  a different vsyscall implementation for Linux/IA32 and for the name.
+ */
+
+#include <linux/seqlock.h>
+#include <linux/time.h>
+#include <linux/timekeeper_internal.h>
+
+#include <asm/vvar.h>
+
+void update_vsyscall_tz(void)
+{
+	if (unlikely(vvar_data == NULL))
+		return;
+
+	vvar_data->tz_minuteswest = sys_tz.tz_minuteswest;
+	vvar_data->tz_dsttime = sys_tz.tz_dsttime;
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+	struct vvar_data *vdata = vvar_data;
+
+	if (unlikely(vdata == NULL))
+		return;
+
+	vvar_write_begin(vdata);
+	vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+	vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
+	vdata->clock.mask = tk->tkr_mono.mask;
+	vdata->clock.mult = tk->tkr_mono.mult;
+	vdata->clock.shift = tk->tkr_mono.shift;
+
+	vdata->wall_time_sec = tk->xtime_sec;
+	vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
+
+	vdata->monotonic_time_sec = tk->xtime_sec +
+				    tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec +
+				      (tk->wall_to_monotonic.tv_nsec <<
+				       tk->tkr_mono.shift);
+
+	while (vdata->monotonic_time_snsec >=
+	       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+		vdata->monotonic_time_snsec -=
+				((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
+		vdata->monotonic_time_sec++;
+	}
+
+	vdata->wall_time_coarse_sec = tk->xtime_sec;
+	vdata->wall_time_coarse_nsec =
+			(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
+
+	vdata->monotonic_time_coarse_sec =
+		vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_coarse_nsec =
+		vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+
+	while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
+		vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
+		vdata->monotonic_time_coarse_sec++;
+	}
+
+	vvar_write_end(vdata);
+}
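
A user-space sketch (hypothetical helper) of how a vDSO reader consumes
the snapshot written above: wall_time_snsec already holds nanoseconds
<< shift, so the reader adds the mult-scaled cycle delta first and
performs the single final shift at the end.

	#include <stdint.h>
	#include <stdio.h>

	static uint64_t read_wall_ns(uint64_t cycles, uint64_t cycle_last,
				     uint64_t mask, uint32_t mult,
				     uint32_t shift, uint64_t wall_snsec)
	{
		uint64_t delta = (cycles - cycle_last) & mask;

		/* (ns << shift) + delta * mult, then one final shift */
		return (wall_snsec + delta * mult) >> shift;
	}

	int main(void)
	{
		/* 100 cycles past the snapshot; mult/shift make one
		 * cycle worth 4 ns; the snapshot held 1000 ns (<< 4).
		 */
		printf("%llu\n", (unsigned long long)
		       read_wall_ns(1100, 1000, ~0ULL, 64, 4, 1000 << 4));
		return 0;
	}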
diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c
new file mode 100644
index 0000000..32bae68
--- /dev/null
+++ b/arch/sparc/kernel/vio.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+/* vio.c: Virtual I/O channel devices probing infrastructure.
+ *
+ *    Copyright (c) 2003-2005 IBM Corp.
+ *     Dave Engebretsen engebret@us.ibm.com
+ *     Santiago Leon santil@us.ibm.com
+ *     Hollis Blanchard <hollisb@us.ibm.com>
+ *     Stephen Rothwell
+ *
+ * Adapted to sparc64 by David S. Miller davem@davemloft.net
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/irq.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+#include <asm/mdesc.h>
+#include <asm/vio.h>
+
+static const struct vio_device_id *vio_match_device(
+	const struct vio_device_id *matches,
+	const struct vio_dev *dev)
+{
+	const char *type, *compat;
+	int len;
+
+	type = dev->type;
+	compat = dev->compat;
+	len = dev->compat_len;
+
+	while (matches->type[0] || matches->compat[0]) {
+		int match = 1;
+		if (matches->type[0])
+			match &= !strcmp(matches->type, type);
+
+		if (matches->compat[0]) {
+			match &= len &&
+				of_find_in_proplist(compat, matches->compat, len);
+		}
+		if (match)
+			return matches;
+		matches++;
+	}
+	return NULL;
+}
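
For reference, a minimal id table of the shape the matcher above walks
(hypothetical values; matching on the type string alone is the common
case, and the all-empty sentinel terminates the loop):

	static const struct vio_device_id vdc_match[] = {
		{ .type = "vdc-port" },	/* match on device type only */
		{},			/* empty sentinel ends the walk */
	};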
+
+static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
+{
+	const struct vio_dev *vio_dev = to_vio_dev(dev);
+
+	add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, vio_dev->compat);
+	return 0;
+}
+
+static int vio_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct vio_dev *vio_dev = to_vio_dev(dev);
+	struct vio_driver *vio_drv = to_vio_driver(drv);
+	const struct vio_device_id *matches = vio_drv->id_table;
+
+	if (!matches)
+		return 0;
+
+	return vio_match_device(matches, vio_dev) != NULL;
+}
+
+static int vio_device_probe(struct device *dev)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	struct vio_driver *drv = to_vio_driver(dev->driver);
+	const struct vio_device_id *id;
+
+	if (!drv->probe)
+		return -ENODEV;
+
+	id = vio_match_device(drv->id_table, vdev);
+	if (!id)
+		return -ENODEV;
+
+	/* alloc irqs (unless the driver specified not to) */
+	if (!drv->no_irq) {
+		if (vdev->tx_irq == 0 && vdev->tx_ino != ~0UL)
+			vdev->tx_irq = sun4v_build_virq(vdev->cdev_handle,
+							vdev->tx_ino);
+
+		if (vdev->rx_irq == 0 && vdev->rx_ino != ~0UL)
+			vdev->rx_irq = sun4v_build_virq(vdev->cdev_handle,
+							vdev->rx_ino);
+	}
+
+	return drv->probe(vdev, id);
+}
+
+static int vio_device_remove(struct device *dev)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	struct vio_driver *drv = to_vio_driver(dev->driver);
+
+	if (drv->remove) {
+		/*
+		 * Ideally, we would remove/deallocate tx/rx virqs
+		 * here - however, there are currently no support
+		 * routines to do so. TBD
+		 */
+
+		return drv->remove(vdev);
+	}
+
+	return 1;
+}
+
+static ssize_t devspec_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	const char *str = "none";
+
+	if (!strcmp(vdev->type, "vnet-port"))
+		str = "vnet";
+	else if (!strcmp(vdev->type, "vdc-port"))
+		str = "vdisk";
+
+	return sprintf(buf, "%s\n", str);
+}
+static DEVICE_ATTR_RO(devspec);
+
+static ssize_t type_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	return sprintf(buf, "%s\n", vdev->type);
+}
+static DEVICE_ATTR_RO(type);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	const struct vio_dev *vdev = to_vio_dev(dev);
+
+	return sprintf(buf, "vio:T%sS%s\n", vdev->type, vdev->compat);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *vio_dev_attrs[] = {
+	&dev_attr_devspec.attr,
+	&dev_attr_type.attr,
+	&dev_attr_modalias.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vio_dev);
+
+static struct bus_type vio_bus_type = {
+	.name		= "vio",
+	.dev_groups	= vio_dev_groups,
+	.uevent         = vio_hotplug,
+	.match		= vio_bus_match,
+	.probe		= vio_device_probe,
+	.remove		= vio_device_remove,
+};
+
+int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
+			const char *mod_name)
+{
+	viodrv->driver.bus = &vio_bus_type;
+	viodrv->driver.name = viodrv->name;
+	viodrv->driver.owner = owner;
+	viodrv->driver.mod_name = mod_name;
+
+	return driver_register(&viodrv->driver);
+}
+EXPORT_SYMBOL(__vio_register_driver);
+
+void vio_unregister_driver(struct vio_driver *viodrv)
+{
+	driver_unregister(&viodrv->driver);
+}
+EXPORT_SYMBOL(vio_unregister_driver);
+
+static void vio_dev_release(struct device *dev)
+{
+	kfree(to_vio_dev(dev));
+}
+
+static ssize_t
+show_pciobppath_attr(struct device *dev, struct device_attribute *attr,
+		     char *buf)
+{
+	struct vio_dev *vdev;
+	struct device_node *dp;
+
+	vdev = to_vio_dev(dev);
+	dp = vdev->dp;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", dp->full_name);
+}
+
+static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH,
+		   show_pciobppath_attr, NULL);
+
+static struct device_node *cdev_node;
+
+static struct vio_dev *root_vdev;
+static u64 cdev_cfg_handle;
+
+static const u64 *vio_cfg_handle(struct mdesc_handle *hp, u64 node)
+{
+	const u64 *cfg_handle = NULL;
+	u64 a;
+
+	mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+		u64 target;
+
+		target = mdesc_arc_target(hp, a);
+		cfg_handle = mdesc_get_property(hp, target,
+						"cfg-handle", NULL);
+		if (cfg_handle)
+			break;
+	}
+
+	return cfg_handle;
+}
+
+/**
+ * vio_vdev_node() - Find VDEV node in MD
+ * @hp:  Handle to the MD
+ * @vdev:  Pointer to VDEV
+ *
+ * Find the node in the current MD which matches the given vio_dev. This
+ * must be done dynamically since the node value can change if the MD
+ * is updated.
+ *
+ * NOTE: the MD must be locked, using mdesc_grab(), when calling this routine
+ *
+ * Return: The VDEV node in MDESC
+ */
+u64 vio_vdev_node(struct mdesc_handle *hp, struct vio_dev *vdev)
+{
+	u64 node;
+
+	if (vdev == NULL)
+		return MDESC_NODE_NULL;
+
+	node = mdesc_get_node(hp, (const char *)vdev->node_name,
+			      &vdev->md_node_info);
+
+	return node;
+}
+EXPORT_SYMBOL(vio_vdev_node);
+
+static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp,
+				  struct vio_dev *vdev)
+{
+	u64 a;
+
+	vdev->tx_ino = ~0UL;
+	vdev->rx_ino = ~0UL;
+	vdev->channel_id = ~0UL;
+	mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
+		const u64 *chan_id;
+		const u64 *irq;
+		u64 target;
+
+		target = mdesc_arc_target(hp, a);
+
+		irq = mdesc_get_property(hp, target, "tx-ino", NULL);
+		if (irq)
+			vdev->tx_ino = *irq;
+
+		irq = mdesc_get_property(hp, target, "rx-ino", NULL);
+		if (irq)
+			vdev->rx_ino = *irq;
+
+		chan_id = mdesc_get_property(hp, target, "id", NULL);
+		if (chan_id)
+			vdev->channel_id = *chan_id;
+	}
+
+	vdev->cdev_handle = cdev_cfg_handle;
+}
+
+int vio_set_intr(unsigned long dev_ino, int state)
+{
+	int err;
+
+	err = sun4v_vintr_set_valid(cdev_cfg_handle, dev_ino, state);
+	return err;
+}
+EXPORT_SYMBOL(vio_set_intr);
+
+static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
+				      const char *node_name,
+				      struct device *parent)
+{
+	const char *type, *compat;
+	struct device_node *dp;
+	struct vio_dev *vdev;
+	int err, tlen, clen;
+	const u64 *id, *cfg_handle;
+
+	type = mdesc_get_property(hp, mp, "device-type", &tlen);
+	if (!type) {
+		type = mdesc_get_property(hp, mp, "name", &tlen);
+		if (!type) {
+			type = mdesc_node_name(hp, mp);
+			tlen = strlen(type) + 1;
+		}
+	}
+	if (tlen > VIO_MAX_TYPE_LEN || strlen(type) >= VIO_MAX_TYPE_LEN) {
+		printk(KERN_ERR "VIO: Type string [%s] is too long.\n",
+		       type);
+		return NULL;
+	}
+
+	id = mdesc_get_property(hp, mp, "id", NULL);
+
+	cfg_handle = vio_cfg_handle(hp, mp);
+
+	compat = mdesc_get_property(hp, mp, "device-type", &clen);
+	if (!compat) {
+		clen = 0;
+	} else if (clen > VIO_MAX_COMPAT_LEN) {
+		printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n",
+		       clen, type);
+		return NULL;
+	}
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev) {
+		printk(KERN_ERR "VIO: Could not allocate vio_dev\n");
+		return NULL;
+	}
+
+	vdev->mp = mp;
+	memcpy(vdev->type, type, tlen);
+	if (compat)
+		memcpy(vdev->compat, compat, clen);
+	else
+		memset(vdev->compat, 0, sizeof(vdev->compat));
+	vdev->compat_len = clen;
+
+	vdev->port_id = ~0UL;
+	vdev->tx_irq = 0;
+	vdev->rx_irq = 0;
+
+	vio_fill_channel_info(hp, mp, vdev);
+
+	if (!id) {
+		dev_set_name(&vdev->dev, "%s", type);
+		vdev->dev_no = ~(u64)0;
+	} else if (!cfg_handle) {
+		dev_set_name(&vdev->dev, "%s-%llu", type, *id);
+		vdev->dev_no = *id;
+	} else {
+		dev_set_name(&vdev->dev, "%s-%llu-%llu", type,
+			     *cfg_handle, *id);
+		vdev->dev_no = *cfg_handle;
+		vdev->port_id = *id;
+	}
+
+	vdev->dev.parent = parent;
+	vdev->dev.bus = &vio_bus_type;
+	vdev->dev.release = vio_dev_release;
+
+	if (parent == NULL) {
+		dp = cdev_node;
+	} else if (to_vio_dev(parent) == root_vdev) {
+		dp = of_get_next_child(cdev_node, NULL);
+		while (dp) {
+			if (!strcmp(dp->type, type))
+				break;
+
+			dp = of_get_next_child(cdev_node, dp);
+		}
+	} else {
+		dp = to_vio_dev(parent)->dp;
+	}
+	vdev->dp = dp;
+
+	/*
+	 * node_name is NULL for the parent/channel-devices node and
+	 * the parent doesn't require the MD node info.
+	 */
+	if (node_name != NULL) {
+		(void) snprintf(vdev->node_name, VIO_MAX_NAME_LEN, "%s",
+				node_name);
+
+		err = mdesc_get_node_info(hp, mp, node_name,
+					  &vdev->md_node_info);
+		if (err) {
+			pr_err("VIO: Could not get MD node info %s, err=%d\n",
+			       dev_name(&vdev->dev), err);
+			kfree(vdev);
+			return NULL;
+		}
+	}
+
+	pr_info("VIO: Adding device %s (tx_ino = %llx, rx_ino = %llx)\n",
+		dev_name(&vdev->dev), vdev->tx_ino, vdev->rx_ino);
+
+	err = device_register(&vdev->dev);
+	if (err) {
+		printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
+		       dev_name(&vdev->dev), err);
+		put_device(&vdev->dev);
+		return NULL;
+	}
+	if (vdev->dp)
+		err = sysfs_create_file(&vdev->dev.kobj,
+					&dev_attr_obppath.attr);
+
+	return vdev;
+}
+
+static void vio_add(struct mdesc_handle *hp, u64 node,
+		    const char *node_name)
+{
+	(void) vio_create_one(hp, node, node_name, &root_vdev->dev);
+}
+
+struct vio_remove_node_data {
+	struct mdesc_handle *hp;
+	u64 node;
+};
+
+static int vio_md_node_match(struct device *dev, void *arg)
+{
+	struct vio_dev *vdev = to_vio_dev(dev);
+	struct vio_remove_node_data *node_data;
+	u64 node;
+
+	node_data = (struct vio_remove_node_data *)arg;
+
+	node = vio_vdev_node(node_data->hp, vdev);
+
+	return (node == node_data->node);
+}
+
+static void vio_remove(struct mdesc_handle *hp, u64 node, const char *node_name)
+{
+	struct vio_remove_node_data node_data;
+	struct device *dev;
+
+	node_data.hp = hp;
+	node_data.node = node;
+
+	dev = device_find_child(&root_vdev->dev, (void *)&node_data,
+				vio_md_node_match);
+	if (dev) {
+		printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev));
+
+		device_unregister(dev);
+		put_device(dev);
+	} else {
+		pr_err("VIO: %s node not found in MDESC\n", node_name);
+	}
+}
+
+static struct mdesc_notifier_client vio_device_notifier = {
+	.add		= vio_add,
+	.remove		= vio_remove,
+	.node_name	= "virtual-device-port",
+};
+
+/* We are only interested in domain service ports under the
+ * "domain-services" node.  On control nodes there is another port
+ * under "openboot" that we should not mess with as apparently that is
+ * reserved exclusively for OBP use.
+ */
+static void vio_add_ds(struct mdesc_handle *hp, u64 node,
+		       const char *node_name)
+{
+	int found;
+	u64 a;
+
+	found = 0;
+	mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
+		u64 target = mdesc_arc_target(hp, a);
+		const char *name = mdesc_node_name(hp, target);
+
+		if (!strcmp(name, "domain-services")) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found)
+		(void) vio_create_one(hp, node, node_name, &root_vdev->dev);
+}
+
+static struct mdesc_notifier_client vio_ds_notifier = {
+	.add		= vio_add_ds,
+	.remove		= vio_remove,
+	.node_name	= "domain-services-port",
+};
+
+static const char *channel_devices_node = "channel-devices";
+static const char *channel_devices_compat = "SUNW,sun4v-channel-devices";
+static const char *cfg_handle_prop = "cfg-handle";
+
+static int __init vio_init(void)
+{
+	struct mdesc_handle *hp;
+	const char *compat;
+	const u64 *cfg_handle;
+	int err, len;
+	u64 root;
+
+	err = bus_register(&vio_bus_type);
+	if (err) {
+		printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
+		       err);
+		return err;
+	}
+
+	hp = mdesc_grab();
+	if (!hp)
+		return 0;
+
+	root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node);
+	if (root == MDESC_NODE_NULL) {
+		printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
+		mdesc_release(hp);
+		return 0;
+	}
+
+	cdev_node = of_find_node_by_name(NULL, "channel-devices");
+	err = -ENODEV;
+	if (!cdev_node) {
+		printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
+		goto out_release;
+	}
+
+	compat = mdesc_get_property(hp, root, "compatible", &len);
+	if (!compat) {
+		printk(KERN_ERR "VIO: Channel-devices node lacks compatible "
+		       "property\n");
+		goto out_release;
+	}
+	if (!of_find_in_proplist(compat, channel_devices_compat, len)) {
+		printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
+		       "compat entry.\n", channel_devices_compat);
+		goto out_release;
+	}
+
+	cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL);
+	if (!cfg_handle) {
+		printk(KERN_ERR "VIO: Channel-devices node lacks %s property\n",
+		       cfg_handle_prop);
+		goto out_release;
+	}
+
+	cdev_cfg_handle = *cfg_handle;
+
+	root_vdev = vio_create_one(hp, root, NULL, NULL);
+	err = -ENODEV;
+	if (!root_vdev) {
+		printk(KERN_ERR "VIO: Could not create root device.\n");
+		goto out_release;
+	}
+
+	mdesc_register_notifier(&vio_device_notifier);
+	mdesc_register_notifier(&vio_ds_notifier);
+
+	mdesc_release(hp);
+
+	return 0;
+
+out_release:
+	mdesc_release(hp);
+	return err;
+}
+
+postcore_initcall(vio_init);
diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
new file mode 100644
index 0000000..7db5aab
--- /dev/null
+++ b/arch/sparc/kernel/viohs.c
@@ -0,0 +1,862 @@
+// SPDX-License-Identifier: GPL-2.0
+/* viohs.c: LDOM Virtual I/O handshake helper layer.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/slab.h>
+
+#include <asm/ldc.h>
+#include <asm/vio.h>
+
+int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
+{
+	int err, limit = 1000;
+
+	err = -EINVAL;
+	while (limit-- > 0) {
+		err = ldc_write(vio->lp, data, len);
+		if (!err || (err != -EAGAIN))
+			break;
+		udelay(1);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(vio_ldc_send);
+
+static int send_ctrl(struct vio_driver_state *vio,
+		     struct vio_msg_tag *tag, int len)
+{
+	tag->sid = vio_send_sid(vio);
+	return vio_ldc_send(vio, tag, len);
+}
+
+static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
+{
+	tag->type = type;
+	tag->stype = stype;
+	tag->stype_env = stype_env;
+}
+
+static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
+{
+	struct vio_ver_info pkt;
+
+	vio->_local_sid = (u32) sched_clock();
+
+	memset(&pkt, 0, sizeof(pkt));
+	init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
+	pkt.major = major;
+	pkt.minor = minor;
+	pkt.dev_class = vio->dev_class;
+
+	viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
+	       major, minor, vio->dev_class);
+
+	return send_ctrl(vio, &pkt.tag, sizeof(pkt));
+}
+
+static int start_handshake(struct vio_driver_state *vio)
+{
+	int err;
+
+	viodbg(HS, "START HANDSHAKE\n");
+
+	vio->hs_state = VIO_HS_INVALID;
+
+	err = send_version(vio,
+			   vio->ver_table[0].major,
+			   vio->ver_table[0].minor);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static void flush_rx_dring(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr;
+	u64 ident;
+
+	BUG_ON(!(vio->dr_state & VIO_DR_STATE_RXREG));
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+	ident = dr->ident;
+
+	BUG_ON(!vio->desc_buf);
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+
+	memset(dr, 0, sizeof(*dr));
+	dr->ident = ident;
+}
+
+void vio_link_state_change(struct vio_driver_state *vio, int event)
+{
+	if (event == LDC_EVENT_UP) {
+		vio->hs_state = VIO_HS_INVALID;
+
+		switch (vio->dev_class) {
+		case VDEV_NETWORK:
+		case VDEV_NETWORK_SWITCH:
+			vio->dr_state = (VIO_DR_STATE_TXREQ |
+					 VIO_DR_STATE_RXREQ);
+			break;
+
+		case VDEV_DISK:
+			vio->dr_state = VIO_DR_STATE_TXREQ;
+			break;
+		case VDEV_DISK_SERVER:
+			vio->dr_state = VIO_DR_STATE_RXREQ;
+			break;
+		}
+		start_handshake(vio);
+	} else if (event == LDC_EVENT_RESET) {
+		vio->hs_state = VIO_HS_INVALID;
+
+		if (vio->dr_state & VIO_DR_STATE_RXREG)
+			flush_rx_dring(vio);
+
+		vio->dr_state = 0x00;
+		memset(&vio->ver, 0, sizeof(vio->ver));
+
+		ldc_disconnect(vio->lp);
+	}
+}
+EXPORT_SYMBOL(vio_link_state_change);
+
+static int handshake_failure(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr;
+
+	/* XXX Put policy here...  Perhaps start a timer to fire
+	 * XXX in 100 ms, which will bring the link up and retry
+	 * XXX the handshake.
+	 */
+
+	viodbg(HS, "HANDSHAKE FAILURE\n");
+
+	vio->dr_state &= ~(VIO_DR_STATE_TXREG |
+			   VIO_DR_STATE_RXREG);
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+	memset(dr, 0, sizeof(*dr));
+
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;
+
+	vio->hs_state = VIO_HS_INVALID;
+
+	return -ECONNRESET;
+}
+
+static int process_unknown(struct vio_driver_state *vio, void *arg)
+{
+	struct vio_msg_tag *pkt = arg;
+
+	viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
+	       pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
+
+	printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
+	       vio->vdev->channel_id);
+
+	ldc_disconnect(vio->lp);
+
+	return -ECONNRESET;
+}
+
+static int send_dreg(struct vio_driver_state *vio)
+{
+	struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
+	union {
+		struct vio_dring_register pkt;
+		char all[sizeof(struct vio_dring_register) +
+			 (sizeof(struct ldc_trans_cookie) *
+			  VIO_MAX_RING_COOKIES)];
+	} u;
+	size_t bytes = sizeof(struct vio_dring_register) +
+		       (sizeof(struct ldc_trans_cookie) *
+			dr->ncookies);
+	int i;
+
+	if (WARN_ON(bytes > sizeof(u)))
+		return -EINVAL;
+
+	memset(&u, 0, bytes);
+	init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
+	u.pkt.dring_ident = 0;
+	u.pkt.num_descr = dr->num_entries;
+	u.pkt.descr_size = dr->entry_size;
+	u.pkt.options = VIO_TX_DRING;
+	u.pkt.num_cookies = dr->ncookies;
+
+	viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
+	       "ncookies[%u]\n",
+	       u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
+	       u.pkt.num_cookies);
+
+	for (i = 0; i < dr->ncookies; i++) {
+		u.pkt.cookies[i] = dr->cookies[i];
+
+		viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
+		       i,
+		       (unsigned long long) u.pkt.cookies[i].cookie_addr,
+		       (unsigned long long) u.pkt.cookies[i].cookie_size);
+	}
+
+	return send_ctrl(vio, &u.pkt.tag, bytes);
+}
+
+static int send_rdx(struct vio_driver_state *vio)
+{
+	struct vio_rdx pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
+
+	viodbg(HS, "SEND RDX INFO\n");
+
+	return send_ctrl(vio, &pkt.tag, sizeof(pkt));
+}
+
+static int send_attr(struct vio_driver_state *vio)
+{
+	if (!vio->ops)
+		return -EINVAL;
+
+	return vio->ops->send_attr(vio);
+}
+
+static struct vio_version *find_by_major(struct vio_driver_state *vio,
+					 u16 major)
+{
+	struct vio_version *ret = NULL;
+	int i;
+
+	for (i = 0; i < vio->ver_table_entries; i++) {
+		struct vio_version *v = &vio->ver_table[i];
+		if (v->major <= major) {
+			ret = v;
+			break;
+		}
+	}
+	return ret;
+}
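
A sketch of a version table as find_by_major() consumes it (hypothetical
values): entries are ordered newest first, and the first entry whose
major is <= the peer's requested major is returned.

	static struct vio_version example_versions[] = {
		{ .major = 2, .minor = 0 },	/* preferred                */
		{ .major = 1, .minor = 6 },	/* fallback for older peers */
	};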
+
+static int process_ver_info(struct vio_driver_state *vio,
+			    struct vio_ver_info *pkt)
+{
+	struct vio_version *vap;
+	int err;
+
+	viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
+	       pkt->major, pkt->minor, pkt->dev_class);
+
+	if (vio->hs_state != VIO_HS_INVALID) {
+		/* XXX Perhaps invoke start_handshake? XXX */
+		memset(&vio->ver, 0, sizeof(vio->ver));
+		vio->hs_state = VIO_HS_INVALID;
+	}
+
+	vap = find_by_major(vio, pkt->major);
+
+	vio->_peer_sid = pkt->tag.sid;
+
+	if (!vap) {
+		pkt->tag.stype = VIO_SUBTYPE_NACK;
+		pkt->major = 0;
+		pkt->minor = 0;
+		viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
+		err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+	} else if (vap->major != pkt->major) {
+		pkt->tag.stype = VIO_SUBTYPE_NACK;
+		pkt->major = vap->major;
+		pkt->minor = vap->minor;
+		viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
+		       pkt->major, pkt->minor);
+		err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+	} else {
+		struct vio_version ver = {
+			.major = pkt->major,
+			.minor = pkt->minor,
+		};
+		if (ver.minor > vap->minor)
+			ver.minor = vap->minor;
+		pkt->minor = ver.minor;
+		pkt->tag.stype = VIO_SUBTYPE_ACK;
+		pkt->dev_class = vio->dev_class;
+		viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
+		       pkt->major, pkt->minor);
+		err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+		if (err > 0) {
+			vio->ver = ver;
+			vio->hs_state = VIO_HS_GOTVERS;
+		}
+	}
+	if (err < 0)
+		return handshake_failure(vio);
+
+	return 0;
+}
+
+static int process_ver_ack(struct vio_driver_state *vio,
+			   struct vio_ver_info *pkt)
+{
+	viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
+	       pkt->major, pkt->minor, pkt->dev_class);
+
+	if (vio->hs_state & VIO_HS_GOTVERS) {
+		if (vio->ver.major != pkt->major ||
+		    vio->ver.minor != pkt->minor) {
+			pkt->tag.stype = VIO_SUBTYPE_NACK;
+			(void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+			return handshake_failure(vio);
+		}
+	} else {
+		vio->ver.major = pkt->major;
+		vio->ver.minor = pkt->minor;
+		vio->hs_state = VIO_HS_GOTVERS;
+	}
+
+	switch (vio->dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_DISK:
+		if (send_attr(vio) < 0)
+			return handshake_failure(vio);
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int process_ver_nack(struct vio_driver_state *vio,
+			    struct vio_ver_info *pkt)
+{
+	struct vio_version *nver;
+
+	viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
+	       pkt->major, pkt->minor, pkt->dev_class);
+
+	if (pkt->major == 0 && pkt->minor == 0)
+		return handshake_failure(vio);
+	nver = find_by_major(vio, pkt->major);
+	if (!nver)
+		return handshake_failure(vio);
+
+	if (send_version(vio, nver->major, nver->minor) < 0)
+		return handshake_failure(vio);
+
+	return 0;
+}
+
+static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
+{
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_ver_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_ver_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_ver_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+
+static int process_attr(struct vio_driver_state *vio, void *pkt)
+{
+	int err;
+
+	if (!(vio->hs_state & VIO_HS_GOTVERS))
+		return handshake_failure(vio);
+
+	if (!vio->ops)
+		return 0;
+
+	err = vio->ops->handle_attr(vio, pkt);
+	if (err < 0) {
+		return handshake_failure(vio);
+	} else {
+		vio->hs_state |= VIO_HS_GOT_ATTR;
+
+		if ((vio->dr_state & VIO_DR_STATE_TXREQ) &&
+		    !(vio->hs_state & VIO_HS_SENT_DREG)) {
+			if (send_dreg(vio) < 0)
+				return handshake_failure(vio);
+
+			vio->hs_state |= VIO_HS_SENT_DREG;
+		}
+	}
+
+	return 0;
+}
+
+static int all_drings_registered(struct vio_driver_state *vio)
+{
+	int need_rx, need_tx;
+
+	need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ);
+	need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ);
+
+	if (need_rx &&
+	    !(vio->dr_state & VIO_DR_STATE_RXREG))
+		return 0;
+
+	if (need_tx &&
+	    !(vio->dr_state & VIO_DR_STATE_TXREG))
+		return 0;
+
+	return 1;
+}
+
+static int process_dreg_info(struct vio_driver_state *vio,
+			     struct vio_dring_register *pkt)
+{
+	struct vio_dring_state *dr;
+	int i, len;
+
+	viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
+		goto send_nack;
+
+	if (vio->dr_state & VIO_DR_STATE_RXREG)
+		goto send_nack;
+
+	/* v1.6 and higher, ACK with desired, supported mode, or NACK */
+	if (vio_version_after_eq(vio, 1, 6)) {
+		if (!(pkt->options & VIO_TX_DRING))
+			goto send_nack;
+		pkt->options = VIO_TX_DRING;
+	}
+
+	BUG_ON(vio->desc_buf);
+
+	vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
+	if (!vio->desc_buf)
+		goto send_nack;
+
+	vio->desc_buf_len = pkt->descr_size;
+
+	dr = &vio->drings[VIO_DRIVER_RX_RING];
+
+	dr->num_entries = pkt->num_descr;
+	dr->entry_size = pkt->descr_size;
+	dr->ncookies = pkt->num_cookies;
+	for (i = 0; i < dr->ncookies; i++) {
+		dr->cookies[i] = pkt->cookies[i];
+
+		viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
+		       i,
+		       (unsigned long long)
+		       pkt->cookies[i].cookie_addr,
+		       (unsigned long long)
+		       pkt->cookies[i].cookie_size);
+	}
+
+	pkt->tag.stype = VIO_SUBTYPE_ACK;
+	pkt->dring_ident = ++dr->ident;
+
+	viodbg(HS, "SEND DRING_REG ACK ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	len = (sizeof(*pkt) +
+	       (dr->ncookies * sizeof(struct ldc_trans_cookie)));
+	if (send_ctrl(vio, &pkt->tag, len) < 0)
+		goto send_nack;
+
+	vio->dr_state |= VIO_DR_STATE_RXREG;
+
+	return 0;
+
+send_nack:
+	pkt->tag.stype = VIO_SUBTYPE_NACK;
+	viodbg(HS, "SEND DRING_REG NACK\n");
+	(void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
+
+	return handshake_failure(vio);
+}
+
+static int process_dreg_ack(struct vio_driver_state *vio,
+			    struct vio_dring_register *pkt)
+{
+	struct vio_dring_state *dr;
+
+	viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	dr = &vio->drings[VIO_DRIVER_TX_RING];
+
+	if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
+		return handshake_failure(vio);
+
+	dr->ident = pkt->dring_ident;
+	vio->dr_state |= VIO_DR_STATE_TXREG;
+
+	if (all_drings_registered(vio)) {
+		if (send_rdx(vio) < 0)
+			return handshake_failure(vio);
+		vio->hs_state = VIO_HS_SENT_RDX;
+	}
+	return 0;
+}
+
+static int process_dreg_nack(struct vio_driver_state *vio,
+			     struct vio_dring_register *pkt)
+{
+	viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
+	       "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
+	       (unsigned long long) pkt->dring_ident,
+	       pkt->num_descr, pkt->descr_size, pkt->options,
+	       pkt->num_cookies);
+
+	return handshake_failure(vio);
+}
+
+static int process_dreg(struct vio_driver_state *vio,
+			struct vio_dring_register *pkt)
+{
+	if (!(vio->hs_state & VIO_HS_GOTVERS))
+		return handshake_failure(vio);
+
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_dreg_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_dreg_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_dreg_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+
+static int process_dunreg(struct vio_driver_state *vio,
+			  struct vio_dring_unregister *pkt)
+{
+	struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
+
+	viodbg(HS, "GOT DRING_UNREG\n");
+
+	if (pkt->dring_ident != dr->ident)
+		return 0;
+
+	vio->dr_state &= ~VIO_DR_STATE_RXREG;
+
+	memset(dr, 0, sizeof(*dr));
+
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;
+
+	return 0;
+}
+
+static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	viodbg(HS, "GOT RDX INFO\n");
+
+	pkt->tag.stype = VIO_SUBTYPE_ACK;
+	viodbg(HS, "SEND RDX ACK\n");
+	if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
+		return handshake_failure(vio);
+
+	vio->hs_state |= VIO_HS_SENT_RDX_ACK;
+	return 0;
+}
+
+static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	viodbg(HS, "GOT RDX ACK\n");
+
+	if (!(vio->hs_state & VIO_HS_SENT_RDX))
+		return handshake_failure(vio);
+
+	vio->hs_state |= VIO_HS_GOT_RDX_ACK;
+	return 0;
+}
+
+static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	viodbg(HS, "GOT RDX NACK\n");
+
+	return handshake_failure(vio);
+}
+
+static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
+{
+	if (!all_drings_registered(vio))
+		return handshake_failure(vio);
+
+	switch (pkt->tag.stype) {
+	case VIO_SUBTYPE_INFO:
+		return process_rdx_info(vio, pkt);
+
+	case VIO_SUBTYPE_ACK:
+		return process_rdx_ack(vio, pkt);
+
+	case VIO_SUBTYPE_NACK:
+		return process_rdx_nack(vio, pkt);
+
+	default:
+		return handshake_failure(vio);
+	}
+}
+
+int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
+{
+	struct vio_msg_tag *tag = pkt;
+	u8 prev_state = vio->hs_state;
+	int err;
+
+	switch (tag->stype_env) {
+	case VIO_VER_INFO:
+		err = process_ver(vio, pkt);
+		break;
+
+	case VIO_ATTR_INFO:
+		err = process_attr(vio, pkt);
+		break;
+
+	case VIO_DRING_REG:
+		err = process_dreg(vio, pkt);
+		break;
+
+	case VIO_DRING_UNREG:
+		err = process_dunreg(vio, pkt);
+		break;
+
+	case VIO_RDX:
+		err = process_rdx(vio, pkt);
+		break;
+
+	default:
+		err = process_unknown(vio, pkt);
+		break;
+	}
+
+	if (!err &&
+	    vio->hs_state != prev_state &&
+	    (vio->hs_state & VIO_HS_COMPLETE)) {
+		if (vio->ops)
+			vio->ops->handshake_complete(vio);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(vio_control_pkt_engine);
+
+void vio_conn_reset(struct vio_driver_state *vio)
+{
+}
+EXPORT_SYMBOL(vio_conn_reset);
+
+/* The issue is that the Solaris virtual disk server just mirrors the
+ * SID values it gets from the client peer.  So we work around that
+ * here in vio_{validate,send}_sid() so that the drivers don't need
+ * to be aware of this crap.
+ */
+int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
+{
+	u32 sid;
+
+	/* Always let VERSION+INFO packets through unchecked, they
+	 * define the new SID.
+	 */
+	if (tp->type == VIO_TYPE_CTRL &&
+	    tp->stype == VIO_SUBTYPE_INFO &&
+	    tp->stype_env == VIO_VER_INFO)
+		return 0;
+
+	/* Ok, now figure out which SID to use.  */
+	switch (vio->dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK_SERVER:
+	default:
+		sid = vio->_peer_sid;
+		break;
+
+	case VDEV_DISK:
+		sid = vio->_local_sid;
+		break;
+	}
+
+	if (sid == tp->sid)
+		return 0;
+	viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
+	       tp->sid, vio->_peer_sid, vio->_local_sid);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vio_validate_sid);
+
+u32 vio_send_sid(struct vio_driver_state *vio)
+{
+	switch (vio->dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK:
+	default:
+		return vio->_local_sid;
+
+	case VDEV_DISK_SERVER:
+		return vio->_peer_sid;
+	}
+}
+EXPORT_SYMBOL(vio_send_sid);
+
+int vio_ldc_alloc(struct vio_driver_state *vio,
+			 struct ldc_channel_config *base_cfg,
+			 void *event_arg)
+{
+	struct ldc_channel_config cfg = *base_cfg;
+	struct ldc_channel *lp;
+
+	cfg.tx_irq = vio->vdev->tx_irq;
+	cfg.rx_irq = vio->vdev->rx_irq;
+
+	lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name);
+	if (IS_ERR(lp))
+		return PTR_ERR(lp);
+
+	vio->lp = lp;
+
+	return 0;
+}
+EXPORT_SYMBOL(vio_ldc_alloc);
+
+void vio_ldc_free(struct vio_driver_state *vio)
+{
+	ldc_free(vio->lp);
+	vio->lp = NULL;
+
+	kfree(vio->desc_buf);
+	vio->desc_buf = NULL;
+	vio->desc_buf_len = 0;
+}
+EXPORT_SYMBOL(vio_ldc_free);
+
+void vio_port_up(struct vio_driver_state *vio)
+{
+	unsigned long flags;
+	int err, state;
+
+	spin_lock_irqsave(&vio->lock, flags);
+
+	state = ldc_state(vio->lp);
+
+	err = 0;
+	if (state == LDC_STATE_INIT) {
+		err = ldc_bind(vio->lp);
+		if (err)
+			printk(KERN_WARNING "%s: Port %lu bind failed, "
+			       "err=%d\n",
+			       vio->name, vio->vdev->channel_id, err);
+	}
+
+	if (!err) {
+		if (ldc_mode(vio->lp) == LDC_MODE_RAW)
+			ldc_set_state(vio->lp, LDC_STATE_CONNECTED);
+		else
+			err = ldc_connect(vio->lp);
+
+		if (err)
+			printk(KERN_WARNING "%s: Port %lu connect failed, "
+			       "err=%d\n",
+			       vio->name, vio->vdev->channel_id, err);
+	}
+	if (err) {
+		unsigned long expires = jiffies + HZ;
+
+		expires = round_jiffies(expires);
+		mod_timer(&vio->timer, expires);
+	}
+
+	spin_unlock_irqrestore(&vio->lock, flags);
+}
+EXPORT_SYMBOL(vio_port_up);
+
+static void vio_port_timer(struct timer_list *t)
+{
+	struct vio_driver_state *vio = from_timer(vio, t, timer);
+
+	vio_port_up(vio);
+}
+
+int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
+		    u8 dev_class, struct vio_version *ver_table,
+		    int ver_table_size, struct vio_driver_ops *ops,
+		    char *name)
+{
+	switch (dev_class) {
+	case VDEV_NETWORK:
+	case VDEV_NETWORK_SWITCH:
+	case VDEV_DISK:
+	case VDEV_DISK_SERVER:
+	case VDEV_CONSOLE_CON:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (dev_class == VDEV_NETWORK ||
+	    dev_class == VDEV_NETWORK_SWITCH ||
+	    dev_class == VDEV_DISK ||
+	    dev_class == VDEV_DISK_SERVER) {
+		if (!ops || !ops->send_attr || !ops->handle_attr ||
+		    !ops->handshake_complete)
+			return -EINVAL;
+	}
+
+	if (!ver_table || ver_table_size < 0)
+		return -EINVAL;
+
+	if (!name)
+		return -EINVAL;
+
+	spin_lock_init(&vio->lock);
+
+	vio->name = name;
+
+	vio->dev_class = dev_class;
+	vio->vdev = vdev;
+
+	vio->ver_table = ver_table;
+	vio->ver_table_entries = ver_table_size;
+
+	vio->ops = ops;
+
+	timer_setup(&vio->timer, vio_port_timer, 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(vio_driver_init);
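
A kernel-side sketch (stub bodies, hypothetical names) of the ops a
NETWORK or DISK class driver must supply to pass the checks in
vio_driver_init() above:

	static int my_send_attr(struct vio_driver_state *vio)
	{
		/* send this device's attribute INFO packet */
		return 0;
	}

	static int my_handle_attr(struct vio_driver_state *vio, void *pkt)
	{
		/* validate and ACK/NACK the peer's attributes */
		return 0;
	}

	static void my_handshake_complete(struct vio_driver_state *vio)
	{
		/* versions agreed, drings registered, RDX exchanged */
	}

	static struct vio_driver_ops my_ops = {
		.send_attr		= my_send_attr,
		.handle_attr		= my_handle_attr,
		.handshake_complete	= my_handshake_complete,
	};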
diff --git a/arch/sparc/kernel/visemul.c b/arch/sparc/kernel/visemul.c
new file mode 100644
index 0000000..64ed80e
--- /dev/null
+++ b/arch/sparc/kernel/visemul.c
@@ -0,0 +1,899 @@
+// SPDX-License-Identifier: GPL-2.0
+/* visemul.c: Emulation of VIS instructions.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/thread_info.h>
+#include <linux/perf_event.h>
+
+#include <asm/ptrace.h>
+#include <asm/pstate.h>
+#include <asm/fpumacro.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+/* OPF field of various VIS instructions.  */
+
+/* 000111011 - four 16-bit packs  */
+#define FPACK16_OPF	0x03b
+
+/* 000111010 - two 32-bit packs  */
+#define FPACK32_OPF	0x03a
+
+/* 000111101 - two 32-bit to 16-bit fixed-point packs  */
+#define FPACKFIX_OPF	0x03d
+
+/* 001001101 - four 16-bit expands  */
+#define FEXPAND_OPF	0x04d
+
+/* 001001011 - two 32-bit merges */
+#define FPMERGE_OPF	0x04b
+
+/* 000110001 - 8-by-16-bit partitioned product  */
+#define FMUL8x16_OPF	0x031
+
+/* 000110011 - 8-by-16-bit upper alpha partitioned product  */
+#define FMUL8x16AU_OPF	0x033
+
+/* 000110101 - 8-by-16-bit lower alpha partitioned product  */
+#define FMUL8x16AL_OPF	0x035
+
+/* 000110110 - upper 8-by-16-bit partitioned product  */
+#define FMUL8SUx16_OPF	0x036
+
+/* 000110111 - lower 8-by-16-bit partitioned product  */
+#define FMUL8ULx16_OPF	0x037
+
+/* 000111000 - upper 8-by-16-bit partitioned product  */
+#define FMULD8SUx16_OPF	0x038
+
+/* 000111001 - lower unsigned 8-by-16-bit partitioned product  */
+#define FMULD8ULx16_OPF	0x039
+
+/* 000101000 - four 16-bit compare; set rd if src1 > src2  */
+#define FCMPGT16_OPF	0x028
+
+/* 000101100 - two 32-bit compare; set rd if src1 > src2  */
+#define FCMPGT32_OPF	0x02c
+
+/* 000100000 - four 16-bit compare; set rd if src1 <= src2  */
+#define FCMPLE16_OPF	0x020
+
+/* 000100100 - two 32-bit compare; set rd if src1 <= src2  */
+#define FCMPLE32_OPF	0x024
+
+/* 000100010 - four 16-bit compare; set rd if src1 != src2  */
+#define FCMPNE16_OPF	0x022
+
+/* 000100110 - two 32-bit compare; set rd if src1 != src2  */
+#define FCMPNE32_OPF	0x026
+
+/* 000101010 - four 16-bit compare; set rd if src1 == src2  */
+#define FCMPEQ16_OPF	0x02a
+
+/* 000101110 - two 32-bit compare; set rd if src1 == src2  */
+#define FCMPEQ32_OPF	0x02e
+
+/* 000000000 - Eight 8-bit edge boundary processing  */
+#define EDGE8_OPF	0x000
+
+/* 000000001 - Eight 8-bit edge boundary processing, no CC */
+#define EDGE8N_OPF	0x001
+
+/* 000000010 - Eight 8-bit edge boundary processing, little-endian  */
+#define EDGE8L_OPF	0x002
+
+/* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC  */
+#define EDGE8LN_OPF	0x003
+
+/* 000000100 - Four 16-bit edge boundary processing  */
+#define EDGE16_OPF	0x004
+
+/* 000000101 - Four 16-bit edge boundary processing, no CC  */
+#define EDGE16N_OPF	0x005
+
+/* 000000110 - Four 16-bit edge boundary processing, little-endian  */
+#define EDGE16L_OPF	0x006
+
+/* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC  */
+#define EDGE16LN_OPF	0x007
+
+/* 000001000 - Two 32-bit edge boundary processing  */
+#define EDGE32_OPF	0x008
+
+/* 000001001 - Two 32-bit edge boundary processing, no CC  */
+#define EDGE32N_OPF	0x009
+
+/* 000001010 - Two 32-bit edge boundary processing, little-endian  */
+#define EDGE32L_OPF	0x00a
+
+/* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC  */
+#define EDGE32LN_OPF	0x00b
+
+/* 000111110 - distance between 8 8-bit components  */
+#define PDIST_OPF	0x03e
+
+/* 000010000 - convert 8-bit 3-D address to blocked byte address  */
+#define ARRAY8_OPF	0x010
+
+/* 000010010 - convert 16-bit 3-D address to blocked byte address  */
+#define ARRAY16_OPF	0x012
+
+/* 000010100 - convert 32-bit 3-D address to blocked byte address  */
+#define ARRAY32_OPF	0x014
+
+/* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE  */
+#define BMASK_OPF	0x019
+
+/* 001001100 - Permute bytes as specified by GSR.MASK  */
+#define BSHUFFLE_OPF	0x04c
+
+#define VIS_OPF_SHIFT	5
+#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)
+
+#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
+#define RS2(INSN)	(((INSN) >>  0) & 0x1f)
+#define RD(INSN)	(((INSN) >> 25) & 0x1f)
+
+static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
+				       unsigned int rd, int from_kernel)
+{
+	if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
+		if (from_kernel != 0)
+			__asm__ __volatile__("flushw");
+		else
+			flushw_user();
+	}
+}
+
+static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
+{
+	unsigned long value, fp;
+
+	if (reg < 16)
+		return (!reg ? 0 : regs->u_regs[reg]);
+
+	fp = regs->u_regs[UREG_FP];
+
+	if (regs->tstate & TSTATE_PRIV) {
+		struct reg_window *win;
+		win = (struct reg_window *)(fp + STACK_BIAS);
+		value = win->locals[reg - 16];
+	} else if (!test_thread_64bit_stack(fp)) {
+		struct reg_window32 __user *win32;
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
+		get_user(value, &win32->locals[reg - 16]);
+	} else {
+		struct reg_window __user *win;
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
+		get_user(value, &win->locals[reg - 16]);
+	}
+	return value;
+}
+
+static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
+							  struct pt_regs *regs)
+{
+	unsigned long fp = regs->u_regs[UREG_FP];
+
+	BUG_ON(reg < 16);
+	BUG_ON(regs->tstate & TSTATE_PRIV);
+
+	if (!test_thread_64bit_stack(fp)) {
+		struct reg_window32 __user *win32;
+		win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp));
+		return (unsigned long __user *)&win32->locals[reg - 16];
+	} else {
+		struct reg_window __user *win;
+		win = (struct reg_window __user *)(fp + STACK_BIAS);
+		return &win->locals[reg - 16];
+	}
+}
+
+static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
+						   struct pt_regs *regs)
+{
+	BUG_ON(reg >= 16);
+	BUG_ON(regs->tstate & TSTATE_PRIV);
+
+	return &regs->u_regs[reg];
+}
+
+static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
+{
+	if (rd < 16) {
+		unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
+
+		*rd_kern = val;
+	} else {
+		unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
+
+		if (!test_thread_64bit_stack(regs->u_regs[UREG_FP]))
+			__put_user((u32)val, (u32 __user *)rd_user);
+		else
+			__put_user(val, rd_user);
+	}
+}
+
+static inline unsigned long fpd_regval(struct fpustate *f,
+				       unsigned int insn_regnum)
+{
+	insn_regnum = (((insn_regnum & 1) << 5) |
+		       (insn_regnum & 0x1e));
+
+	return *(unsigned long *) &f->regs[insn_regnum];
+}
+
+static inline unsigned long *fpd_regaddr(struct fpustate *f,
+					 unsigned int insn_regnum)
+{
+	insn_regnum = (((insn_regnum & 1) << 5) |
+		       (insn_regnum & 0x1e));
+
+	return (unsigned long *) &f->regs[insn_regnum];
+}
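
The renumbering in fpd_regval()/fpd_regaddr() is the V9 rule that bit 0
of the 5-bit register field serves as bit 5 of the double-register
number, so encodings 0..31 reach %d0..%d62. A user-space sketch that
prints the mapping:

	#include <stdio.h>

	int main(void)
	{
		unsigned r;

		/* field 1 -> %d32, 2 -> %d2, 3 -> %d34, ... */
		for (r = 0; r < 32; r++)
			printf("field %2u -> %%d%u\n", r,
			       ((r & 1) << 5) | (r & 0x1e));
		return 0;
	}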
+
+static inline unsigned int fps_regval(struct fpustate *f,
+				      unsigned int insn_regnum)
+{
+	return f->regs[insn_regnum];
+}
+
+static inline unsigned int *fps_regaddr(struct fpustate *f,
+					unsigned int insn_regnum)
+{
+	return &f->regs[insn_regnum];
+}
+
+struct edge_tab {
+	u16 left, right;
+};
+static struct edge_tab edge8_tab[8] = {
+	{ 0xff, 0x80 },
+	{ 0x7f, 0xc0 },
+	{ 0x3f, 0xe0 },
+	{ 0x1f, 0xf0 },
+	{ 0x0f, 0xf8 },
+	{ 0x07, 0xfc },
+	{ 0x03, 0xfe },
+	{ 0x01, 0xff },
+};
+static struct edge_tab edge8_tab_l[8] = {
+	{ 0xff, 0x01 },
+	{ 0xfe, 0x03 },
+	{ 0xfc, 0x07 },
+	{ 0xf8, 0x0f },
+	{ 0xf0, 0x1f },
+	{ 0xe0, 0x3f },
+	{ 0xc0, 0x7f },
+	{ 0x80, 0xff },
+};
+static struct edge_tab edge16_tab[4] = {
+	{ 0xf, 0x8 },
+	{ 0x7, 0xc },
+	{ 0x3, 0xe },
+	{ 0x1, 0xf },
+};
+static struct edge_tab edge16_tab_l[4] = {
+	{ 0xf, 0x1 },
+	{ 0xe, 0x3 },
+	{ 0xc, 0x7 },
+	{ 0x8, 0xf },
+};
+static struct edge_tab edge32_tab[2] = {
+	{ 0x3, 0x2 },
+	{ 0x1, 0x3 },
+};
+static struct edge_tab edge32_tab_l[2] = {
+	{ 0x3, 0x1 },
+	{ 0x2, 0x3 },
+};
+
+static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+	unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
+	u16 left, right;
+
+	maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
+	orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
+	orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
+
+	if (test_thread_flag(TIF_32BIT)) {
+		rs1 = rs1 & 0xffffffff;
+		rs2 = rs2 & 0xffffffff;
+	}
+	switch (opf) {
+	default:
+	case EDGE8_OPF:
+	case EDGE8N_OPF:
+		left = edge8_tab[rs1 & 0x7].left;
+		right = edge8_tab[rs2 & 0x7].right;
+		break;
+	case EDGE8L_OPF:
+	case EDGE8LN_OPF:
+		left = edge8_tab_l[rs1 & 0x7].left;
+		right = edge8_tab_l[rs2 & 0x7].right;
+		break;
+
+	case EDGE16_OPF:
+	case EDGE16N_OPF:
+		left = edge16_tab[(rs1 >> 1) & 0x3].left;
+		right = edge16_tab[(rs2 >> 1) & 0x3].right;
+		break;
+
+	case EDGE16L_OPF:
+	case EDGE16LN_OPF:
+		left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
+		right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
+		break;
+
+	case EDGE32_OPF:
+	case EDGE32N_OPF:
+		left = edge32_tab[(rs1 >> 2) & 0x1].left;
+		right = edge32_tab[(rs2 >> 2) & 0x1].right;
+		break;
+
+	case EDGE32L_OPF:
+	case EDGE32LN_OPF:
+		left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
+		right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
+		break;
+	}
+
+	if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
+		rd_val = right & left;
+	else
+		rd_val = left;
+
+	store_reg(regs, rd_val, RD(insn));
+
+	switch (opf) {
+	case EDGE8_OPF:
+	case EDGE8L_OPF:
+	case EDGE16_OPF:
+	case EDGE16L_OPF:
+	case EDGE32_OPF:
+	case EDGE32L_OPF: {
+		unsigned long ccr, tstate;
+
+		__asm__ __volatile__("subcc	%1, %2, %%g0\n\t"
+				     "rd	%%ccr, %0"
+				     : "=r" (ccr)
+				     : "r" (orig_rs1), "r" (orig_rs2)
+				     : "cc");
+		tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
+		regs->tstate = tstate | (ccr << 32UL);
+	}
+	}
+}
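
A worked example of the table lookups above: edge8 with rs1 pointing at
byte 3 and rs2 at byte 5 of the same 8-byte block yields left 0x1f and
right 0xfc, so the stored mask covers bytes 3, 4 and 5.

	#include <stdio.h>

	int main(void)
	{
		unsigned left  = 0x1f;	/* edge8_tab[3].left  */
		unsigned right = 0xfc;	/* edge8_tab[5].right */

		/* rs1 and rs2 fall in the same 8-byte block */
		printf("mask = %#x\n", left & right);	/* 0x1c */
		return 0;
	}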
+
+static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+	unsigned long rs1, rs2, rd_val;
+	unsigned int bits, bits_mask;
+
+	maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
+	rs1 = fetch_reg(RS1(insn), regs);
+	rs2 = fetch_reg(RS2(insn), regs);
+
+	bits = (rs2 > 5 ? 5 : rs2);
+	bits_mask = (1UL << bits) - 1UL;
+
+	rd_val = ((((rs1 >> 11) & 0x3) <<  0) |
+		  (((rs1 >> 33) & 0x3) <<  2) |
+		  (((rs1 >> 55) & 0x1) <<  4) |
+		  (((rs1 >> 13) & 0xf) <<  5) |
+		  (((rs1 >> 35) & 0xf) <<  9) |
+		  (((rs1 >> 56) & 0xf) << 13) |
+		  (((rs1 >> 17) & bits_mask) << 17) |
+		  (((rs1 >> 39) & bits_mask) << (17 + bits)) |
+		  (((rs1 >> 60) & 0xf)       << (17 + (2*bits))));
+
+	switch (opf) {
+	case ARRAY16_OPF:
+		rd_val <<= 1;
+		break;
+
+	case ARRAY32_OPF:
+		rd_val <<= 2;
+	}
+
+	store_reg(regs, rd_val, RD(insn));
+}
+
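+/* Emulate BMASK: rd = rs1 + rs2, with the low 32 bits of the sum also
+ * latched into the upper half of %gsr, where a following BSHUFFLE
+ * will pick them up as its byte-selection mask.
+ */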
+static void bmask(struct pt_regs *regs, unsigned int insn)
+{
+	unsigned long rs1, rs2, rd_val, gsr;
+
+	maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
+	rs1 = fetch_reg(RS1(insn), regs);
+	rs2 = fetch_reg(RS2(insn), regs);
+	rd_val = rs1 + rs2;
+
+	store_reg(regs, rd_val, RD(insn));
+
+	gsr = current_thread_info()->gsr[0] & 0xffffffff;
+	gsr |= rd_val << 32UL;
+	current_thread_info()->gsr[0] = gsr;
+}
+
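+/* Emulate BSHUFFLE: nibble i of the mask set up by BMASK (bits 32-63
+ * of %gsr) selects the source byte for destination byte i; values 0-7
+ * pick a byte of rs1, values 8-15 the corresponding byte of rs2.
+ * E.g. a nibble of 0x9 copies byte 1 of rs2 into that output lane.
+ */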
+static void bshuffle(struct pt_regs *regs, unsigned int insn)
+{
+	struct fpustate *f = FPUSTATE;
+	unsigned long rs1, rs2, rd_val;
+	unsigned long bmask, i;
+
+	bmask = current_thread_info()->gsr[0] >> 32UL;
+
+	rs1 = fpd_regval(f, RS1(insn));
+	rs2 = fpd_regval(f, RS2(insn));
+
+	rd_val = 0UL;
+	for (i = 0; i < 8; i++) {
+		unsigned long which = (bmask >> (i * 4)) & 0xf;
+		unsigned long byte;
+
+		if (which < 8)
+			byte = (rs1 >> (which * 8)) & 0xff;
+		else
+			byte = (rs2 >> ((which-8)*8)) & 0xff;
+		rd_val |= (byte << (i * 8));
+	}
+
+	*fpd_regaddr(f, RD(insn)) = rd_val;
+}
+
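+/* Emulate PDIST: the sum of absolute differences over the eight
+ * unsigned byte lanes of rs1 and rs2, accumulated into rd (the inner
+ * kernel of motion-estimation style SAD loops).  The '~s1 + 1' below
+ * is just two's-complement negation.
+ */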
+static void pdist(struct pt_regs *regs, unsigned int insn)
+{
+	struct fpustate *f = FPUSTATE;
+	unsigned long rs1, rs2, *rd, rd_val;
+	unsigned long i;
+
+	rs1 = fpd_regval(f, RS1(insn));
+	rs2 = fpd_regval(f, RS2(insn));
+	rd = fpd_regaddr(f, RD(insn));
+
+	rd_val = *rd;
+
+	for (i = 0; i < 8; i++) {
+		s16 s1, s2;
+
+		s1 = (rs1 >> (56 - (i * 8))) & 0xff;
+		s2 = (rs2 >> (56 - (i * 8))) & 0xff;
+
+		/* Absolute value of difference. */
+		s1 -= s2;
+		if (s1 < 0)
+			s1 = ~s1 + 1;
+
+		rd_val += s1;
+	}
+
+	*rd = rd_val;
+}
+
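+/* Emulate the pixel formatting instructions.  FPACK16, FPACK32 and
+ * FPACKFIX narrow fixed-point lanes: each lane is left-shifted by the
+ * %gsr scale factor, the fraction bits are dropped, and the result is
+ * saturated (to 0..255 for the byte packs, -32768..32767 for
+ * FPACKFIX).  FEXPAND goes the other way, widening bytes to 16-bit
+ * fixed point, and FPMERGE interleaves the bytes of rs1 and rs2.
+ */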
+static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+	struct fpustate *f = FPUSTATE;
+	unsigned long rs1, rs2, gsr, scale, rd_val;
+
+	gsr = current_thread_info()->gsr[0];
+	scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
+	switch (opf) {
+	case FPACK16_OPF: {
+		unsigned long byte;
+
+		rs2 = fpd_regval(f, RS2(insn));
+		rd_val = 0;
+		for (byte = 0; byte < 4; byte++) {
+			unsigned int val;
+			s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
+			int scaled = src << scale;
+			int from_fixed = scaled >> 7;
+
+			val = ((from_fixed < 0) ?
+			       0 :
+			       (from_fixed > 255) ?
+			       255 : from_fixed);
+
+			rd_val |= (val << (8 * byte));
+		}
+		*fps_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FPACK32_OPF: {
+		unsigned long word;
+
+		rs1 = fpd_regval(f, RS1(insn));
+		rs2 = fpd_regval(f, RS2(insn));
+		rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
+		for (word = 0; word < 2; word++) {
+			unsigned long val;
+			s32 src = (rs2 >> (word * 32UL));
+			s64 scaled = src << scale;
+			s64 from_fixed = scaled >> 23;
+
+			val = ((from_fixed < 0) ?
+			       0 :
+			       (from_fixed > 255) ?
+			       255 : from_fixed);
+
+			rd_val |= (val << (32 * word));
+		}
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FPACKFIX_OPF: {
+		unsigned long word;
+
+		rs2 = fpd_regval(f, RS2(insn));
+
+		rd_val = 0;
+		for (word = 0; word < 2; word++) {
+			long val;
+			s32 src = (rs2 >> (word * 32UL));
+			s64 scaled = src << scale;
+			s64 from_fixed = scaled >> 16;
+
+			val = ((from_fixed < -32768) ?
+			       -32768 :
+			       (from_fixed > 32767) ?
+			       32767 : from_fixed);
+
+			rd_val |= ((val & 0xffff) << (word * 16));
+		}
+		*fps_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FEXPAND_OPF: {
+		unsigned long byte;
+
+		rs2 = fps_regval(f, RS2(insn));
+
+		rd_val = 0;
+		for (byte = 0; byte < 4; byte++) {
+			unsigned long val;
+			u8 src = (rs2 >> (byte * 8)) & 0xff;
+
+			val = src << 4;
+
+			rd_val |= (val << (byte * 16));
+		}
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FPMERGE_OPF: {
+		rs1 = fps_regval(f, RS1(insn));
+		rs2 = fps_regval(f, RS2(insn));
+
+		rd_val = (((rs2 & 0x000000ff) <<  0) |
+			  ((rs1 & 0x000000ff) <<  8) |
+			  ((rs2 & 0x0000ff00) <<  8) |
+			  ((rs1 & 0x0000ff00) << 16) |
+			  ((rs2 & 0x00ff0000) << 16) |
+			  ((rs1 & 0x00ff0000) << 24) |
+			  ((rs2 & 0xff000000) << 24) |
+			  ((rs1 & 0xff000000) << 32));
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+	}
+}
+
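+/* Emulate the partitioned 8x16 multiplies.  Each step multiplies an
+ * unsigned 8-bit lane by a signed 16-bit lane and keeps the upper 16
+ * bits of the 24-bit product, rounding up when the discarded bit 7 is
+ * set -- the 'if (prod & 0x80) scaled++' in every variant below.  The
+ * variants differ only in which lanes of rs1/rs2 feed each product
+ * and where the results land in rd.
+ */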
+static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+	struct fpustate *f = FPUSTATE;
+	unsigned long rs1, rs2, rd_val;
+
+	switch (opf) {
+	case FMUL8x16_OPF: {
+		unsigned long byte;
+
+		rs1 = fps_regval(f, RS1(insn));
+		rs2 = fpd_regval(f, RS2(insn));
+
+		rd_val = 0;
+		for (byte = 0; byte < 4; byte++) {
+			u16 src1 = (rs1 >> (byte *  8)) & 0x00ff;
+			s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
+			u32 prod = src1 * src2;
+			u16 scaled = ((prod & 0x00ffff00) >> 8);
+
+			/* Round up.  */
+			if (prod & 0x80)
+				scaled++;
+			rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
+		}
+
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FMUL8x16AU_OPF:
+	case FMUL8x16AL_OPF: {
+		unsigned long byte;
+		s16 src2;
+
+		rs1 = fps_regval(f, RS1(insn));
+		rs2 = fps_regval(f, RS2(insn));
+
+		rd_val = 0;
+		src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
+		for (byte = 0; byte < 4; byte++) {
+			u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
+			u32 prod = src1 * src2;
+			u16 scaled = ((prod & 0x00ffff00) >> 8);
+
+			/* Round up.  */
+			if (prod & 0x80)
+				scaled++;
+			rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
+		}
+
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FMUL8SUx16_OPF:
+	case FMUL8ULx16_OPF: {
+		unsigned long byte, ushift;
+
+		rs1 = fpd_regval(f, RS1(insn));
+		rs2 = fpd_regval(f, RS2(insn));
+
+		rd_val = 0;
+		ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
+		for (byte = 0; byte < 4; byte++) {
+			u16 src1;
+			s16 src2;
+			u32 prod;
+			u16 scaled;
+
+			src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
+			src2 = ((rs2 >> (16 * byte)) & 0xffff);
+			prod = src1 * src2;
+			scaled = ((prod & 0x00ffff00) >> 8);
+
+			/* Round up.  */
+			if (prod & 0x80)
+				scaled++;
+			rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
+		}
+
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+
+	case FMULD8SUx16_OPF:
+	case FMULD8ULx16_OPF: {
+		unsigned long byte, ushift;
+
+		rs1 = fps_regval(f, RS1(insn));
+		rs2 = fps_regval(f, RS2(insn));
+
+		rd_val = 0;
+		ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
+		for (byte = 0; byte < 2; byte++) {
+			u16 src1;
+			s16 src2;
+			u32 prod;
+			u16 scaled;
+
+			src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
+			src2 = ((rs2 >> (16 * byte)) & 0xffff);
+			prod = src1 * src2;
+			scaled = ((prod & 0x00ffff00) >> 8);
+
+			/* Round up.  */
+			if (prod & 0x80)
+				scaled++;
+			rd_val |= ((scaled & 0xffffUL) <<
+				   ((byte * 32UL) + 7UL));
+		}
+		*fpd_regaddr(f, RD(insn)) = rd_val;
+		break;
+	}
+	}
+}
+
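+/* Emulate the partitioned compares.  Unlike the other VIS ops these
+ * deliver their result in an integer register, one bit per 16- or
+ * 32-bit lane; the '8 >> i' and '2 >> i' terms below map lane i
+ * (counting up from the least significant lane) onto result bits
+ * from the top of the field down.
+ */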
+static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
+{
+	struct fpustate *f = FPUSTATE;
+	unsigned long rs1, rs2, rd_val, i;
+
+	rs1 = fpd_regval(f, RS1(insn));
+	rs2 = fpd_regval(f, RS2(insn));
+
+	rd_val = 0;
+
+	switch (opf) {
+	case FCMPGT16_OPF:
+		for (i = 0; i < 4; i++) {
+			s16 a = (rs1 >> (i * 16)) & 0xffff;
+			s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+			if (a > b)
+				rd_val |= 8 >> i;
+		}
+		break;
+
+	case FCMPGT32_OPF:
+		for (i = 0; i < 2; i++) {
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
+
+			if (a > b)
+				rd_val |= 2 >> i;
+		}
+		break;
+
+	case FCMPLE16_OPF:
+		for (i = 0; i < 4; i++) {
+			s16 a = (rs1 >> (i * 16)) & 0xffff;
+			s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+			if (a <= b)
+				rd_val |= 8 >> i;
+		}
+		break;
+
+	case FCMPLE32_OPF:
+		for (i = 0; i < 2; i++) {
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
+
+			if (a <= b)
+				rd_val |= 2 >> i;
+		}
+		break;
+
+	case FCMPNE16_OPF:
+		for (i = 0; i < 4; i++) {
+			s16 a = (rs1 >> (i * 16)) & 0xffff;
+			s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+			if (a != b)
+				rd_val |= 8 >> i;
+		}
+		break;
+
+	case FCMPNE32_OPF:
+		for (i = 0; i < 2; i++) {
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
+
+			if (a != b)
+				rd_val |= 2 >> i;
+		}
+		break;
+
+	case FCMPEQ16_OPF:
+		for (i = 0; i < 4; i++) {
+			s16 a = (rs1 >> (i * 16)) & 0xffff;
+			s16 b = (rs2 >> (i * 16)) & 0xffff;
+
+			if (a == b)
+				rd_val |= 8 >> i;
+		}
+		break;
+
+	case FCMPEQ32_OPF:
+		for (i = 0; i < 2; i++) {
+			s32 a = (rs1 >> (i * 32)) & 0xffffffff;
+			s32 b = (rs2 >> (i * 32)) & 0xffffffff;
+
+			if (a == b)
+				rd_val |= 2 >> i;
+		}
+		break;
+	}
+
+	maybe_flush_windows(0, 0, RD(insn), 0);
+	store_reg(regs, rd_val, RD(insn));
+}
+
+/* Emulate the VIS instructions which are not implemented in
+ * hardware on Niagara.
+ */
+int vis_emul(struct pt_regs *regs, unsigned int insn)
+{
+	unsigned long pc = regs->tpc;
+	unsigned int opf;
+
+	BUG_ON(regs->tstate & TSTATE_PRIV);
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+	if (test_thread_flag(TIF_32BIT))
+		pc = (u32)pc;
+
+	if (get_user(insn, (u32 __user *) pc))
+		return -EFAULT;
+
+	save_and_clear_fpu();
+
+	opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
+	switch (opf) {
+	default:
+		return -EINVAL;
+
+	/* Pixel Formatting Instructions.  */
+	case FPACK16_OPF:
+	case FPACK32_OPF:
+	case FPACKFIX_OPF:
+	case FEXPAND_OPF:
+	case FPMERGE_OPF:
+		pformat(regs, insn, opf);
+		break;
+
+	/* Partitioned Multiply Instructions  */
+	case FMUL8x16_OPF:
+	case FMUL8x16AU_OPF:
+	case FMUL8x16AL_OPF:
+	case FMUL8SUx16_OPF:
+	case FMUL8ULx16_OPF:
+	case FMULD8SUx16_OPF:
+	case FMULD8ULx16_OPF:
+		pmul(regs, insn, opf);
+		break;
+
+	/* Pixel Compare Instructions  */
+	case FCMPGT16_OPF:
+	case FCMPGT32_OPF:
+	case FCMPLE16_OPF:
+	case FCMPLE32_OPF:
+	case FCMPNE16_OPF:
+	case FCMPNE32_OPF:
+	case FCMPEQ16_OPF:
+	case FCMPEQ32_OPF:
+		pcmp(regs, insn, opf);
+		break;
+
+	/* Edge Handling Instructions  */
+	case EDGE8_OPF:
+	case EDGE8N_OPF:
+	case EDGE8L_OPF:
+	case EDGE8LN_OPF:
+	case EDGE16_OPF:
+	case EDGE16N_OPF:
+	case EDGE16L_OPF:
+	case EDGE16LN_OPF:
+	case EDGE32_OPF:
+	case EDGE32N_OPF:
+	case EDGE32L_OPF:
+	case EDGE32LN_OPF:
+		edge(regs, insn, opf);
+		break;
+
+	/* Pixel Component Distance  */
+	case PDIST_OPF:
+		pdist(regs, insn);
+		break;
+
+	/* Three-Dimensional Array Addressing Instructions  */
+	case ARRAY8_OPF:
+	case ARRAY16_OPF:
+	case ARRAY32_OPF:
+		array(regs, insn, opf);
+		break;
+
+	/* Byte Mask and Shuffle Instructions  */
+	case BMASK_OPF:
+		bmask(regs, insn);
+		break;
+
+	case BSHUFFLE_OPF:
+		bshuffle(regs, insn);
+		break;
+	}
+
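+	/* Instruction emulated; advance past it the way a hardware
+	 * retry would, by stepping tpc/tnpc forward.
+	 */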
+	regs->tpc = regs->tnpc;
+	regs->tnpc += 4;
+	return 0;
+}
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
new file mode 100644
index 0000000..61afd78
--- /dev/null
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* ld script for sparc32/sparc64 kernel */
+
+#include <asm-generic/vmlinux.lds.h>
+
+#include <asm/page.h>
+#include <asm/thread_info.h>
+
+#ifdef CONFIG_SPARC32
+#define INITIAL_ADDRESS  0x10000 + SIZEOF_HEADERS
+#define TEXTSTART	0xf0004000
+
+#define SMP_CACHE_BYTES_SHIFT 5
+
+#else
+#define SMP_CACHE_BYTES_SHIFT 6
+#define INITIAL_ADDRESS 0x4000
+#define TEXTSTART      0x0000000000404000
+
+#endif
+
+#define SMP_CACHE_BYTES (1 << SMP_CACHE_BYTES_SHIFT)
+
+#ifdef CONFIG_SPARC32
+OUTPUT_FORMAT("elf32-sparc", "elf32-sparc", "elf32-sparc")
+OUTPUT_ARCH(sparc)
+ENTRY(_start)
+jiffies = jiffies_64 + 4;
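+/* Big-endian 32-bit: the low word of jiffies_64 sits at offset 4. */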
+#else
+/* sparc64 */
+OUTPUT_FORMAT("elf64-sparc", "elf64-sparc", "elf64-sparc")
+OUTPUT_ARCH(sparc:v9a)
+ENTRY(_start)
+jiffies = jiffies_64;
+#endif
+
+#ifdef CONFIG_SPARC64
+ASSERT((swapper_tsb == 0x0000000000408000), "Error: sparc64 early assembler too large")
+#endif
+
+SECTIONS
+{
+#ifdef CONFIG_SPARC64
+	swapper_pg_dir = 0x0000000000402000;
+#endif
+	. = INITIAL_ADDRESS;
+	.text TEXTSTART :
+	{
+		_text = .;
+		HEAD_TEXT
+		TEXT_TEXT
+		SCHED_TEXT
+		CPUIDLE_TEXT
+		LOCK_TEXT
+		KPROBES_TEXT
+		IRQENTRY_TEXT
+		SOFTIRQENTRY_TEXT
+		*(.gnu.warning)
+	} = 0
+	_etext = .;
+
+	RO_DATA(PAGE_SIZE)
+
+	/* Start of data section */
+	_sdata = .;
+
+	.data1 : {
+		*(.data1)
+	}
+	RW_DATA_SECTION(SMP_CACHE_BYTES, 0, THREAD_SIZE)
+
+	/* End of data section */
+	_edata = .;
+
+	.fixup : {
+		__start___fixup = .;
+		*(.fixup)
+		__stop___fixup = .;
+	}
+	EXCEPTION_TABLE(16)
+	NOTES
+
+	. = ALIGN(PAGE_SIZE);
+	__init_begin = ALIGN(PAGE_SIZE);
+	INIT_TEXT_SECTION(PAGE_SIZE)
+	__init_text_end = .;
+	INIT_DATA_SECTION(16)
+
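+	/* Boot-time instruction patch tables.  Each section below
+	 * collects the records emitted by the matching assembler
+	 * macros, bracketed by start/end symbols so the early boot
+	 * code can walk a table and rewrite kernel text for the CPU
+	 * it finds itself on (sun4v, LEON, M7, and so on).
+	 */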
+	. = ALIGN(4);
+	.tsb_ldquad_phys_patch : {
+		__tsb_ldquad_phys_patch = .;
+		*(.tsb_ldquad_phys_patch)
+		__tsb_ldquad_phys_patch_end = .;
+	}
+
+	.tsb_phys_patch : {
+		__tsb_phys_patch = .;
+		*(.tsb_phys_patch)
+		__tsb_phys_patch_end = .;
+	}
+
+	.cpuid_patch : {
+		__cpuid_patch = .;
+		*(.cpuid_patch)
+		__cpuid_patch_end = .;
+	}
+
+	.sun4v_1insn_patch : {
+		__sun4v_1insn_patch = .;
+		*(.sun4v_1insn_patch)
+		__sun4v_1insn_patch_end = .;
+	}
+	.sun4v_2insn_patch : {
+		__sun4v_2insn_patch = .;
+		*(.sun4v_2insn_patch)
+		__sun4v_2insn_patch_end = .;
+	}
+	.leon_1insn_patch : {
+		__leon_1insn_patch = .;
+		*(.leon_1insn_patch)
+		__leon_1insn_patch_end = .;
+	}
+	.swapper_tsb_phys_patch : {
+		__swapper_tsb_phys_patch = .;
+		*(.swapper_tsb_phys_patch)
+		__swapper_tsb_phys_patch_end = .;
+	}
+	.swapper_4m_tsb_phys_patch : {
+		__swapper_4m_tsb_phys_patch = .;
+		*(.swapper_4m_tsb_phys_patch)
+		__swapper_4m_tsb_phys_patch_end = .;
+	}
+	.popc_3insn_patch : {
+		__popc_3insn_patch = .;
+		*(.popc_3insn_patch)
+		__popc_3insn_patch_end = .;
+	}
+	.popc_6insn_patch : {
+		__popc_6insn_patch = .;
+		*(.popc_6insn_patch)
+		__popc_6insn_patch_end = .;
+	}
+	.pause_3insn_patch : {
+		__pause_3insn_patch = .;
+		*(.pause_3insn_patch)
+		__pause_3insn_patch_end = .;
+	}
+	.sun_m7_1insn_patch : {
+		__sun_m7_1insn_patch = .;
+		*(.sun_m7_1insn_patch)
+		__sun_m7_1insn_patch_end = .;
+	}
+	.sun_m7_2insn_patch : {
+		__sun_m7_2insn_patch = .;
+		*(.sun_m7_2insn_patch)
+		__sun_m7_2insn_patch_end = .;
+	}
+	.get_tick_patch : {
+		__get_tick_patch = .;
+		*(.get_tick_patch)
+		__get_tick_patch_end = .;
+	}
+	.pud_huge_patch : {
+		__pud_huge_patch = .;
+		*(.pud_huge_patch)
+		__pud_huge_patch_end = .;
+	}
+	.fast_win_ctrl_1insn_patch : {
+		__fast_win_ctrl_1insn_patch = .;
+		*(.fast_win_ctrl_1insn_patch)
+		__fast_win_ctrl_1insn_patch_end = .;
+	}
+	PERCPU_SECTION(SMP_CACHE_BYTES)
+
+#ifdef CONFIG_JUMP_LABEL
+	. = ALIGN(PAGE_SIZE);
+	.exit.text : {
+		EXIT_TEXT
+	}
+#endif
+
+	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+	BSS_SECTION(0, 0, 0)
+	_end = . ;
+
+	STABS_DEBUG
+	DWARF_DEBUG
+
+	DISCARDS
+}
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
new file mode 100644
index 0000000..69a6ba6
--- /dev/null
+++ b/arch/sparc/kernel/windows.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* windows.c: Routines to deal with register window management
+ *            at the C-code level.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <linux/uaccess.h>
+
+#include "kernel.h"
+
+/* Execute 'save' instructions until all user register windows are out of the cpu. */
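+/* Roughly: each 'save' rotates into the next register window; if that
+ * window still holds live user state, the window overflow trap spills
+ * it and clears its bit in TI_UWINMASK.  We count the saves and then
+ * execute an equal number of restores to end up back in the window
+ * we started from.
+ */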
+void flush_user_windows(void)
+{
+	register int ctr asm("g5");
+
+	ctr = 0;
+	__asm__ __volatile__(
+		"\n1:\n\t"
+		"ld	[%%g6 + %2], %%g4\n\t"
+		"orcc	%%g0, %%g4, %%g0\n\t"
+		"add	%0, 1, %0\n\t"
+		"bne	1b\n\t"
+		" save	%%sp, -64, %%sp\n"
+		"2:\n\t"
+		"subcc	%0, 1, %0\n\t"
+		"bne	2b\n\t"
+		" restore %%g0, %%g0, %%g0\n"
+	: "=&r" (ctr)
+	: "0" (ctr),
+	  "i" ((const unsigned long)TI_UWINMASK)
+	: "g4", "cc");
+}
+
+static inline void shift_window_buffer(int first_win, int last_win, struct thread_info *tp)
+{
+	int i;
+
+	for(i = first_win; i < last_win; i++) {
+		tp->rwbuf_stkptrs[i] = tp->rwbuf_stkptrs[i+1];
+		memcpy(&tp->reg_window[i], &tp->reg_window[i+1], sizeof(struct reg_window32));
+	}
+}
+
+/* Place as many of the user's current register windows
+ * on the stack as we can.  Even if the %sp is unaligned
+ * we still copy the window there; the only case in which we don't
+ * succeed is if the %sp points to a bum mapping altogether.
+ * setup_frame() and do_sigreturn() use this before shifting
+ * the user stack around.  Future instruction and hardware
+ * bug workaround routines will need this functionality as
+ * well.
+ */
+void synchronize_user_stack(void)
+{
+	struct thread_info *tp = current_thread_info();
+	int window;
+
+	flush_user_windows();
+	if(!tp->w_saved)
+		return;
+
+	/* Ok, there is some dirty work to do. */
+	for(window = tp->w_saved - 1; window >= 0; window--) {
+		unsigned long sp = tp->rwbuf_stkptrs[window];
+
+		/* Ok, let it rip. */
+		if (copy_to_user((char __user *) sp, &tp->reg_window[window],
+				 sizeof(struct reg_window32)))
+			continue;
+
+		shift_window_buffer(window, tp->w_saved - 1, tp);
+		tp->w_saved--;
+	}
+}
+
+#if 0
+/* An optimization. */
+static inline void copy_aligned_window(void *dest, const void *src)
+{
+	__asm__ __volatile__("ldd [%1], %%g2\n\t"
+			     "ldd [%1 + 0x8], %%g4\n\t"
+			     "std %%g2, [%0]\n\t"
+			     "std %%g4, [%0 + 0x8]\n\t"
+			     "ldd [%1 + 0x10], %%g2\n\t"
+			     "ldd [%1 + 0x18], %%g4\n\t"
+			     "std %%g2, [%0 + 0x10]\n\t"
+			     "std %%g4, [%0 + 0x18]\n\t"
+			     "ldd [%1 + 0x20], %%g2\n\t"
+			     "ldd [%1 + 0x28], %%g4\n\t"
+			     "std %%g2, [%0 + 0x20]\n\t"
+			     "std %%g4, [%0 + 0x28]\n\t"
+			     "ldd [%1 + 0x30], %%g2\n\t"
+			     "ldd [%1 + 0x38], %%g4\n\t"
+			     "std %%g2, [%0 + 0x30]\n\t"
+			     "std %%g4, [%0 + 0x38]\n\t" : :
+			     "r" (dest), "r" (src) :
+			     "g2", "g3", "g4", "g5");
+}
+#endif
+
+/* Try to push the windows in a thread's window buffer to the
+ * user stack.  Unaligned %sp's are not allowed here.
+ */
+
+void try_to_clear_window_buffer(struct pt_regs *regs, int who)
+{
+	struct thread_info *tp = current_thread_info();
+	int window;
+
+	flush_user_windows();
+	for(window = 0; window < tp->w_saved; window++) {
+		unsigned long sp = tp->rwbuf_stkptrs[window];
+
+		if ((sp & 7) ||
+		    copy_to_user((char __user *) sp, &tp->reg_window[window],
+				 sizeof(struct reg_window32)))
+			do_exit(SIGILL);
+	}
+	tp->w_saved = 0;
+}
diff --git a/arch/sparc/kernel/winfixup.S b/arch/sparc/kernel/winfixup.S
new file mode 100644
index 0000000..448acce
--- /dev/null
+++ b/arch/sparc/kernel/winfixup.S
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* winfixup.S: Handle cases where user stack pointer is found to be bogus.
+ *
+ * Copyright (C) 1997, 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/head.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+#include <asm/spitfire.h>
+#include <asm/thread_info.h>
+
+	.text
+
+	/* It used to be the case that these register window fault
+	 * handlers could run via the save and restore instructions
+	 * done by the trap entry and exit code.  They now do the
+	 * window spill/fill by hand, so that case can no longer occur.
+	 */
+
+	.align	32
+fill_fixup:
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	rdpr	%tstate, %g1
+	and	%g1, TSTATE_CWP, %g1
+	or	%g4, FAULT_CODE_WINFIXUP, %g4
+	stb	%g4, [%g6 + TI_FAULT_CODE]
+	stx	%g5, [%g6 + TI_FAULT_ADDR]
+	wrpr	%g1, %cwp
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	call	do_sparc64_fault
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
+	/* Be very careful about usage of the trap globals here.
+	 * You cannot touch %g5 as that has the fault information.
+	 */
+spill_fixup:
+spill_fixup_mna:
+spill_fixup_dax:
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	ldx	[%g6 + TI_FLAGS], %g1
+	andcc	%sp, 0x1, %g0
+	movne	%icc, 0, %g1
+	andcc	%g1, _TIF_32BIT, %g0
+	ldub	[%g6 + TI_WSAVED], %g1
+	sll	%g1, 3, %g3
+	add	%g6, %g3, %g3
+	stx	%sp, [%g3 + TI_RWIN_SPTRS]
+	sll	%g1, 7, %g3
+	bne,pt	%xcc, 1f
+	 add	%g6, %g3, %g3
+	stx	%l0, [%g3 + TI_REG_WINDOW + 0x00]
+	stx	%l1, [%g3 + TI_REG_WINDOW + 0x08]
+	stx	%l2, [%g3 + TI_REG_WINDOW + 0x10]
+	stx	%l3, [%g3 + TI_REG_WINDOW + 0x18]
+	stx	%l4, [%g3 + TI_REG_WINDOW + 0x20]
+	stx	%l5, [%g3 + TI_REG_WINDOW + 0x28]
+	stx	%l6, [%g3 + TI_REG_WINDOW + 0x30]
+	stx	%l7, [%g3 + TI_REG_WINDOW + 0x38]
+	stx	%i0, [%g3 + TI_REG_WINDOW + 0x40]
+	stx	%i1, [%g3 + TI_REG_WINDOW + 0x48]
+	stx	%i2, [%g3 + TI_REG_WINDOW + 0x50]
+	stx	%i3, [%g3 + TI_REG_WINDOW + 0x58]
+	stx	%i4, [%g3 + TI_REG_WINDOW + 0x60]
+	stx	%i5, [%g3 + TI_REG_WINDOW + 0x68]
+	stx	%i6, [%g3 + TI_REG_WINDOW + 0x70]
+	ba,pt	%xcc, 2f
+	 stx	%i7, [%g3 + TI_REG_WINDOW + 0x78]
+1:	stw	%l0, [%g3 + TI_REG_WINDOW + 0x00]
+	stw	%l1, [%g3 + TI_REG_WINDOW + 0x04]
+	stw	%l2, [%g3 + TI_REG_WINDOW + 0x08]
+	stw	%l3, [%g3 + TI_REG_WINDOW + 0x0c]
+	stw	%l4, [%g3 + TI_REG_WINDOW + 0x10]
+	stw	%l5, [%g3 + TI_REG_WINDOW + 0x14]
+	stw	%l6, [%g3 + TI_REG_WINDOW + 0x18]
+	stw	%l7, [%g3 + TI_REG_WINDOW + 0x1c]
+	stw	%i0, [%g3 + TI_REG_WINDOW + 0x20]
+	stw	%i1, [%g3 + TI_REG_WINDOW + 0x24]
+	stw	%i2, [%g3 + TI_REG_WINDOW + 0x28]
+	stw	%i3, [%g3 + TI_REG_WINDOW + 0x2c]
+	stw	%i4, [%g3 + TI_REG_WINDOW + 0x30]
+	stw	%i5, [%g3 + TI_REG_WINDOW + 0x34]
+	stw	%i6, [%g3 + TI_REG_WINDOW + 0x38]
+	stw	%i7, [%g3 + TI_REG_WINDOW + 0x3c]
+2:	add	%g1, 1, %g1
+	stb	%g1, [%g6 + TI_WSAVED]
+	rdpr	%tstate, %g1
+	andcc	%g1, TSTATE_PRIV, %g0
+	saved
+	be,pn	%xcc, 1f
+	 and	%g1, TSTATE_CWP, %g1
+	retry
+1:	mov	FAULT_CODE_WRITE | FAULT_CODE_DTLB | FAULT_CODE_WINFIXUP, %g4
+	stb	%g4, [%g6 + TI_FAULT_CODE]
+	stx	%g5, [%g6 + TI_FAULT_ADDR]
+	wrpr	%g1, %cwp
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	call	do_sparc64_fault
+	 add	%sp, PTREGS_OFF, %o0
+	ba,a,pt	%xcc, rtrap
+
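+	/* winfix_mna/winfix_dax: %g3 points into the 128-byte trap
+	 * vector entry that faulted.  Round it down to the start of
+	 * that entry and aim %tnpc at the fixed slot near its end
+	 * (0x78 for the MNA case, 0x74 for DAX), where the trap table
+	 * keeps a branch to the matching fixup code; 'done' then
+	 * resumes there.
+	 */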
+winfix_mna:
+	andn	%g3, 0x7f, %g3
+	add	%g3, 0x78, %g3
+	wrpr	%g3, %tnpc
+	done
+
+fill_fixup_mna:
+	rdpr	%tstate, %g1
+	and	%g1, TSTATE_CWP, %g1
+	wrpr	%g1, %cwp
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	sethi	%hi(tlb_type), %g1
+	lduw	[%g1 + %lo(tlb_type)], %g1
+	cmp	%g1, 3
+	bne,pt	%icc, 1f
+	 add	%sp, PTREGS_OFF, %o0
+	mov	%l4, %o2
+	call	sun4v_do_mna
+	 mov	%l5, %o1
+	ba,a,pt	%xcc, rtrap
+1:	mov	%l4, %o1
+	mov	%l5, %o2
+	call	mem_address_unaligned
+	 nop
+	ba,a,pt	%xcc, rtrap
+
+winfix_dax:
+	andn	%g3, 0x7f, %g3
+	add	%g3, 0x74, %g3
+	wrpr	%g3, %tnpc
+	done
+
+fill_fixup_dax:
+	rdpr	%tstate, %g1
+	and	%g1, TSTATE_CWP, %g1
+	wrpr	%g1, %cwp
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	sethi	%hi(tlb_type), %g1
+	mov	%l4, %o1
+	lduw	[%g1 + %lo(tlb_type)], %g1
+	mov	%l5, %o2
+	cmp	%g1, 3
+	bne,pt	%icc, 1f
+	 add	%sp, PTREGS_OFF, %o0
+	call	sun4v_data_access_exception
+	 nop
+	ba,a,pt	%xcc, rtrap
+	 nop
+1:	call	spitfire_data_access_exception
+	 nop
+	ba,a,pt	%xcc, rtrap
+	 nop
diff --git a/arch/sparc/kernel/wof.S b/arch/sparc/kernel/wof.S
new file mode 100644
index 0000000..96a3a11
--- /dev/null
+++ b/arch/sparc/kernel/wof.S
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * wof.S: Sparc window overflow handler.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/contregs.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/smp.h>
+#include <asm/asi.h>
+#include <asm/winmacro.h>
+#include <asm/asmmacro.h>
+#include <asm/thread_info.h>
+
+/* WARNING: This routine is hairy and _very_ complicated, but it
+ *          must be as fast as possible as it handles the allocation
+ *          of register windows to the user and kernel.  If you touch
+ *          this code be _very_ careful as many other pieces of the
+ *          kernel depend upon how this code behaves.  You have been
+ *          duly warned...
+ */
+
+/* We define macros for registers which have a fixed
+ * meaning throughout this entire routine.  The 'T' in
+ * the comments means that the register can only be
+ * accessed when in the 'trap' window, 'G' means
+ * accessible in any window.  Do not change these registers
+ * after they have been set, until you are ready to return
+ * from the trap.
+ */
+#define t_psr       l0 /* %psr at trap time                     T */
+#define t_pc        l1 /* PC for trap return                    T */
+#define t_npc       l2 /* NPC for trap return                   T */
+#define t_wim       l3 /* %wim at trap time                     T */
+#define saved_g5    l5 /* Global save register                  T */
+#define saved_g6    l6 /* Global save register                  T */
+#define curptr      g6 /* Gets set to 'current' then stays      G */
+
+/* Now registers whose values can change within the handler.      */
+#define twin_tmp    l4 /* Temp reg, only usable in trap window  T */
+#define glob_tmp    g5 /* Global temporary reg, usable anywhere G */
+
+	.text
+	.align	4
+	/* BEGINNING OF PATCH INSTRUCTIONS */
+	/* On a 7-window Sparc the boot code patches spnwin_*
+	 * instructions with the following ones.
+	 */
+	.globl	spnwin_patch1_7win, spnwin_patch2_7win, spnwin_patch3_7win
+spnwin_patch1_7win:	sll	%t_wim, 6, %glob_tmp
+spnwin_patch2_7win:	and	%glob_tmp, 0x7f, %glob_tmp
+spnwin_patch3_7win:	and	%twin_tmp, 0x7f, %twin_tmp
+	/* END OF PATCH INSTRUCTIONS */
+
+	/* The trap entry point has done the following:
+	 *
+	 * rd    %psr, %l0
+	 * rd    %wim, %l3
+	 * b     spill_window_entry
+	 * andcc %l0, PSR_PS, %g0
+	 */
+
+	/* The datum current_thread_info->uwinmask contains at all times
+	 * a bitmask in which, if any user windows are active, at least
+	 * one bit will be set.  If no user windows are active, the
+	 * bitmask will be all zeroes.
+	 */
+	.globl	spill_window_entry
+	.globl	spnwin_patch1, spnwin_patch2, spnwin_patch3
+spill_window_entry:
+	/* LOCATION: Trap Window */
+
+	mov	%g5, %saved_g5		! save away global temp register
+	mov	%g6, %saved_g6		! save away 'current' ptr register
+
+	/* Compute what the new %wim will be if we save the
+	 * window properly in this trap handler.
+	 *
+	 * newwim = ((%wim>>1) | (%wim<<(nwindows - 1)));
+	 */
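+	/* E.g. with 8 windows and window 2 currently invalid,
+	 * %wim = 0b00000100 becomes 0b00000010; the invalid-window
+	 * marker rotates one step in the direction 'save' travels,
+	 * freeing the window we are going to store away.
+	 */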
+		srl	%t_wim, 0x1, %twin_tmp
+spnwin_patch1:	sll	%t_wim, 7, %glob_tmp
+		or	%glob_tmp, %twin_tmp, %glob_tmp
+spnwin_patch2:	and	%glob_tmp, 0xff, %glob_tmp
+
+	/* The trap entry point has set the condition codes
+	 * up for us to see if this is from user or kernel.
+	 * Get the load of 'curptr' out of the way.
+	 */
+	LOAD_CURRENT(curptr, twin_tmp)
+
+	andcc	%t_psr, PSR_PS, %g0
+	be,a	spwin_fromuser				! all user wins, branch
+	 save	%g0, %g0, %g0				! Go where saving will occur
+	
+	/* See if any user windows are active in the set. */
+	ld	[%curptr + TI_UWINMASK], %twin_tmp	! grab win mask
+	orcc	%g0, %twin_tmp, %g0			! check for set bits
+	bne	spwin_exist_uwins			! yep, there are some
+	 andn	%twin_tmp, %glob_tmp, %twin_tmp		! compute new uwinmask
+
+	/* Save into the window which must be saved and do it.
+	 * Basically if we are here, this means that we trapped
+	 * from kernel mode with only kernel windows in the register
+	 * file.
+	 */
+	save	%g0, %g0, %g0		! save into the window to stash away
+	wr	%glob_tmp, 0x0, %wim	! set new %wim, this is safe now
+
+spwin_no_userwins_from_kernel:
+	/* LOCATION: Window to be saved */
+
+	STORE_WINDOW(sp)		! stash the window
+	restore	%g0, %g0, %g0		! go back into trap window
+
+	/* LOCATION: Trap window */
+	mov	%saved_g5, %g5		! restore %glob_tmp
+	mov	%saved_g6, %g6		! restore %curptr
+	wr	%t_psr, 0x0, %psr	! restore condition codes in %psr
+	WRITE_PAUSE			! waste some time
+	jmp	%t_pc			! Return from trap
+	rett	%t_npc			! we are done
+
+spwin_exist_uwins:
+	/* LOCATION: Trap window */
+
+	/* Wow, user windows have to be dealt with, this is dirty
+	 * and messy as all hell.  And difficult to follow if you
+	 * are approaching the infamous register window trap handling
+	 * problem for the first time. DON'T LOOK!
+	 *
+	 * Note that, the way the execution path works out, the new %wim
+	 * will be left for us in the global temporary register,
+	 * %glob_tmp.  We cannot set the new %wim first because we
+	 * need to save into the appropriate window without inducing
+	 * a trap (traps are off, we'd get a watchdog wheee)...
+	 * But first, store the new user window mask calculated
+	 * above.
+	 */
+	st	%twin_tmp, [%curptr + TI_UWINMASK]
+	save	%g0, %g0, %g0		! Go to where the saving will occur
+
+spwin_fromuser:
+	/* LOCATION: Window to be saved */
+	wr	%glob_tmp, 0x0, %wim	! Now it is safe to set new %wim
+
+	/* LOCATION: Window to be saved */
+
+	/* This instruction branches to a routine which will check
+	 * the validity of the user's stack pointer by whatever means
+	 * are necessary.  This means that this is architecture
+	 * specific and thus this branch instruction will need to
+	 * be patched at boot time once the machine type is known.
+	 * This routine _shall not_ touch %curptr under any
+	 * circumstances whatsoever!  It will branch back to the
+	 * label 'spwin_good_ustack' if the stack is ok but still
+	 * needs to be dumped (SRMMU for instance will not need to
+	 * do this) or 'spwin_finish_up' if the stack is ok and the
+	 * registers have already been saved.  If the stack is found
+	 * to be bogus for some reason the routine shall branch to
+	 * the label 'spwin_user_stack_is_bolixed' which will take
+	 * care of things at that point.
+	 */
+	b	spwin_srmmu_stackchk
+	 andcc	%sp, 0x7, %g0
+
+spwin_good_ustack:
+	/* LOCATION: Window to be saved */
+
+	/* The user's stack is ok and we can safely save it at
+	 * %sp.
+	 */
+	STORE_WINDOW(sp)
+
+spwin_finish_up:
+	restore	%g0, %g0, %g0		/* Back to trap window. */
+
+	/* LOCATION: Trap window */
+
+	/* We have spilled successfully, and we have properly stored
+	 * the appropriate window onto the stack.
+	 */
+
+	/* Restore saved globals */
+	mov	%saved_g5, %g5
+	mov	%saved_g6, %g6
+
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE
+	jmp	%t_pc
+	rett	%t_npc
+
+spwin_user_stack_is_bolixed:
+	/* LOCATION: Window to be saved */
+
+	/* Wheee, user has trashed his/her stack.  We have to decide
+	 * how to proceed based upon whether we came from kernel mode
+	 * or not.  If we came from kernel mode, toss the window into
+	 * a special buffer and proceed, the kernel _needs_ a window
+	 * and we could be in an interrupt handler so timing is crucial.
+	 * If we came from user land we build a full stack frame and call
+	 * c-code to gun down the process.
+	 */
+	rd	%psr, %glob_tmp
+	andcc	%glob_tmp, PSR_PS, %g0
+	bne	spwin_bad_ustack_from_kernel
+	 nop
+
+	/* Oh well, throw this one window into the per-task window
+	 * buffer, the first one.
+	 */
+	st	%sp, [%curptr + TI_RWIN_SPTRS]
+	STORE_WINDOW(curptr + TI_REG_WINDOW)
+	restore	%g0, %g0, %g0
+
+	/* LOCATION: Trap Window */
+
+	/* Back in the trap window, update winbuffer save count. */
+	mov	1, %twin_tmp
+	st	%twin_tmp, [%curptr + TI_W_SAVED]
+
+		/* Compute new user window mask.  What we are basically
+		 * doing is taking two windows, the invalid one at trap
+		 * time and the one we attempted to throw onto the user's
+		 * stack, and saying that everything else is an ok user
+		 * window.  umask = ((~(%t_wim | %wim)) & valid_wim_bits)
+		 */
+		rd	%wim, %twin_tmp
+		or	%twin_tmp, %t_wim, %twin_tmp
+		not	%twin_tmp
+spnwin_patch3:	and	%twin_tmp, 0xff, %twin_tmp	! patched on 7win Sparcs
+		st	%twin_tmp, [%curptr + TI_UWINMASK]
+
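+	/* Build a pt_regs frame at the top of this task's kernel
+	 * stack: %curptr points at thread_info at the base, and
+	 * STACK_OFFSET leaves exactly TRACEREG_SZ + STACKFRAME_SZ
+	 * of room below the THREAD_SIZE boundary.
+	 */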
+#define STACK_OFFSET (THREAD_SIZE - TRACEREG_SZ - STACKFRAME_SZ)
+
+	sethi	%hi(STACK_OFFSET), %sp
+	or	%sp, %lo(STACK_OFFSET), %sp
+	add	%curptr, %sp, %sp
+
+	/* Restore the saved globals and build a pt_regs frame. */
+	mov	%saved_g5, %g5
+	mov	%saved_g6, %g6
+	STORE_PT_ALL(sp, t_psr, t_pc, t_npc, g1)
+
+	sethi	%hi(STACK_OFFSET), %g6
+	or	%g6, %lo(STACK_OFFSET), %g6
+	sub	%sp, %g6, %g6		! curptr
+
+	/* Turn on traps and call c-code to deal with it. */
+	wr	%t_psr, PSR_ET, %psr
+	nop
+	call	window_overflow_fault
+	 nop
+
+	/* Return from trap if C-code actually fixes things, if it
+	 * doesn't then we never get this far as the process will
+	 * be given the look of death from Commander Peanut.
+	 */
+	b	ret_trap_entry
+	 clr	%l6
+
+spwin_bad_ustack_from_kernel:
+	/* LOCATION: Window to be saved */
+
+	/* The kernel provoked a spill window trap, but the window we
+	 * need to save is a user one and the process has trashed its
+	 * stack pointer.  We need to be quick, so we throw it into
+	 * a per-process window buffer until we can properly handle
+	 * this later on.
+	 */
+	SAVE_BOLIXED_USER_STACK(curptr, glob_tmp)
+	restore	%g0, %g0, %g0
+
+	/* LOCATION: Trap window */
+
+	/* Restore globals, condition codes in the %psr and
+	 * return from trap.  Note, restoring %g6 when returning
+	 * to kernel mode is not necessary these days. ;-)
+	 */
+	mov	%saved_g5, %g5
+	mov	%saved_g6, %g6
+
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE
+
+	jmp	%t_pc
+	rett	%t_npc
+
+/* Undefine the register macros which would only cause trouble
+ * if used below.  This helps find 'stupid' coding errors that
+ * produce 'odd' behavior.  The routines below are allowed to
+ * make usage of glob_tmp and t_psr so we leave them defined.
+ */
+#undef twin_tmp
+#undef curptr
+#undef t_pc
+#undef t_npc
+#undef t_wim
+#undef saved_g5
+#undef saved_g6
+
+/* Now come the per-architecture window overflow stack checking routines.
+ * As noted above %curptr cannot be touched by this routine at all.
+ */
+
+	/* This is a generic SRMMU routine.  As far as I know this
+	 * works for all current v8/srmmu implementations, we'll
+	 * see...
+	 */
+	.globl	spwin_srmmu_stackchk
+spwin_srmmu_stackchk:
+	/* LOCATION: Window to be saved on the stack */
+
+	/* Because of SMP concerns and speed we play a trick.
+	 * We disable fault traps in the MMU control register,
+	 * execute the stores, then check the fault registers
+	 * to see what happens.  I can hear Linus now
+	 * "disgusting... broken hardware...".
+	 *
+	 * But first, check to see if the user's stack has ended
+	 * up in kernel vma, in which case we would succeed for the
+	 * 'wrong' reason... ;(  Note that the 'sethi' below assumes
+	 * the kernel is page aligned, which should always be the case.
+	 */
+	/* Check results of caller's andcc %sp, 0x7, %g0 */
+	bne	spwin_user_stack_is_bolixed
+	 sethi   %hi(PAGE_OFFSET), %glob_tmp
+	cmp	%glob_tmp, %sp
+	bleu	spwin_user_stack_is_bolixed
+	 mov	AC_M_SFSR, %glob_tmp
+
+	/* Clear the fault status and turn on the no_fault bit. */
+LEON_PI(lda	[%glob_tmp] ASI_LEON_MMUREGS, %g0)	! eat SFSR
+SUN_PI_(lda	[%glob_tmp] ASI_M_MMUREGS, %g0)		! eat SFSR
+
+LEON_PI(lda	[%g0] ASI_LEON_MMUREGS, %glob_tmp)	! read MMU control
+SUN_PI_(lda	[%g0] ASI_M_MMUREGS, %glob_tmp)		! read MMU control
+	or	%glob_tmp, 0x2, %glob_tmp		! or in no_fault bit
+LEON_PI(sta	%glob_tmp, [%g0] ASI_LEON_MMUREGS)	! set it
+SUN_PI_(sta	%glob_tmp, [%g0] ASI_M_MMUREGS)		! set it
+
+	/* Dump the registers and cross fingers. */
+	STORE_WINDOW(sp)
+
+	/* Clear the no_fault bit and check the status. */
+	andn	%glob_tmp, 0x2, %glob_tmp
+LEON_PI(sta	%glob_tmp, [%g0] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%glob_tmp, [%g0] ASI_M_MMUREGS)
+
+	mov	AC_M_SFAR, %glob_tmp
+LEON_PI(lda	[%glob_tmp] ASI_LEON_MMUREGS, %g0)
+SUN_PI_(lda	[%glob_tmp] ASI_M_MMUREGS, %g0)
+
+	mov	AC_M_SFSR, %glob_tmp
+LEON_PI(lda	[%glob_tmp] ASI_LEON_MMUREGS, %glob_tmp)
+SUN_PI_(lda	[%glob_tmp] ASI_M_MMUREGS, %glob_tmp)
+	andcc	%glob_tmp, 0x2, %g0			! did we fault?
+	be,a	spwin_finish_up + 0x4			! cool beans, success
+	 restore %g0, %g0, %g0
+
+	rd	%psr, %glob_tmp
+	b	spwin_user_stack_is_bolixed + 0x4	! we faulted, ugh
+	 nop
diff --git a/arch/sparc/kernel/wuf.S b/arch/sparc/kernel/wuf.S
new file mode 100644
index 0000000..1a4ca49
--- /dev/null
+++ b/arch/sparc/kernel/wuf.S
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * wuf.S: Window underflow trap handler for the Sparc.
+ *
+ * Copyright (C) 1995 David S. Miller
+ */
+
+#include <asm/contregs.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/smp.h>
+#include <asm/asi.h>
+#include <asm/winmacro.h>
+#include <asm/asmmacro.h>
+#include <asm/thread_info.h>
+
+/* Just like the overflow handler we define macros for registers
+ * with fixed meanings in this routine.
+ */
+#define t_psr       l0
+#define t_pc        l1
+#define t_npc       l2
+#define t_wim       l3
+/* Don't touch the above registers or else you die horribly... */
+
+/* Now macros for the available scratch registers in this routine. */
+#define twin_tmp1    l4
+#define twin_tmp2    l5
+
+#define curptr       g6
+
+	.text
+	.align	4
+
+	/* The trap entry point has executed the following:
+	 *
+	 * rd    %psr, %l0
+	 * rd    %wim, %l3
+	 * b     fill_window_entry
+	 * andcc %l0, PSR_PS, %g0
+	 */
+
+	/* The datum current_thread_info->uwinmask contains at all times
+	 * a bitmask in which, if any user windows are active, at least
+	 * one bit will be set.  If no user windows are active, the
+	 * bitmask will be all zeroes.
+	 */
+
+	/* To get an idea of what has just happened to cause this
+	 * trap take a look at this diagram:
+	 *
+	 *      1  2  3  4     <--  Window number
+	 *      ----------
+	 *      T  O  W  I     <--  Symbolic name
+	 *
+	 *      O == the window that execution was in when
+	 *           the restore was attempted
+	 *
+	 *      T == the trap itself has save'd us into this
+	 *           window
+	 *
+	 *      W == this window is the one which is now invalid
+	 *           and must be made valid plus loaded from the
+	 *           stack
+	 *
+	 *      I == this window will be the invalid one when we
+	 *           are done and return from trap if successful
+	 */
+
+	/* BEGINNING OF PATCH INSTRUCTIONS */
+
+	/* On a 7-window Sparc the boot code patches fnwin_patch1
+	 * and fnwin_patch2 with the following instructions.
+	 */
+	.globl	fnwin_patch1_7win, fnwin_patch2_7win
+fnwin_patch1_7win:	srl	%t_wim, 6, %twin_tmp2
+fnwin_patch2_7win:	and	%twin_tmp1, 0x7f, %twin_tmp1
+	/* END OF PATCH INSTRUCTIONS */
+
+	.globl	fill_window_entry, fnwin_patch1, fnwin_patch2
+fill_window_entry:
+	/* LOCATION: Window 'T' */
+
+	/* Compute what the new %wim is going to be if we retrieve
+	 * the proper window off of the stack.
+	 */
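+	/* This rotates %wim one window 'up', the mirror image of the
+	 * overflow case in wof.S: e.g. with 8 windows and
+	 * %wim = 0b00000010 the new mask is 0b00000100.
+	 */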
+		sll	%t_wim, 1, %twin_tmp1
+fnwin_patch1:	srl	%t_wim, 7, %twin_tmp2
+		or	%twin_tmp1, %twin_tmp2, %twin_tmp1
+fnwin_patch2:	and	%twin_tmp1, 0xff, %twin_tmp1
+
+	wr	%twin_tmp1, 0x0, %wim	/* Make window 'I' invalid */
+
+	andcc	%t_psr, PSR_PS, %g0
+	be	fwin_from_user
+	 restore	%g0, %g0, %g0		/* Restore to window 'O' */
+
+	/* Trapped from kernel; we trust that the kernel does not
+	 * 'over restore', so to speak, and just grab the window
+	 * from the stack and return.  Easy enough.
+	 */
+fwin_from_kernel:
+	/* LOCATION: Window 'O' */
+
+	restore %g0, %g0, %g0
+
+	/* LOCATION: Window 'W' */
+
+	LOAD_WINDOW(sp)	                /* Load it up */
+
+	/* Spin the wheel... */
+	save	%g0, %g0, %g0
+	save	%g0, %g0, %g0
+	/* I'd like to buy a vowel please... */
+
+	/* LOCATION: Window 'T' */
+
+	/* Now preserve the condition codes in %psr, pause, and
+	 * return from trap.  This is the simplest case of all.
+	 */
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE
+
+	jmp	%t_pc
+	rett	%t_npc
+
+fwin_from_user:
+	/* LOCATION: Window 'O' */
+
+	restore	%g0, %g0, %g0		/* Restore to window 'W' */
+
+	/* LOCATION: Window 'W' */
+
+	/* Branch to the stack validation routine */
+	b	srmmu_fwin_stackchk
+	 andcc	%sp, 0x7, %g0
+
+#define STACK_OFFSET (THREAD_SIZE - TRACEREG_SZ - STACKFRAME_SZ)
+
+fwin_user_stack_is_bolixed:
+	/* LOCATION: Window 'W' */
+
+	/* Place a pt_regs frame on the kernel stack, save back
+	 * to the trap window and call c-code to deal with this.
+	 */
+	LOAD_CURRENT(l4, l5)
+
+	sethi	%hi(STACK_OFFSET), %l5
+	or	%l5, %lo(STACK_OFFSET), %l5
+	add	%l4, %l5, %l5
+
+	/* Store globals into pt_regs frame. */
+	STORE_PT_GLOBALS(l5)
+	STORE_PT_YREG(l5, g3)
+
+	/* Save current in a global while we change windows. */
+	mov	%l4, %curptr
+
+	save	%g0, %g0, %g0
+
+	/* LOCATION: Window 'O' */
+
+	rd	%psr, %g3		/* Read %psr in live user window */
+	mov	%fp, %g4		/* Save bogus frame pointer. */
+
+	save	%g0, %g0, %g0
+
+	/* LOCATION: Window 'T' */
+
+	sethi	%hi(STACK_OFFSET), %l5
+	or	%l5, %lo(STACK_OFFSET), %l5
+	add	%curptr, %l5, %sp
+
+	/* Build rest of pt_regs. */
+	STORE_PT_INS(sp)
+	STORE_PT_PRIV(sp, t_psr, t_pc, t_npc)
+
+	/* re-set trap time %wim value */
+	wr	%t_wim, 0x0, %wim
+
+	/* Fix users window mask and buffer save count. */
+	mov	0x1, %g5
+	sll	%g5, %g3, %g5
+	st	%g5, [%curptr + TI_UWINMASK]		! one live user window still
+	st	%g0, [%curptr + TI_W_SAVED]		! no windows in the buffer
+
+	wr	%t_psr, PSR_ET, %psr			! enable traps
+	nop
+	call	window_underflow_fault
+	 mov	%g4, %o0
+
+	b	ret_trap_entry
+	 clr	%l6
+
+fwin_user_stack_is_ok:
+	/* LOCATION: Window 'W' */
+
+	/* The user's stack area is kosher and mapped; load the
+	 * window and fall through to the finish up routine.
+	 */
+	LOAD_WINDOW(sp)
+
+	/* Round and round she goes... */
+	save	%g0, %g0, %g0		/* Save to window 'O' */
+	save	%g0, %g0, %g0		/* Save to window 'T' */
+	/* Where she'll trap nobody knows... */
+
+	/* LOCATION: Window 'T' */
+
+fwin_user_finish_up:
+	/* LOCATION: Window 'T' */
+
+	wr	%t_psr, 0x0, %psr
+	WRITE_PAUSE	
+
+	jmp	%t_pc
+	rett	%t_npc
+
+	/* Here come the architecture-specific checks for stack
+	 * mappings.  Note that unlike the window overflow handler
+	 * we only need to check whether the user can read from
+	 * the appropriate addresses.  Also note that we are in
+	 * an invalid window which will be loaded, and this means
+	 * that until we actually load the window up we are free
+	 * to use any of the local registers contained within.
+	 *
+	 * On success these routines branch to fwin_user_stack_is_ok
+	 * if the area at %sp is user readable and the window still
+	 * needs to be loaded, or to fwin_user_finish_up if the
+	 * routine has done the loading itself.  On failure (bogus
+	 * user stack) the routine shall branch to the label called
+	 * fwin_user_stack_is_bolixed.
+	 *
+	 * Contrary to the arch-specific window overflow stack
+	 * check routines in wof.S, these routines are free to use
+	 * any of the local registers they want to, as this window
+	 * does not belong to anyone at this point; however, the
+	 * outs and ins are still verboten as they are possibly
+	 * part of 'someone else's' window.
+	 */
+
+	.globl	srmmu_fwin_stackchk
+srmmu_fwin_stackchk:
+	/* LOCATION: Window 'W' */
+
+	/* Caller did 'andcc %sp, 0x7, %g0' */
+	bne	fwin_user_stack_is_bolixed
+	 sethi   %hi(PAGE_OFFSET), %l5
+
+	/* Check if the user's stack is in kernel vma, in which case
+	 * our trial-and-error technique below would succeed for
+	 * the 'wrong' reason.
+	 */
+	mov	AC_M_SFSR, %l4
+	cmp	%l5, %sp
+	bleu	fwin_user_stack_is_bolixed
+LEON_PI( lda	[%l4] ASI_LEON_MMUREGS, %g0)	! clear fault status
+SUN_PI_( lda	[%l4] ASI_M_MMUREGS, %g0)	! clear fault status
+
+	/* The technique is, turn off faults on this processor,
+	 * just let the load rip, then check the sfsr to see if
+	 * a fault did occur.  Then we turn on fault traps again
+	 * and branch conditionally based upon what happened.
+	 */
+LEON_PI(lda	[%g0] ASI_LEON_MMUREGS, %l5)	! read mmu-ctrl reg
+SUN_PI_(lda	[%g0] ASI_M_MMUREGS, %l5)	! read mmu-ctrl reg
+	or	%l5, 0x2, %l5			! turn on no-fault bit
+LEON_PI(sta	%l5, [%g0] ASI_LEON_MMUREGS)	! store it
+SUN_PI_(sta	%l5, [%g0] ASI_M_MMUREGS)	! store it
+
+	/* Cross fingers and go for it. */
+	LOAD_WINDOW(sp)
+
+	/* A penny 'saved'... */
+	save	%g0, %g0, %g0
+	save	%g0, %g0, %g0
+	/* Is a BADTRAP earned... */
+
+	/* LOCATION: Window 'T' */
+
+LEON_PI(lda	[%g0] ASI_LEON_MMUREGS, %twin_tmp1)	! load mmu-ctrl again
+SUN_PI_(lda	[%g0] ASI_M_MMUREGS, %twin_tmp1)	! load mmu-ctrl again
+	andn	%twin_tmp1, 0x2, %twin_tmp1		! clear no-fault bit
+LEON_PI(sta	%twin_tmp1, [%g0] ASI_LEON_MMUREGS)	! store it
+SUN_PI_(sta	%twin_tmp1, [%g0] ASI_M_MMUREGS)	! store it
+
+	mov	AC_M_SFAR, %twin_tmp2
+LEON_PI(lda	[%twin_tmp2] ASI_LEON_MMUREGS, %g0)	! read fault address
+SUN_PI_(lda	[%twin_tmp2] ASI_M_MMUREGS, %g0)	! read fault address
+
+	mov	AC_M_SFSR, %twin_tmp2
+LEON_PI(lda	[%twin_tmp2] ASI_LEON_MMUREGS, %twin_tmp2) ! read fault status
+SUN_PI_(lda	[%twin_tmp2] ASI_M_MMUREGS, %twin_tmp2)	   ! read fault status
+	andcc	%twin_tmp2, 0x2, %g0			   ! did fault occur?
+
+	bne	1f					   ! yep, cleanup
+	 nop
+
+	wr	%t_psr, 0x0, %psr
+	nop
+	b	fwin_user_finish_up + 0x4
+	 nop
+
+	/* Did I ever tell you about my window lobotomy?
+	 * anyways... fwin_user_stack_is_bolixed expects
+	 * to be in window 'W' so make it happy or else
+	 * we watchdog badly.
+	 */
+1:
+	restore	%g0, %g0, %g0
+	b	fwin_user_stack_is_bolixed	! oh well
+	 restore	%g0, %g0, %g0
diff --git a/arch/sparc/lib/COPYING.LIB b/arch/sparc/lib/COPYING.LIB
new file mode 100644
index 0000000..eb685a5
--- /dev/null
+++ b/arch/sparc/lib/COPYING.LIB
@@ -0,0 +1,481 @@
+		  GNU LIBRARY GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1991 Free Software Foundation, Inc.
+                    675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the library GPL.  It is
+ numbered 2 because it goes with version 2 of the ordinary GPL.]
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Library General Public License, applies to some
+specially designated Free Software Foundation software, and to any
+other libraries whose authors decide to use it.  You can use it for
+your libraries, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if
+you distribute copies of the library, or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link a program with the library, you must provide
+complete object files to the recipients so that they can relink them
+with the library, after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  Our method of protecting your rights has two steps: (1) copyright
+the library, and (2) offer you this license which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  Also, for each distributor's protection, we want to make certain
+that everyone understands that there is no warranty for this free
+library.  If the library is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original
+version, so that any problems introduced by others will not reflect on
+the original authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that companies distributing free
+software will individually obtain patent licenses, thus in effect
+transforming the program into proprietary software.  To prevent this,
+we have made it clear that any patent must be licensed for everyone's
+free use or not licensed at all.
+
+  Most GNU software, including some libraries, is covered by the ordinary
+GNU General Public License, which was designed for utility programs.  This
+license, the GNU Library General Public License, applies to certain
+designated libraries.  This license is quite different from the ordinary
+one; be sure to read it in full, and don't assume that anything in it is
+the same as in the ordinary license.
+
+  The reason we have a separate public license for some libraries is that
+they blur the distinction we usually make between modifying or adding to a
+program and simply using it.  Linking a program with a library, without
+changing the library, is in some sense simply using the library, and is
+analogous to running a utility program or application program.  However, in
+a textual and legal sense, the linked executable is a combined work, a
+derivative of the original library, and the ordinary General Public License
+treats it as such.
+
+  Because of this blurred distinction, using the ordinary General
+Public License for libraries did not effectively promote software
+sharing, because most developers did not use the libraries.  We
+concluded that weaker conditions might promote sharing better.
+
+  However, unrestricted linking of non-free programs would deprive the
+users of those programs of all benefit from the free status of the
+libraries themselves.  This Library General Public License is intended to
+permit developers of non-free programs to use free libraries, while
+preserving your freedom as a user of such programs to change the free
+libraries that are incorporated in them.  (We have not seen how to achieve
+this as regards changes in header files, but we have achieved it as regards
+changes in the actual functions of the Library.)  The hope is that this
+will lead to faster development of free libraries.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, while the latter only
+works together with the library.
+
+  Note that it is possible for a library to be covered by the ordinary
+General Public License rather than by this special one.
+
+		  GNU LIBRARY GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library which
+contains a notice placed by the copyright holder or other authorized
+party saying it may be distributed under the terms of this Library
+General Public License (also called "this License").  Each licensee is
+addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+  
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also compile or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    c) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    d) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the source code distributed need not include anything that is normally
+distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Library General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+			    NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+     Appendix: How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/arch/sparc/lib/GENbzero.S b/arch/sparc/lib/GENbzero.S
new file mode 100644
index 0000000..63d6188
--- /dev/null
+++ b/arch/sparc/lib/GENbzero.S
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* GENbzero.S: Generic sparc64 memset/clear_user.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+#include <asm/asi.h>
+
+#define EX_ST(x,y)		\
+98:	x,y;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_o1_asi;\
+	.text;			\
+	.align 4;
+
+	.align	32
+	.text
+
+	.globl		GENmemset
+	.type		GENmemset, #function
+GENmemset:		/* %o0=buf, %o1=pat, %o2=len */
+	and		%o1, 0xff, %o3
+	mov		%o2, %o1
+	sllx		%o3, 8, %g1
+	or		%g1, %o3, %o2
+	sllx		%o2, 16, %g1
+	or		%g1, %o2, %o2
+	sllx		%o2, 32, %g1
+	ba,pt		%xcc, 1f
+	 or		%g1, %o2, %o2
+
+	.globl		GENbzero
+	.type		GENbzero, #function
+GENbzero:
+	clr		%o2
+1:	brz,pn		%o1, GENbzero_return
+	 mov		%o0, %o3
+
+	/* %o5: saved %asi, restored at GENbzero_done
+	 * %o4:	store %asi to use
+	 */
+	rd		%asi, %o5
+	mov		ASI_P, %o4
+	wr		%o4, 0x0, %asi
+
+GENbzero_from_clear_user:
+	cmp		%o1, 15
+	bl,pn		%icc, GENbzero_tiny
+	 andcc		%o0, 0x7, %g1
+	be,pt		%xcc, 2f
+	 mov		8, %g2
+	sub		%g2, %g1, %g1
+	sub		%o1, %g1, %o1
+1:	EX_ST(stba %o2, [%o0 + 0x00] %asi)
+	subcc		%g1, 1, %g1
+	bne,pt		%xcc, 1b
+	 add		%o0, 1, %o0
+2:	cmp		%o1, 128
+	bl,pn		%icc, GENbzero_medium
+	 andcc		%o0, (64 - 1), %g1
+	be,pt		%xcc, GENbzero_pre_loop
+	 mov		64, %g2
+	sub		%g2, %g1, %g1
+	sub		%o1, %g1, %o1
+1:	EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+	subcc		%g1, 8, %g1
+	bne,pt		%xcc, 1b
+	 add		%o0, 8, %o0
+
+GENbzero_pre_loop:
+	andn		%o1, (64 - 1), %g1
+	sub		%o1, %g1, %o1
+GENbzero_loop:
+	EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x08] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x10] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x18] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x20] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x28] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x30] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x38] %asi)
+	subcc		%g1, 64, %g1
+	bne,pt		%xcc, GENbzero_loop
+	 add		%o0, 64, %o0
+
+	membar		#Sync
+	wr		%o4, 0x0, %asi
+	brz,pn		%o1, GENbzero_done
+GENbzero_medium:
+	 andncc		%o1, 0x7, %g1
+	be,pn		%xcc, 2f
+	 sub		%o1, %g1, %o1
+1:	EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+	subcc		%g1, 8, %g1
+	bne,pt		%xcc, 1b
+	 add		%o0, 8, %o0
+2:	brz,pt		%o1, GENbzero_done
+	 nop
+
+GENbzero_tiny:
+1:	EX_ST(stba %o2, [%o0 + 0x00] %asi)
+	subcc		%o1, 1, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+
+	/* fallthrough */
+
+GENbzero_done:
+	wr		%o5, 0x0, %asi
+
+GENbzero_return:
+	retl
+	 mov		%o3, %o0
+	.size		GENbzero, .-GENbzero
+	.size		GENmemset, .-GENmemset
+
+	.globl		GENclear_user
+	.type		GENclear_user, #function
+GENclear_user:		/* %o0=buf, %o1=len */
+	rd		%asi, %o5
+	brz,pn		%o1, GENbzero_done
+	 clr		%o3
+	cmp		%o5, ASI_AIUS
+	bne,pn		%icc, GENbzero
+	 clr		%o2
+	ba,pt		%xcc, GENbzero_from_clear_user
+	 mov		ASI_AIUS, %o4
+	.size		GENclear_user, .-GENclear_user
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define GEN_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	generic_patch_bzero
+	.type	generic_patch_bzero,#function
+generic_patch_bzero:
+	GEN_DO_PATCH(memset, GENmemset)
+	GEN_DO_PATCH(__bzero, GENbzero)
+	GEN_DO_PATCH(__clear_user, GENclear_user)
+	retl
+	 nop
+	.size	generic_patch_bzero,.-generic_patch_bzero
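
A C sketch of the pattern replication GENmemset performs before entering
its store loops: the low byte is widened to 16, 32, and finally 64 bits
by the three sllx/or steps above (illustrative only, not part of the
kernel build):

	#include <stdio.h>

	/* Broadcast the low byte of 'pat' into all eight byte lanes. */
	static unsigned long broadcast_byte(unsigned long pat)
	{
		unsigned long v = pat & 0xff;

		v |= v << 8;	/* 16-bit pattern */
		v |= v << 16;	/* 32-bit pattern */
		v |= v << 32;	/* 64-bit pattern */
		return v;
	}

	int main(void)
	{
		printf("%016lx\n", broadcast_byte(0xab)); /* abababababababab */
		return 0;
	}
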
diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S
new file mode 100644
index 0000000..6891a56
--- /dev/null
+++ b/arch/sparc/lib/GENcopy_from_user.S
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* GENcopy_from_user.S: Generic sparc64 copy from userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#define FUNC_NAME		GENcopy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] ASI_AIUS, dest
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "GENmemcpy.S"
diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S
new file mode 100644
index 0000000..df75b53
--- /dev/null
+++ b/arch/sparc/lib/GENcopy_to_user.S
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* GENcopy_to_user.S: Generic sparc64 copy to userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#define FUNC_NAME		GENcopy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+	/* Writing to %asi is _expensive_ so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "GENmemcpy.S"
diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S
new file mode 100644
index 0000000..114340a
--- /dev/null
+++ b/arch/sparc/lib/GENmemcpy.S
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* GENmemcpy.S: Generic sparc64 memcpy.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <linux/linkage.h>
+#define GLOBAL_SPARE	%g7
+#else
+#define GLOBAL_SPARE	%g5
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr)	type src, [addr]
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	GENmemcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	.text
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+ENTRY(GEN_retl_o4_1)
+	add	%o4, %o2, %o4
+	retl
+	 add	%o4, 1, %o0
+ENDPROC(GEN_retl_o4_1)
+ENTRY(GEN_retl_g1_8)
+	add	%g1, %o2, %g1
+	retl
+	 add	%g1, 8, %o0
+ENDPROC(GEN_retl_g1_8)
+ENTRY(GEN_retl_o2_4)
+	retl
+	 add	%o2, 4, %o0
+ENDPROC(GEN_retl_o2_4)
+ENTRY(GEN_retl_o2_1)
+	retl
+	 add	%o2, 1, %o0
+ENDPROC(GEN_retl_o2_1)
+#endif
+
+	.align		64
+
+	.globl	FUNC_NAME
+	.type	FUNC_NAME,#function
+FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
+	srlx		%o2, 31, %g2
+	cmp		%g2, 0
+	tne		%XCC, 5
+	PREAMBLE
+	mov		%o0, GLOBAL_SPARE
+
+	cmp		%o2, 0
+	be,pn		%XCC, 85f
+	 or		%o0, %o1, %o3
+	cmp		%o2, 16
+	blu,a,pn	%XCC, 80f
+	 or		%o3, %o2, %o3
+
+	xor		%o0, %o1, %o4
+	andcc		%o4, 0x7, %g0
+	bne,a,pn	%XCC, 90f
+	 sub		%o0, %o1, %o3
+
+	and		%o0, 0x7, %o4
+	sub		%o4, 0x8, %o4
+	sub		%g0, %o4, %o4
+	sub		%o2, %o4, %o2
+1:	subcc		%o4, 1, %o4
+	EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
+	EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
+	add		%o1, 1, %o1
+	bne,pt		%XCC, 1b
+	add		%o0, 1, %o0
+
+	andn		%o2, 0x7, %g1
+	sub		%o2, %g1, %o2
+1:	subcc		%g1, 0x8, %g1
+	EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
+	EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
+	add		%o1, 0x8, %o1
+	bne,pt		%XCC, 1b
+	 add		%o0, 0x8, %o0
+
+	brz,pt		%o2, 85f
+	 sub		%o0, %o1, %o3
+	ba,a,pt		%XCC, 90f
+
+	.align		64
+80: /* 0 < len <= 16 */
+	andcc		%o3, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 sub		%o0, %o1, %o3
+
+1:
+	subcc		%o2, 4, %o2
+	EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
+	EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 4, %o1
+
+85:	retl
+	 mov		EX_RETVAL(GLOBAL_SPARE), %o0
+
+	.align		32
+90:
+	subcc		%o2, 1, %o2
+	EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
+	EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
+	bgu,pt		%XCC, 90b
+	 add		%o1, 1, %o1
+	retl
+	 mov		EX_RETVAL(GLOBAL_SPARE), %o0
+
+	.size		FUNC_NAME, .-FUNC_NAME
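
The GEN_retl_* stubs above spell out the recovery contract behind EX_LD
and EX_ST: each guarded access emits an __ex_table pair of (faulting
instruction, fixup), and on a fault the fixup reconstructs the number of
bytes not yet copied from the live loop registers and returns it. In
rough C terms (the entry layout is simplified for illustration):

	/* One pair, as emitted here by ".word 98b, y". */
	struct ex_pair {
		unsigned int insn;	/* guarded load/store address */
		unsigned int fixup;	/* e.g. GEN_retl_o4_1         */
	};

	/* GEN_retl_o4_1, in C: %o4 bytes were still owed by the byte
	 * loop (plus the one in flight) and %o2 had not been started. */
	static unsigned long gen_retl_o4_1(unsigned long o4, unsigned long o2)
	{
		return o4 + o2 + 1;
	}
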
diff --git a/arch/sparc/lib/GENpage.S b/arch/sparc/lib/GENpage.S
new file mode 100644
index 0000000..c143c4d
--- /dev/null
+++ b/arch/sparc/lib/GENpage.S
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* GENpage.S: Generic clear and copy page.
+ *
+ * Copyright (C) 2007 (davem@davemloft.net)
+ */
+#include <asm/page.h>
+
+	.text
+	.align	32
+
+GENcopy_user_page:
+	set	PAGE_SIZE, %g7
+1:	ldx	[%o1 + 0x00], %o2
+	ldx	[%o1 + 0x08], %o3
+	ldx	[%o1 + 0x10], %o4
+	ldx	[%o1 + 0x18], %o5
+	stx	%o2, [%o0 + 0x00]
+	stx	%o3, [%o0 + 0x08]
+	stx	%o4, [%o0 + 0x10]
+	stx	%o5, [%o0 + 0x18]
+	ldx	[%o1 + 0x20], %o2
+	ldx	[%o1 + 0x28], %o3
+	ldx	[%o1 + 0x30], %o4
+	ldx	[%o1 + 0x38], %o5
+	stx	%o2, [%o0 + 0x20]
+	stx	%o3, [%o0 + 0x28]
+	stx	%o4, [%o0 + 0x30]
+	stx	%o5, [%o0 + 0x38]
+	subcc	%g7, 64, %g7
+	add	%o1, 64, %o1
+	bne,pt	%xcc, 1b
+	 add	%o0, 64, %o0
+	retl
+	 nop
+
+GENclear_page:
+GENclear_user_page:
+	set	PAGE_SIZE, %g7
+1:	stx	%g0, [%o0 + 0x00]
+	stx	%g0, [%o0 + 0x08]
+	stx	%g0, [%o0 + 0x10]
+	stx	%g0, [%o0 + 0x18]
+	stx	%g0, [%o0 + 0x20]
+	stx	%g0, [%o0 + 0x28]
+	stx	%g0, [%o0 + 0x30]
+	stx	%g0, [%o0 + 0x38]
+	subcc	%g7, 64, %g7
+	bne,pt	%xcc, 1b
+	 add	%o0, 64, %o0
+	retl
+	 nop
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define GEN_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	generic_patch_pageops
+	.type	generic_patch_pageops,#function
+generic_patch_pageops:
+	GEN_DO_PATCH(copy_user_page, GENcopy_user_page)
+	GEN_DO_PATCH(_clear_page, GENclear_page)
+	GEN_DO_PATCH(clear_user_page, GENclear_user_page)
+	retl
+	 nop
+	.size	generic_patch_pageops,.-generic_patch_pageops
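
Both routines above are plain 64-bytes-per-iteration loops over one page.
The copy variant in C terms (PAGE_SIZE hard-coded to the sparc64 8K base
page size purely for illustration):

	#define PAGE_SIZE 8192UL	/* illustrative value */

	void gen_copy_page(unsigned long *dst, const unsigned long *src)
	{
		unsigned long left = PAGE_SIZE;

		while (left) {
			int i;

			/* eight 8-byte words == one 64-byte chunk */
			for (i = 0; i < 8; i++)
				dst[i] = src[i];
			dst += 8;
			src += 8;
			left -= 64;
		}
	}
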
diff --git a/arch/sparc/lib/GENpatch.S b/arch/sparc/lib/GENpatch.S
new file mode 100644
index 0000000..1ec1f02
--- /dev/null
+++ b/arch/sparc/lib/GENpatch.S
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* GENpatch.S: Patch Ultra-I routines with generic variant.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define GEN_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	generic_patch_copyops
+	.type	generic_patch_copyops,#function
+generic_patch_copyops:
+	GEN_DO_PATCH(memcpy, GENmemcpy)
+	GEN_DO_PATCH(raw_copy_from_user, GENcopy_from_user)
+	GEN_DO_PATCH(raw_copy_to_user, GENcopy_to_user)
+	retl
+	 nop
+	.size	generic_patch_copyops,.-generic_patch_copyops
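
GEN_DO_PATCH rewrites the first two instructions of the old routine: a
"branch always" to the new routine, then a nop for the delay slot. The
sll 11/srl 13 pair truncates the byte delta to the 19-bit signed word
displacement field of the 0x10680000 opcode, which decodes as
"ba,pt %xcc". A C model of the encoding (a sketch; as in the macro, real
patching must also flush the modified instructions from the I-cache):

	#define BA_PT_XCC	0x10680000u	/* ba,pt %xcc, disp19 */
	#define NOP_INSN	0x01000000u

	static void patch_branch(unsigned int *old, unsigned int *new_fn)
	{
		/* byte delta -> word displacement, kept to 19 bits */
		long disp = ((char *)new_fn - (char *)old) >> 2;

		old[0] = BA_PT_XCC | ((unsigned int)disp & 0x7ffff);
		old[1] = NOP_INSN;		/* delay slot */
	}
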
diff --git a/arch/sparc/lib/M7copy_from_user.S b/arch/sparc/lib/M7copy_from_user.S
new file mode 100644
index 0000000..66464b3
--- /dev/null
+++ b/arch/sparc/lib/M7copy_from_user.S
@@ -0,0 +1,40 @@
+/*
+ * M7copy_from_user.S: SPARC M7 optimized copy from userspace.
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ */
+
+
+#define EX_LD(x, y)			\
+98:	x;				\
+	.section __ex_table,"a";	\
+	.align 4;			\
+	.word 98b, y;			\
+	.text;				\
+	.align 4;
+
+#define EX_LD_FP(x, y)			\
+98:	x;				\
+	.section __ex_table,"a";	\
+	.align 4;			\
+	.word 98b, y##_fp;		\
+	.text;				\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#define FUNC_NAME		M7copy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "M7memcpy.S"
diff --git a/arch/sparc/lib/M7copy_to_user.S b/arch/sparc/lib/M7copy_to_user.S
new file mode 100644
index 0000000..a60ac46
--- /dev/null
+++ b/arch/sparc/lib/M7copy_to_user.S
@@ -0,0 +1,51 @@
+/*
+ * M7copy_to_user.S: SPARC M7 optimized copy to userspace.
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ */
+
+
+#define EX_ST(x, y)			\
+98:	x;				\
+	.section __ex_table,"a";	\
+	.align 4;			\
+	.word 98b, y;			\
+	.text;				\
+	.align 4;
+
+#define EX_ST_FP(x, y)			\
+98:	x;				\
+	.section __ex_table,"a";	\
+	.align 4;			\
+	.word 98b, y##_fp;		\
+	.text;				\
+	.align 4;
+
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
+#endif
+
+#define FUNC_NAME		M7copy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] %asi
+#define STORE_ASI		ASI_BLK_INIT_QUAD_LDD_AIUS
+#define	STORE_MRU_ASI		ASI_ST_BLKINIT_MRU_S
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+	/* Writing to %asi is _expensive_ so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "M7memcpy.S"
diff --git a/arch/sparc/lib/M7memcpy.S b/arch/sparc/lib/M7memcpy.S
new file mode 100644
index 0000000..cbd42ea
--- /dev/null
+++ b/arch/sparc/lib/M7memcpy.S
@@ -0,0 +1,923 @@
+/*
+ * M7memcpy: Optimized SPARC M7 memcpy
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ */
+
+	.file	"M7memcpy.S"
+
+/*
+ * memcpy(s1, s2, len)
+ *
+ * Copy s2 to s1, always copy n bytes.
+ * Note: this C code does not work for overlapped copies.
+ *
+ * Fast assembler language version of the following C-program for memcpy
+ * which represents the `standard' for the C-library.
+ *
+ *	void *
+ *	memcpy(void *s, const void *s0, size_t n)
+ *	{
+ *		if (n != 0) {
+ *		    char *s1 = s;
+ *		    const char *s2 = s0;
+ *		    do {
+ *			*s1++ = *s2++;
+ *		    } while (--n != 0);
+ *		}
+ *		return (s);
+ *	}
+ *
+ *
+ * SPARC T7/M7 Flow :
+ *
+ * if (count < SMALL_MAX) {
+ *   if count < SHORTCOPY              (SHORTCOPY=3)
+ *	copy bytes; exit with dst addr
+ *   if src & dst aligned on word boundary but not long word boundary,
+ *     copy with ldw/stw; branch to finish_up
+ *   if src & dst aligned on long word boundary
+ *     copy with ldx/stx; branch to finish_up
+ *   if src & dst not aligned and length <= SHORTCHECK   (SHORTCHECK=14)
+ *     copy bytes; exit with dst addr
+ *   move enough bytes to get src to word boundary
+ *   if dst now on word boundary
+ * move_words:
+ *     copy words; branch to finish_up
+ *   if dst now on half word boundary
+ *     load words, shift half words, store words; branch to finish_up
+ *   if dst on byte 1
+ *     load words, shift 3 bytes, store words; branch to finish_up
+ *   if dst on byte 3
+ *     load words, shift 1 byte, store words; branch to finish_up
+ * finish_up:
+ *     copy bytes; exit with dst addr
+ * } else {                                         More than SMALL_MAX bytes
+ *   move bytes until dst is on long word boundary
+ *   if( src is on long word boundary ) {
+ *     if (count < MED_MAX) {
+ * finish_long:					   src/dst aligned on 8 bytes
+ *       copy with ldx/stx in 8-way unrolled loop;
+ *       copy final 0-63 bytes; exit with dst addr
+ *     } else {				     src/dst aligned; count > MED_MAX
+ *       align dst on 64 byte boundary; for main data movement:
+ *       prefetch src data to L2 cache; let HW prefetch move data to L1 cache
+ *       Use BIS (block initializing store) to avoid copying store cache
+ *       lines from memory. But pre-store first element of each cache line
+ *       ST_CHUNK lines in advance of the rest of that cache line. That
+ *       gives time for replacement cache lines to be written back without
+ *       excess STQ and Miss Buffer filling. Repeat until near the end,
+ *       then finish up storing before going to finish_long.
+ *     }
+ *   } else {                                   src/dst not aligned on 8 bytes
+ *     if src is word aligned and count < MED_WMAX
+ *       move words in 8-way unrolled loop
+ *       move final 0-31 bytes; exit with dst addr
+ *     if count < MED_UMAX
+ *       use alignaddr/faligndata combined with ldd/std in 8-way
+ *       unrolled loop to move data.
+ *       go to unalign_done
+ *     else
+ *       setup alignaddr for faligndata instructions
+ *       align dst on 64 byte boundary; prefetch src data to L1 cache
+ *       loadx8, falign, block-store, prefetch loop
+ *	 (only use block-init-store when src/dst on 8 byte boundaries.)
+ * unalign_done:
+ *       move remaining bytes for unaligned cases. exit with dst addr.
+ * }
+ *
+ */
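
The flow above reduces to a three-way dispatch on length and mutual
alignment. A compilable C skeleton of just that shape (simplified: the
real code first aligns dst to 8 bytes, then tests src; the empty branches
stand in for the labeled assembly paths):

	#define SMALL_MAX	128
	#define MED_MAX		1024

	static int aligned8(const void *p)
	{
		return ((unsigned long)p & 0x7) == 0;
	}

	void *m7_memcpy_shape(void *dst, const void *src, unsigned long n)
	{
		if (n < SMALL_MAX) {
			/* small path: byte/word/longword by alignment */
		} else if (aligned8(dst) && aligned8(src)) {
			if (n < MED_MAX) {
				/* .Lmedlong: 8-way unrolled ldx/stx */
			} else {
				/* .Llarge_align8_copy: BIS + prefetch */
			}
		} else {
			/* .Lunalignsetup: alignaddr/faligndata paths */
		}
		return dst;
	}
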
+
+#include <asm/visasm.h>
+#include <asm/asi.h>
+
+#if !defined(EX_LD) && !defined(EX_ST)
+#define NON_USER_COPY
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x,y)	x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)    x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr)	type src, [addr]
+#endif
+
+/*
+ * ASI_BLK_INIT_QUAD_LDD_P/ASI_BLK_INIT_QUAD_LDD_S mark the cache
+ * line as "least recently used", which means that if many threads are
+ * active, it has a high probability of being pushed out of the cache
+ * between the first initializing store and the final stores.
+ * Thus, we use ASI_ST_BLKINIT_MRU_P/ASI_ST_BLKINIT_MRU_S, which
+ * mark the cache line as "most recently used" for all
+ * but the last cache line.
+ */
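
Concretely, the BIS loop later in this file writes each 64-byte line with
seven MRU-marked initializing stores and one final default (LRU) store,
so the line stays resident until its last word lands. A sketch with
hypothetical helpers standing in for stxa with an explicit ASI:

	/* Hypothetical wrappers for stxa with an explicit ASI operand. */
	extern void stxa_mru(unsigned long val, unsigned long *addr);
	extern void stxa_lru(unsigned long val, unsigned long *addr);

	static void fill_line(unsigned long *line, const unsigned long *val)
	{
		int i;

		for (i = 0; i < 7; i++)
			stxa_mru(val[i], &line[i]);	/* STORE_INIT_MRU */
		stxa_lru(val[7], &line[7]);		/* STORE_INIT (LRU) */
	}
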
+#ifndef STORE_ASI
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
+#else
+#define STORE_ASI	0x80		/* ASI_P */
+#endif
+#endif
+
+#ifndef STORE_MRU_ASI
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_MRU_ASI	ASI_ST_BLKINIT_MRU_P
+#else
+#define STORE_MRU_ASI	0x80		/* ASI_P */
+#endif
+#endif
+
+#ifndef STORE_INIT
+#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
+#endif
+
+#ifndef STORE_INIT_MRU
+#define STORE_INIT_MRU(src,addr)	stxa src, [addr] STORE_MRU_ASI
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	M7memcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#define	BLOCK_SIZE	64
+#define	SHORTCOPY	3
+#define	SHORTCHECK	14
+#define	SHORT_LONG	64	/* max copy for short longword-aligned case */
+				/* must be at least 64 */
+#define	SMALL_MAX	128
+#define	MED_UMAX	1024	/* max copy for medium un-aligned case */
+#define	MED_WMAX	1024	/* max copy for medium word-aligned case */
+#define	MED_MAX		1024	/* max copy for medium longword-aligned case */
+#define ST_CHUNK	24	/* ST_CHUNK - block of values for BIS Store */
+#define ALIGN_PRE	24	/* distance for aligned prefetch loop */
+
+	.register	%g2,#scratch
+
+	.section	".text"
+	.global		FUNC_NAME
+	.type		FUNC_NAME, #function
+	.align		16
+FUNC_NAME:
+	srlx            %o2, 31, %g2
+	cmp             %g2, 0
+	tne             %xcc, 5
+	PREAMBLE
+	mov		%o0, %g1	! save %o0
+	brz,pn          %o2, .Lsmallx
+	 cmp            %o2, 3
+	ble,pn          %icc, .Ltiny_cp
+	 cmp            %o2, 19
+	ble,pn          %icc, .Lsmall_cp
+	 or             %o0, %o1, %g2
+	cmp             %o2, SMALL_MAX
+	bl,pn           %icc, .Lmedium_cp
+	 nop
+
+.Lmedium:
+	neg	%o0, %o5
+	andcc	%o5, 7, %o5		! bytes till DST 8 byte aligned
+	brz,pt	%o5, .Ldst_aligned_on_8
+
+	! %o5 has the bytes to be written in partial store.
+	 sub	%o2, %o5, %o2
+	sub	%o1, %o0, %o1		! %o1 gets the difference
+7:					! dst aligning loop
+	add	%o1, %o0, %o4
+	EX_LD(LOAD(ldub, %o4, %o4), memcpy_retl_o2_plus_o5)	! load one byte
+	subcc	%o5, 1, %o5
+	EX_ST(STORE(stb, %o4, %o0), memcpy_retl_o2_plus_o5_plus_1)
+	bgu,pt	%xcc, 7b
+	 add	%o0, 1, %o0		! advance dst
+	add	%o1, %o0, %o1		! restore %o1
+.Ldst_aligned_on_8:
+	andcc	%o1, 7, %o5
+	brnz,pt	%o5, .Lsrc_dst_unaligned_on_8
+	 nop
+
+.Lsrc_dst_aligned_on_8:
+	! check if we are copying MED_MAX or more bytes
+	set MED_MAX, %o3
+	cmp %o2, %o3 			! limit to store buffer size
+	bgu,pn	%xcc, .Llarge_align8_copy
+	 nop
+
+/*
+ * Special case for handling when src and dest are both long word aligned
+ * and total data to move is less than MED_MAX bytes
+ */
+.Lmedlong:
+	subcc	%o2, 63, %o2		! adjust length to allow cc test
+	ble,pn	%xcc, .Lmedl63		! skip big loop if less than 64 bytes
+	 nop
+.Lmedl64:
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_63)	! load
+	subcc	%o2, 64, %o2		! decrement length count
+	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_63_64)	! and store
+	EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_63_56)	! a block of 64
+	EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_63_56)
+	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_63_48)
+	EX_ST(STORE(stx, %o4, %o0+16), memcpy_retl_o2_plus_63_48)
+	EX_LD(LOAD(ldx, %o1+24, %o3), memcpy_retl_o2_plus_63_40)
+	EX_ST(STORE(stx, %o3, %o0+24), memcpy_retl_o2_plus_63_40)
+	EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_63_32)! load and store
+	EX_ST(STORE(stx, %o4, %o0+32), memcpy_retl_o2_plus_63_32)
+	EX_LD(LOAD(ldx, %o1+40, %o3), memcpy_retl_o2_plus_63_24)! a block of 64
+	add	%o1, 64, %o1		! increase src ptr by 64
+	EX_ST(STORE(stx, %o3, %o0+40), memcpy_retl_o2_plus_63_24)
+	EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_63_16)
+	add	%o0, 64, %o0		! increase dst ptr by 64
+	EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_63_16)
+	EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_63_8)
+	bgu,pt	%xcc, .Lmedl64		! repeat if at least 64 bytes left
+	 EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_63_8)
+.Lmedl63:
+	addcc	%o2, 32, %o2		! adjust remaining count
+	ble,pt	%xcc, .Lmedl31		! to skip if 31 or fewer bytes left
+	 nop
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_31)	! load
+	sub	%o2, 32, %o2		! decrement length count
+	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_31_32)	! and store
+	EX_LD(LOAD(ldx, %o1+8, %o3), memcpy_retl_o2_plus_31_24)	! a block of 32
+	add	%o1, 32, %o1		! increase src ptr by 32
+	EX_ST(STORE(stx, %o3, %o0+8), memcpy_retl_o2_plus_31_24)
+	EX_LD(LOAD(ldx, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
+	add	%o0, 32, %o0		! increase dst ptr by 32
+	EX_ST(STORE(stx, %o4, %o0-16), memcpy_retl_o2_plus_31_16)
+	EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_31_8)
+	EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_31_8)
+.Lmedl31:
+	addcc	%o2, 16, %o2		! adjust remaining count
+	ble,pt	%xcc, .Lmedl15		! skip if 15 or fewer bytes left
+	 nop				!
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_15)
+	add	%o1, 16, %o1		! increase src ptr by 16
+	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_15)
+	sub	%o2, 16, %o2		! decrease count by 16
+	EX_LD(LOAD(ldx, %o1-8, %o3), memcpy_retl_o2_plus_15_8)
+	add	%o0, 16, %o0		! increase dst ptr by 16
+	EX_ST(STORE(stx, %o3, %o0-8), memcpy_retl_o2_plus_15_8)
+.Lmedl15:
+	addcc	%o2, 15, %o2		! restore count
+	bz,pt	%xcc, .Lsmallx	! exit if finished
+	 cmp	%o2, 8
+	blt,pt	%xcc, .Lmedw7		! skip if 7 or fewer bytes left
+	 tst	%o2
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)	! load 8 bytes
+	add	%o1, 8, %o1		! increase src ptr by 8
+	add	%o0, 8, %o0		! increase dst ptr by 8
+	subcc	%o2, 8, %o2		! decrease count by 8
+	bnz,pn	%xcc, .Lmedw7
+	 EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)	! and store 8
+	retl
+	 mov	EX_RETVAL(%g1), %o0	! restore %o0
+
+	.align 16
+.Lsrc_dst_unaligned_on_8:
+	! DST is 8-byte aligned, src is not
+2:
+	andcc	%o1, 0x3, %o5		! test word alignment
+	bnz,pt	%xcc, .Lunalignsetup	! branch to skip if not word aligned
+	 nop
+
+/*
+ * Handle all cases where src and dest are aligned on word
+ * boundaries. Use unrolled loops for better performance.
+ * This option wins over standard large data move when
+ * source and destination are in cache, for .Lmedium
+ * to short data moves.
+ */
+	set MED_WMAX, %o3
+	cmp %o2, %o3 			! limit to store buffer size
+	bge,pt	%xcc, .Lunalignrejoin	! otherwise rejoin main loop
+	 nop
+
+	subcc	%o2, 31, %o2		! adjust length to allow cc test
+					! for end of loop
+	ble,pt	%xcc, .Lmedw31		! skip big loop if fewer than 32 bytes
+.Lmedw32:
+	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_31)! move a block of 32
+	sllx	%o4, 32, %o5
+	EX_LD(LOAD(ld, %o1+4, %o4), memcpy_retl_o2_plus_31)
+	or	%o4, %o5, %o5
+	EX_ST(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_31)
+	subcc	%o2, 32, %o2		! decrement length count
+	EX_LD(LOAD(ld, %o1+8, %o4), memcpy_retl_o2_plus_31_24)
+	sllx	%o4, 32, %o5
+	EX_LD(LOAD(ld, %o1+12, %o4), memcpy_retl_o2_plus_31_24)
+	or	%o4, %o5, %o5
+	EX_ST(STORE(stx, %o5, %o0+8), memcpy_retl_o2_plus_31_24)
+	add	%o1, 32, %o1		! increase src ptr by 32
+	EX_LD(LOAD(ld, %o1-16, %o4), memcpy_retl_o2_plus_31_16)
+	sllx	%o4, 32, %o5
+	EX_LD(LOAD(ld, %o1-12, %o4), memcpy_retl_o2_plus_31_16)
+	or	%o4, %o5, %o5
+	EX_ST(STORE(stx, %o5, %o0+16), memcpy_retl_o2_plus_31_16)
+	add	%o0, 32, %o0		! increase dst ptr by 32
+	EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_31_8)
+	sllx	%o4, 32, %o5
+	EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_31_8)
+	or	%o4, %o5, %o5
+	bgu,pt	%xcc, .Lmedw32		! repeat if at least 32 bytes left
+	 EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_31_8)
+.Lmedw31:
+	addcc	%o2, 31, %o2		! restore count
+
+	bz,pt	%xcc, .Lsmallx	! exit if finished
+	 nop
+	cmp	%o2, 16
+	blt,pt	%xcc, .Lmedw15
+	 nop
+	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2)! move a block of 16 bytes
+	sllx	%o4, 32, %o5
+	subcc	%o2, 16, %o2		! decrement length count
+	EX_LD(LOAD(ld, %o1+4, %o4), memcpy_retl_o2_plus_16)
+	or	%o4, %o5, %o5
+	EX_ST(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_16)
+	add	%o1, 16, %o1		! increase src ptr by 16
+	EX_LD(LOAD(ld, %o1-8, %o4), memcpy_retl_o2_plus_8)
+	add	%o0, 16, %o0		! increase dst ptr by 16
+	sllx	%o4, 32, %o5
+	EX_LD(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_8)
+	or	%o4, %o5, %o5
+	EX_ST(STORE(stx, %o5, %o0-8), memcpy_retl_o2_plus_8)
+.Lmedw15:
+	bz,pt	%xcc, .Lsmallx	! exit if finished
+	 cmp	%o2, 8
+	blt,pn	%xcc, .Lmedw7		! skip if 7 or fewer bytes left
+	 tst	%o2
+	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2)	! load 4 bytes
+	subcc	%o2, 8, %o2		! decrease count by 8
+	EX_ST(STORE(stw, %o4, %o0), memcpy_retl_o2_plus_8)! and store 4 bytes
+	add	%o1, 8, %o1		! increase src ptr by 8
+	EX_LD(LOAD(ld, %o1-4, %o3), memcpy_retl_o2_plus_4)	! load 4 bytes
+	add	%o0, 8, %o0		! increase dst ptr by 8
+	EX_ST(STORE(stw, %o3, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
+	bz,pt	%xcc, .Lsmallx	! exit if finished
+.Lmedw7:				! count is ge 1, less than 8
+	cmp	%o2, 4			! check for 4 bytes left
+	blt,pn	%xcc, .Lsmallleft3	! skip if 3 or fewer bytes left
+	 nop				!
+	EX_LD(LOAD(ld, %o1, %o4), memcpy_retl_o2)	! load 4 bytes
+	add	%o1, 4, %o1		! increase src ptr by 4
+	add	%o0, 4, %o0		! increase dst ptr by 4
+	subcc	%o2, 4, %o2		! decrease count by 4
+	bnz	.Lsmallleft3
+	 EX_ST(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_4)! and store 4 bytes
+	retl
+	 mov	EX_RETVAL(%g1), %o0
+
+	.align 16
+.Llarge_align8_copy:			! Src and dst share 8 byte alignment
+	! align dst to 64 byte boundary
+	andcc	%o0, 0x3f, %o3		! %o3 == 0 means dst is 64 byte aligned
+	brz,pn	%o3, .Laligned_to_64
+	 andcc	%o0, 8, %o3		! odd long words to move?
+	brz,pt	%o3, .Laligned_to_16
+	 nop
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)
+	sub	%o2, 8, %o2
+	add	%o1, 8, %o1		! increment src ptr
+	add	%o0, 8, %o0		! increment dst ptr
+	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
+.Laligned_to_16:
+	andcc	%o0, 16, %o3		! pair of long words to move?
+	brz,pt	%o3, .Laligned_to_32
+	 nop
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)
+	sub	%o2, 16, %o2
+	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_16)
+	add	%o1, 16, %o1		! increment src ptr
+	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
+	add	%o0, 16, %o0		! increment dst ptr
+	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
+.Laligned_to_32:
+	andcc	%o0, 32, %o3		! four long words to move?
+	brz,pt	%o3, .Laligned_to_64
+	 nop
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2)
+	sub	%o2, 32, %o2
+	EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_32)
+	EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_24)
+	EX_ST(STORE(stx, %o4, %o0+8), memcpy_retl_o2_plus_24)
+	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_16)
+	EX_ST(STORE(stx, %o4, %o0+16), memcpy_retl_o2_plus_16)
+	add	%o1, 32, %o1		! increment src ptr
+	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_8)
+	add	%o0, 32, %o0		! increment dst ptr
+	EX_ST(STORE(stx, %o4, %o0-8), memcpy_retl_o2_plus_8)
+.Laligned_to_64:
+!
+!	Using block init store (BIS) instructions to avoid fetching cache
+!	lines from memory. Use ST_CHUNK stores to first element of each cache
+!	line (similar to prefetching) to avoid overfilling STQ or miss buffers.
+!	Gives existing cache lines time to be moved out of L1/L2/L3 cache.
+!	Initial stores using MRU version of BIS to keep cache line in
+!	cache until we are ready to store final element of cache line.
+!	Then store last element using the LRU version of BIS.
+!
+	andn	%o2, 0x3f, %o5		! %o5 is multiple of block size
+	and	%o2, 0x3f, %o2		! residue bytes in %o2
+!
+!	We use STORE_MRU_ASI for the first seven stores to each cache line
+!	followed by STORE_ASI (mark as LRU) for the last store. That
+!	mixed approach reduces the probability that the cache line is removed
+!	before we finish setting it, while minimizing the effects on
+!	other cached values during a large memcpy.
+!
+!	ST_CHUNK batches up initial BIS operations for several cache lines
+!	to allow multiple requests to not be blocked by overflowing
+!	the store miss buffer. Then the matching stores for all those
+!	BIS operations are executed.
+!
+
+	sub	%o0, 8, %o0		! adjust %o0 for ASI alignment
+.Lalign_loop:
+	cmp	%o5, ST_CHUNK*64
+	blu,pt	%xcc, .Lalign_loop_fin
+	 mov	ST_CHUNK,%o3
+.Lalign_loop_start:
+	prefetch [%o1 + (ALIGN_PRE * BLOCK_SIZE)], 21
+	subcc	%o3, 1, %o3
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_o5)
+	add	%o1, 64, %o1
+	add	%o0, 8, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	bgu	%xcc,.Lalign_loop_start
+	 add	%o0, 56, %o0
+
+	mov	ST_CHUNK,%o3
+	sllx	%o3, 6, %o4		! ST_CHUNK*64
+	sub	%o1, %o4, %o1		! reset %o1
+	sub	%o0, %o4, %o0		! reset %o0
+
+.Lalign_loop_rest:
+	EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_o5)
+	add	%o0, 16, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_o5)
+	add	%o0, 8, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	subcc	%o3, 1, %o3
+	EX_LD(LOAD(ldx, %o1+24, %o4), memcpy_retl_o2_plus_o5)
+	add	%o0, 8, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_o5)
+	add	%o0, 8, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1+40, %o4), memcpy_retl_o2_plus_o5)
+	add	%o0, 8, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1+48, %o4), memcpy_retl_o2_plus_o5)
+	add	%o1, 64, %o1
+	add	%o0, 8, %o0
+	EX_ST(STORE_INIT_MRU(%o4, %o0), memcpy_retl_o2_plus_o5)
+	add	%o0, 8, %o0
+	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5)
+	sub	%o5, 64, %o5
+	bgu	%xcc,.Lalign_loop_rest
+	! mark cache line as LRU
+	 EX_ST(STORE_INIT(%o4, %o0), memcpy_retl_o2_plus_o5_plus_64)
+
+	cmp	%o5, ST_CHUNK*64
+	bgu,pt	%xcc, .Lalign_loop_start
+	 mov	ST_CHUNK,%o3
+
+	cmp	%o5, 0
+	beq	.Lalign_done
+	 nop
+.Lalign_loop_fin:
+	EX_LD(LOAD(ldx, %o1, %o4), memcpy_retl_o2_plus_o5)
+	EX_ST(STORE(stx, %o4, %o0+8), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1+8, %o4), memcpy_retl_o2_plus_o5)
+	EX_ST(STORE(stx, %o4, %o0+8+8), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1+16, %o4), memcpy_retl_o2_plus_o5)
+	EX_ST(STORE(stx, %o4, %o0+8+16), memcpy_retl_o2_plus_o5)
+	subcc	%o5, 64, %o5
+	EX_LD(LOAD(ldx, %o1+24, %o4), memcpy_retl_o2_plus_o5_64)
+	EX_ST(STORE(stx, %o4, %o0+8+24), memcpy_retl_o2_plus_o5_64)
+	EX_LD(LOAD(ldx, %o1+32, %o4), memcpy_retl_o2_plus_o5_64)
+	EX_ST(STORE(stx, %o4, %o0+8+32), memcpy_retl_o2_plus_o5_64)
+	EX_LD(LOAD(ldx, %o1+40, %o4), memcpy_retl_o2_plus_o5_64)
+	EX_ST(STORE(stx, %o4, %o0+8+40), memcpy_retl_o2_plus_o5_64)
+	EX_LD(LOAD(ldx, %o1+48, %o4), memcpy_retl_o2_plus_o5_64)
+	add	%o1, 64, %o1
+	EX_ST(STORE(stx, %o4, %o0+8+48), memcpy_retl_o2_plus_o5_64)
+	add	%o0, 64, %o0
+	EX_LD(LOAD(ldx, %o1-8, %o4), memcpy_retl_o2_plus_o5_64)
+	bgu	%xcc,.Lalign_loop_fin
+	 EX_ST(STORE(stx, %o4, %o0), memcpy_retl_o2_plus_o5_64)
+
+.Lalign_done:
+	add	%o0, 8, %o0		! restore %o0 from ASI alignment
+	membar	#StoreStore
+	sub	%o2, 63, %o2		! adjust length to allow cc test
+	ba	.Lmedl63		! in .Lmedl63
+	 nop
+
+	.align 16
+	! Dst is on 8 byte boundary; src is not; remaining count > SMALL_MAX
+.Lunalignsetup:
+.Lunalignrejoin:
+	mov	%g1, %o3	! save %g1 as VISEntryHalf clobbers it
+#ifdef NON_USER_COPY
+	VISEntryHalfFast(.Lmedium_vis_entry_fail_cp)
+#else
+	VISEntryHalf
+#endif
+	mov	%o3, %g1	! restore %g1
+
+	set MED_UMAX, %o3
+	cmp %o2, %o3 		! check for .Lmedium unaligned limit
+	bge,pt	%xcc,.Lunalign_large
+	 prefetch [%o1 + (4 * BLOCK_SIZE)], 20
+	andn	%o2, 0x3f, %o5		! %o5 is multiple of block size
+	and	%o2, 0x3f, %o2		! residue bytes in %o2
+	cmp	%o2, 8			! Ensure we do not load beyond
+	bgt	.Lunalign_adjust	! end of source buffer
+	 andn	%o1, 0x7, %o4		! %o4 has long word aligned src address
+	add	%o2, 64, %o2		! adjust to leave loop
+	sub	%o5, 64, %o5		! early if necessary
+.Lunalign_adjust:
+	alignaddr %o1, %g0, %g0		! generate %gsr
+	add	%o1, %o5, %o1		! advance %o1 to after blocks
+	EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5)
+.Lunalign_loop:
+	EX_LD_FP(LOAD(ldd, %o4+8, %f2), memcpy_retl_o2_plus_o5)
+	faligndata %f0, %f2, %f16
+	EX_LD_FP(LOAD(ldd, %o4+16, %f4), memcpy_retl_o2_plus_o5)
+	subcc	%o5, BLOCK_SIZE, %o5
+	EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5_plus_64)
+	faligndata %f2, %f4, %f18
+	EX_LD_FP(LOAD(ldd, %o4+24, %f6), memcpy_retl_o2_plus_o5_plus_56)
+	EX_ST_FP(STORE(std, %f18, %o0+8), memcpy_retl_o2_plus_o5_plus_56)
+	faligndata %f4, %f6, %f20
+	EX_LD_FP(LOAD(ldd, %o4+32, %f8), memcpy_retl_o2_plus_o5_plus_48)
+	EX_ST_FP(STORE(std, %f20, %o0+16), memcpy_retl_o2_plus_o5_plus_48)
+	faligndata %f6, %f8, %f22
+	EX_LD_FP(LOAD(ldd, %o4+40, %f10), memcpy_retl_o2_plus_o5_plus_40)
+	EX_ST_FP(STORE(std, %f22, %o0+24), memcpy_retl_o2_plus_o5_plus_40)
+	faligndata %f8, %f10, %f24
+	EX_LD_FP(LOAD(ldd, %o4+48, %f12), memcpy_retl_o2_plus_o5_plus_32)
+	EX_ST_FP(STORE(std, %f24, %o0+32), memcpy_retl_o2_plus_o5_plus_32)
+	faligndata %f10, %f12, %f26
+	EX_LD_FP(LOAD(ldd, %o4+56, %f14), memcpy_retl_o2_plus_o5_plus_24)
+	add	%o4, BLOCK_SIZE, %o4
+	EX_ST_FP(STORE(std, %f26, %o0+40), memcpy_retl_o2_plus_o5_plus_24)
+	faligndata %f12, %f14, %f28
+	EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5_plus_16)
+	EX_ST_FP(STORE(std, %f28, %o0+48), memcpy_retl_o2_plus_o5_plus_16)
+	faligndata %f14, %f0, %f30
+	EX_ST_FP(STORE(std, %f30, %o0+56), memcpy_retl_o2_plus_o5_plus_8)
+	add	%o0, BLOCK_SIZE, %o0
+	bgu,pt	%xcc, .Lunalign_loop
+	 prefetch [%o4 + (5 * BLOCK_SIZE)], 20
+	ba	.Lunalign_done
+	 nop
+
+.Lunalign_large:
+	andcc	%o0, 0x3f, %o3		! is dst 64-byte block aligned?
+	bz	%xcc, .Lunalignsrc
+	 sub	%o3, 64, %o3		! %o3 will be multiple of 8
+	neg	%o3			! bytes until dest is 64 byte aligned
+	sub	%o2, %o3, %o2		! update cnt with bytes to be moved
+	! Move bytes according to source alignment
+	andcc	%o1, 0x1, %o5
+	bnz	%xcc, .Lunalignbyte	! check for byte alignment
+	 nop
+	andcc	%o1, 2, %o5		! check for half word alignment
+	bnz	%xcc, .Lunalignhalf
+	 nop
+	! Src is word aligned
+.Lunalignword:
+	EX_LD_FP(LOAD(ld, %o1, %o4), memcpy_retl_o2_plus_o3)	! load 4 bytes
+	add	%o1, 8, %o1		! increase src ptr by 8
+	EX_ST_FP(STORE(stw, %o4, %o0), memcpy_retl_o2_plus_o3)	! and store 4
+	subcc	%o3, 8, %o3		! decrease count by 8
+	EX_LD_FP(LOAD(ld, %o1-4, %o4), memcpy_retl_o2_plus_o3_plus_4)! load 4
+	add	%o0, 8, %o0		! increase dst ptr by 8
+	bnz	%xcc, .Lunalignword
+	 EX_ST_FP(STORE(stw, %o4, %o0-4), memcpy_retl_o2_plus_o3_plus_4)
+	ba	.Lunalignsrc
+	 nop
+
+	! Src is half-word aligned
+.Lunalignhalf:
+	EX_LD_FP(LOAD(lduh, %o1, %o4), memcpy_retl_o2_plus_o3)	! load 2 bytes
+	sllx	%o4, 32, %o5		! shift left
+	EX_LD_FP(LOAD(lduw, %o1+2, %o4), memcpy_retl_o2_plus_o3)
+	or	%o4, %o5, %o5
+	sllx	%o5, 16, %o5
+	EX_LD_FP(LOAD(lduh, %o1+6, %o4), memcpy_retl_o2_plus_o3)
+	or	%o4, %o5, %o5
+	EX_ST_FP(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_o3)
+	add	%o1, 8, %o1
+	subcc	%o3, 8, %o3
+	bnz	%xcc, .Lunalignhalf
+	 add	%o0, 8, %o0
+	ba	.Lunalignsrc
+	 nop
+
+	! Src is Byte aligned
+.Lunalignbyte:
+	sub	%o0, %o1, %o0		! share pointer advance
+.Lunalignbyte_loop:
+	EX_LD_FP(LOAD(ldub, %o1, %o4), memcpy_retl_o2_plus_o3)
+	sllx	%o4, 56, %o5
+	EX_LD_FP(LOAD(lduh, %o1+1, %o4), memcpy_retl_o2_plus_o3)
+	sllx	%o4, 40, %o4
+	or	%o4, %o5, %o5
+	EX_LD_FP(LOAD(lduh, %o1+3, %o4), memcpy_retl_o2_plus_o3)
+	sllx	%o4, 24, %o4
+	or	%o4, %o5, %o5
+	EX_LD_FP(LOAD(lduh, %o1+5, %o4), memcpy_retl_o2_plus_o3)
+	sllx	%o4,  8, %o4
+	or	%o4, %o5, %o5
+	EX_LD_FP(LOAD(ldub, %o1+7, %o4), memcpy_retl_o2_plus_o3)
+	or	%o4, %o5, %o5
+	add	%o0, %o1, %o0
+	EX_ST_FP(STORE(stx, %o5, %o0), memcpy_retl_o2_plus_o3)
+	sub	%o0, %o1, %o0
+	subcc	%o3, 8, %o3
+	bnz	%xcc, .Lunalignbyte_loop
+	 add	%o1, 8, %o1
+	add	%o0,%o1, %o0 		! restore pointer
+
+	! Destination is now block (64 byte aligned)
+.Lunalignsrc:
+	andn	%o2, 0x3f, %o5		! %o5 is multiple of block size
+	and	%o2, 0x3f, %o2		! residue bytes in %o2
+	add	%o2, 64, %o2		! Ensure we do not load beyond
+	sub	%o5, 64, %o5		! end of source buffer
+
+	andn	%o1, 0x7, %o4		! %o4 has long word aligned src address
+	alignaddr %o1, %g0, %g0		! generate %gsr
+	add	%o1, %o5, %o1		! advance %o1 to after blocks
+
+	EX_LD_FP(LOAD(ldd, %o4, %f14), memcpy_retl_o2_plus_o5)
+	add	%o4, 8, %o4
+.Lunalign_sloop:
+	EX_LD_FP(LOAD(ldd, %o4, %f16), memcpy_retl_o2_plus_o5)
+	faligndata %f14, %f16, %f0
+	EX_LD_FP(LOAD(ldd, %o4+8, %f18), memcpy_retl_o2_plus_o5)
+	faligndata %f16, %f18, %f2
+	EX_LD_FP(LOAD(ldd, %o4+16, %f20), memcpy_retl_o2_plus_o5)
+	faligndata %f18, %f20, %f4
+	EX_ST_FP(STORE(std, %f0, %o0), memcpy_retl_o2_plus_o5)
+	subcc	%o5, 64, %o5
+	EX_LD_FP(LOAD(ldd, %o4+24, %f22), memcpy_retl_o2_plus_o5_plus_56)
+	faligndata %f20, %f22, %f6
+	EX_ST_FP(STORE(std, %f2, %o0+8), memcpy_retl_o2_plus_o5_plus_56)
+	EX_LD_FP(LOAD(ldd, %o4+32, %f24), memcpy_retl_o2_plus_o5_plus_48)
+	faligndata %f22, %f24, %f8
+	EX_ST_FP(STORE(std, %f4, %o0+16), memcpy_retl_o2_plus_o5_plus_48)
+	EX_LD_FP(LOAD(ldd, %o4+40, %f26), memcpy_retl_o2_plus_o5_plus_40)
+	faligndata %f24, %f26, %f10
+	EX_ST_FP(STORE(std, %f6, %o0+24), memcpy_retl_o2_plus_o5_plus_40)
+	EX_LD_FP(LOAD(ldd, %o4+48, %f28), memcpy_retl_o2_plus_o5_plus_40)
+	faligndata %f26, %f28, %f12
+	EX_ST_FP(STORE(std, %f8, %o0+32), memcpy_retl_o2_plus_o5_plus_40)
+	add	%o4, 64, %o4
+	EX_LD_FP(LOAD(ldd, %o4-8, %f30), memcpy_retl_o2_plus_o5_plus_40)
+	faligndata %f28, %f30, %f14
+	EX_ST_FP(STORE(std, %f10, %o0+40), memcpy_retl_o2_plus_o5_plus_40)
+	EX_ST_FP(STORE(std, %f12, %o0+48), memcpy_retl_o2_plus_o5_plus_40)
+	add	%o0, 64, %o0
+	EX_ST_FP(STORE(std, %f14, %o0-8), memcpy_retl_o2_plus_o5_plus_40)
+	fsrc2	%f30, %f14
+	bgu,pt	%xcc, .Lunalign_sloop
+	 prefetch [%o4 + (8 * BLOCK_SIZE)], 20
+
+.Lunalign_done:
+	! Handle trailing bytes, 64 to 127
+	! Dest long word aligned, Src not long word aligned
+	cmp	%o2, 15
+	bleu	%xcc, .Lunalign_short
+
+	 andn	%o2, 0x7, %o5		! %o5 is multiple of 8
+	and	%o2, 0x7, %o2		! residue bytes in %o2
+	add	%o2, 8, %o2
+	sub	%o5, 8, %o5		! ensure we do not load past end of src
+	andn	%o1, 0x7, %o4		! %o4 has long word aligned src address
+	add	%o1, %o5, %o1		! advance %o1 to after multiple of 8
+	EX_LD_FP(LOAD(ldd, %o4, %f0), memcpy_retl_o2_plus_o5)! fetch partialword
+.Lunalign_by8:
+	EX_LD_FP(LOAD(ldd, %o4+8, %f2), memcpy_retl_o2_plus_o5)
+	add	%o4, 8, %o4
+	faligndata %f0, %f2, %f16
+	subcc	%o5, 8, %o5
+	EX_ST_FP(STORE(std, %f16, %o0), memcpy_retl_o2_plus_o5)
+	fsrc2	%f2, %f0
+	bgu,pt	%xcc, .Lunalign_by8
+	 add	%o0, 8, %o0
+
+.Lunalign_short:
+#ifdef NON_USER_COPY
+	VISExitHalfFast
+#else
+	VISExitHalf
+#endif
+	ba	.Lsmallrest
+	 nop
+
+/*
+ * This is a special case of nested memcpy. This can happen when the kernel
+ * calls unaligned memcpy back to back without saving FP registers. We need
+ * traps (context switch) to save/restore FP registers. If the kernel calls
+ * memcpy without this trap sequence, we will hit FP corruption. Let's use
+ * the normal integer load/store method in this case.
+ */
+
+#ifdef NON_USER_COPY
+.Lmedium_vis_entry_fail_cp:
+	or	%o0, %o1, %g2
+#endif
+.Lmedium_cp:
+	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+	andcc	%g2, 0x7, %g0
+	bne,pn	%xcc, .Lmedium_unaligned_cp
+	 nop
+
+.Lmedium_noprefetch_cp:
+	andncc	%o2, 0x20 - 1, %o5
+	be,pn	%xcc, 2f
+	 sub	%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x10, %g7), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
+	add	%o1, 0x20, %o1
+	subcc	%o5, 0x20, %o5
+	EX_ST(STORE(stx, %o3, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
+	EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, %g7, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
+	bne,pt	%xcc, 1b
+	 add	%o0, 0x20, %o0
+2:	andcc	%o2, 0x18, %o5
+	be,pt	%xcc, 3f
+	 sub	%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5)
+	add	%o1, 0x08, %o1
+	add	%o0, 0x08, %o0
+	subcc	%o5, 0x08, %o5
+	bne,pt	%xcc, 1b
+	 EX_ST(STORE(stx, %o3, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
+3:	brz,pt	%o2, .Lexit_cp
+	 cmp	%o2, 0x04
+	bl,pn	%xcc, .Ltiny_cp
+	 nop
+	EX_LD(LOAD(lduw, %o1 + 0x00, %o3), memcpy_retl_o2)
+	add	%o1, 0x04, %o1
+	add	%o0, 0x04, %o0
+	subcc	%o2, 0x04, %o2
+	bne,pn	%xcc, .Ltiny_cp
+	 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_4)
+	ba,a,pt	%xcc, .Lexit_cp
+
+.Lmedium_unaligned_cp:
+	/* First get dest 8 byte aligned.  */
+	sub	%g0, %o0, %o3
+	and	%o3, 0x7, %o3
+	brz,pt	%o3, 2f
+	 sub	%o2, %o3, %o2
+
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
+	add	%o1, 1, %o1
+	subcc	%o3, 1, %o3
+	add	%o0, 1, %o0
+	bne,pt	%xcc, 1b
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
+2:
+	and	%o1, 0x7, %o3
+	brz,pn	%o3, .Lmedium_noprefetch_cp
+	 sll	%o3, 3, %o3
+	mov	64, %g2
+	sub	%g2, %o3, %g2
+	andn	%o1, 0x7, %o1
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
+	sllx	%o4, %o3, %o4
+	andn	%o2, 0x08 - 1, %o5
+	sub	%o2, %o5, %o2
+
+1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
+	add	%o1, 0x08, %o1
+	subcc	%o5, 0x08, %o5
+	srlx	%g3, %g2, %g7
+	or	%g7, %o4, %g7
+	EX_ST(STORE(stx, %g7, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
+	add	%o0, 0x08, %o0
+	bne,pt	%xcc, 1b
+	 sllx	%g3, %o3, %o4
+	srl	%o3, 3, %o3
+	add	%o1, %o3, %o1
+	brz,pn	%o2, .Lexit_cp
+	 nop
+	ba,pt	%xcc, .Lsmall_unaligned_cp
+
+.Ltiny_cp:
+	EX_LD(LOAD(ldub, %o1 + 0x00, %o3), memcpy_retl_o2)
+	subcc	%o2, 1, %o2
+	be,pn	%xcc, .Lexit_cp
+	 EX_ST(STORE(stb, %o3, %o0 + 0x00), memcpy_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x01, %o3), memcpy_retl_o2)
+	subcc	%o2, 1, %o2
+	be,pn	%xcc, .Lexit_cp
+	 EX_ST(STORE(stb, %o3, %o0 + 0x01), memcpy_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x02, %o3), memcpy_retl_o2)
+	ba,pt	%xcc, .Lexit_cp
+	 EX_ST(STORE(stb, %o3, %o0 + 0x02), memcpy_retl_o2)
+
+.Lsmall_cp:
+	andcc	%g2, 0x3, %g0
+	bne,pn	%xcc, .Lsmall_unaligned_cp
+	 andn	%o2, 0x4 - 1, %o5
+	sub	%o2, %o5, %o2
+1:
+	EX_LD(LOAD(lduw, %o1 + 0x00, %o3), memcpy_retl_o2_plus_o5)
+	add	%o1, 0x04, %o1
+	subcc	%o5, 0x04, %o5
+	add	%o0, 0x04, %o0
+	bne,pt	%xcc, 1b
+	 EX_ST(STORE(stw, %o3, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
+	brz,pt	%o2, .Lexit_cp
+	 nop
+	ba,a,pt	%xcc, .Ltiny_cp
+
+.Lsmall_unaligned_cp:
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %o3), memcpy_retl_o2)
+	add	%o1, 1, %o1
+	add	%o0, 1, %o0
+	subcc	%o2, 1, %o2
+	bne,pt	%xcc, 1b
+	 EX_ST(STORE(stb, %o3, %o0 - 0x01), memcpy_retl_o2_plus_1)
+	ba,a,pt	%xcc, .Lexit_cp
+
+.Lsmallrest:
+	tst	%o2
+	bz,pt	%xcc, .Lsmallx
+	 cmp	%o2, 4
+	blt,pn	%xcc, .Lsmallleft3
+	 nop
+	sub	%o2, 3, %o2
+.Lsmallnotalign4:
+	EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_3)! read byte
+	subcc	%o2, 4, %o2		! reduce count by 4
+	EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_7)! write byte & repeat
+	EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2_plus_6)! for total of 4
+	add	%o1, 4, %o1		! advance SRC by 4
+	EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_6)
+	EX_LD(LOAD(ldub, %o1-2, %o3), memcpy_retl_o2_plus_5)
+	add	%o0, 4, %o0		! advance DST by 4
+	EX_ST(STORE(stb, %o3, %o0-2), memcpy_retl_o2_plus_5)
+	EX_LD(LOAD(ldub, %o1-1, %o3), memcpy_retl_o2_plus_4)
+	bgu,pt	%xcc, .Lsmallnotalign4	! loop til 3 or fewer bytes remain
+	EX_ST(STORE(stb, %o3, %o0-1), memcpy_retl_o2_plus_4)
+	addcc	%o2, 3, %o2		! restore count
+	bz,pt	%xcc, .Lsmallx
+.Lsmallleft3:				! 1, 2, or 3 bytes remain
+	subcc	%o2, 1, %o2
+	EX_LD(LOAD(ldub, %o1, %o3), memcpy_retl_o2_plus_1)	! load one byte
+	bz,pt	%xcc, .Lsmallx
+	EX_ST(STORE(stb, %o3, %o0), memcpy_retl_o2_plus_1)	! store one byte
+	EX_LD(LOAD(ldub, %o1+1, %o3), memcpy_retl_o2)	! load second byte
+	subcc	%o2, 1, %o2
+	bz,pt	%xcc, .Lsmallx
+	EX_ST(STORE(stb, %o3, %o0+1), memcpy_retl_o2_plus_1)! store second byte
+	EX_LD(LOAD(ldub, %o1+2, %o3), memcpy_retl_o2)	! load third byte
+	EX_ST(STORE(stb, %o3, %o0+2), memcpy_retl_o2)	! store third byte
+.Lsmallx:
+	retl
+	 mov	EX_RETVAL(%g1), %o0
+.Lsmallfin:
+	tst	%o2
+	bnz,pn	%xcc, .Lsmallleft3
+	 nop
+	retl
+	 mov	EX_RETVAL(%g1), %o0	! restore %o0
+.Lexit_cp:
+	retl
+	 mov	EX_RETVAL(%g1), %o0
+	.size  FUNC_NAME, .-FUNC_NAME
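
The .Lmedium_unaligned_cp path above realigns a misaligned source with
integer registers only: it loads aligned doublewords and splices each
output word out of two inputs with the sllx/srlx/or triple. A rough C
model of that technique, assuming big-endian byte order as on SPARC
(the names are ours, and the assembly trims its loop count so the
read-ahead below never runs past the source buffer):

	#include <stdint.h>
	#include <stddef.h>

	/* dst is 8-byte aligned, src is misaligned by 1..7 bytes
	 * (the fully aligned case takes a different path in the assembly).
	 */
	static void copy_shift_merge(uint64_t *dst, const uint8_t *src,
				     size_t words)
	{
		const uint64_t *s = (const uint64_t *)((uintptr_t)src & ~7UL);
		unsigned int lsh = ((uintptr_t)src & 7) * 8;
		unsigned int rsh = 64 - lsh;
		uint64_t hold = *s++ << lsh;	/* like "sllx %o4, %o3, %o4" */

		while (words--) {
			uint64_t next = *s++;		/* "ldx [%o1 + 0x08], %g3" */
			*dst++ = hold | (next >> rsh);	/* srlx + or + stx */
			hold = next << lsh;		/* "sllx %g3, %o3, %o4" */
		}
	}
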
diff --git a/arch/sparc/lib/M7memset.S b/arch/sparc/lib/M7memset.S
new file mode 100644
index 0000000..62ea91b
--- /dev/null
+++ b/arch/sparc/lib/M7memset.S
@@ -0,0 +1,352 @@
+/*
+ * M7memset.S: SPARC M7 optimized memset.
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates.  All rights reserved.
+ */
+
+/*
+ * M7memset.S: M7 optimized memset.
+ *
+ * char *memset(sp, c, n)
+ *
+ * Set an array of n chars starting at sp to the character c.
+ * Return sp.
+ *
+ * Fast assembler language version of the following C-program for memset
+ * which represents the `standard' for the C-library.
+ *
+ *	void *
+ *	memset(void *sp1, int c, size_t n)
+ *	{
+ *	    if (n != 0) {
+ *		char *sp = sp1;
+ *		do {
+ *		    *sp++ = (char)c;
+ *		} while (--n != 0);
+ *	    }
+ *	    return (sp1);
+ *	}
+ *
+ * The algorithm is as follows :
+ *
+ *	For small stores of 7 or fewer bytes, the bytes are stored
+ *	individually.
+ *
+ *	For stores of fewer than 32 bytes, align the address on a 4-byte
+ *	boundary, then store as many 4-byte chunks as possible, followed
+ *	by the trailing bytes.
+ *
+ *	For sizes of 32 bytes or more, align the address on an 8-byte
+ *	boundary.
+ *	if (count >= 64) {
+ *      	store 8-byte chunks to align the address on a 64-byte boundary
+ *		if (value to be set is zero && count >= MIN_ZERO) {
+ *              	Using BIS stores, set the first long word of each
+ *			64-byte cache line to zero which will also clear the
+ *			other seven long words of the cache line.
+ *       	}
+ *       	else if (count >= MIN_LOOP) {
+ *       		Using BIS stores, set the first long word of each of
+ *              	ST_CHUNK cache lines (64 bytes each) before the main
+ *			loop is entered.
+ *              	In the main loop, continue pre-setting the first long
+ *              	word of each cache line ST_CHUNK lines in advance while
+ *              	setting the other seven long words (56 bytes) of each
+ * 			cache line until fewer than ST_CHUNK*64 bytes remain.
+ *			Then set the remaining seven long words of each cache
+ * 			line that has already had its first long word set.
+ *       	}
+ *       	store remaining data in 64-byte chunks until less than
+ *       	64 bytes remain.
+ *       }
+ *       Store as many 8-byte chunks as possible, followed by the trailing bytes.
+ *
+ * BIS = Block Init Store
+ *   Doing the advance store of the first element of the cache line
+ *   initiates the displacement of a cache line while only using a single
+ *   instruction in the pipeline. That avoids various pipeline delays,
+ *   such as filling the miss buffer. The performance effect is
+ *   similar to prefetching for normal stores.
+ *   The special case for zero fills runs faster and uses fewer instruction
+ *   cycles than the normal memset loop.
+ *
+ * We only use BIS for memsets of more than MIN_LOOP bytes because a sequence
+ * of BIS stores must be followed by a membar #StoreStore. The benefit of
+ * the BIS stores must be balanced against the cost of the membar operation.
+ */
+
+/*
+ * ASI_STBI_P marks the cache line as "least recently used"
+ * which means if many threads are active, it has a high chance
+ * of being pushed out of the cache between the first initializing
+ * store and the final stores.
+ * Thus, we use ASI_STBIMRU_P which marks the cache line as
+ * "most recently used" for all but the last store to the cache line.
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+#define ASI_STBI_P      ASI_BLK_INIT_QUAD_LDD_P
+#define ASI_STBIMRU_P   ASI_ST_BLKINIT_MRU_P
+
+
+#define ST_CHUNK        24   /* multiple of 4 due to loop unrolling */
+#define MIN_LOOP        16320
+#define MIN_ZERO        512
+
+	.section	".text"
+	.align		32
+
+/*
+ * Define clear_page(dest) as memset(dest, 0, PAGE_SIZE)
+ * (can create a more optimized version later.)
+ */
+	.globl		M7clear_page
+	.globl		M7clear_user_page
+M7clear_page:		/* clear_page(dest) */
+M7clear_user_page:
+	set	PAGE_SIZE, %o1
+	/* fall through into bzero code */
+
+	.size		M7clear_page,.-M7clear_page
+	.size		M7clear_user_page,.-M7clear_user_page
+
+/*
+ * Define bzero(dest, n) as memset(dest, 0, n)
+ * (can create a more optimized version later.)
+ */
+	.globl		M7bzero
+M7bzero:		/* bzero(dest, size) */
+	mov	%o1, %o2
+	mov	0, %o1
+	/* fall through into memset code */
+
+	.size		M7bzero,.-M7bzero
+
+	.global		M7memset
+	.type		M7memset, #function
+	.register	%g3, #scratch
+M7memset:
+	mov     %o0, %o5                ! copy sp1 before using it
+	cmp     %o2, 7                  ! if small counts, just write bytes
+	bleu,pn %xcc, .wrchar
+	 and     %o1, 0xff, %o1          ! o1 is (char)c
+
+	sll     %o1, 8, %o3
+	or      %o1, %o3, %o1           ! now o1 has 2 bytes of c
+	sll     %o1, 16, %o3
+	cmp     %o2, 32
+	blu,pn  %xcc, .wdalign
+	 or      %o1, %o3, %o1           ! now o1 has 4 bytes of c
+
+	sllx    %o1, 32, %o3
+	or      %o1, %o3, %o1           ! now o1 has 8 bytes of c
+
+.dbalign:
+	andcc   %o5, 7, %o3             ! is sp1 aligned on an 8-byte boundary?
+	bz,pt   %xcc, .blkalign         ! already long word aligned
+	 sub     %o3, 8, %o3             ! -(bytes till long word aligned)
+
+	add     %o2, %o3, %o2           ! update o2 with new count
+	! Set -(%o3) bytes till sp1 long word aligned
+1:	stb     %o1, [%o5]              ! there is at least 1 byte to set
+	inccc   %o3                     ! byte setting loop
+	bl,pt   %xcc, 1b
+	 inc     %o5
+
+	! Now sp1 is long word aligned (sp1 is found in %o5)
+.blkalign:
+	cmp     %o2, 64                 ! check if there are 64 bytes to set
+	blu,pn  %xcc, .wrshort
+	 mov     %o2, %o3
+
+	andcc   %o5, 63, %o3            ! is sp1 block aligned?
+	bz,pt   %xcc, .blkwr            ! now block aligned
+	 sub     %o3, 64, %o3            ! o3 is -(bytes till block aligned)
+	add     %o2, %o3, %o2           ! o2 is the remainder
+
+	! Store -(%o3) bytes till dst is block (64 byte) aligned.
+	! Use long word stores.
+	! Recall that dst is already long word aligned
+1:
+	addcc   %o3, 8, %o3
+	stx     %o1, [%o5]
+	bl,pt   %xcc, 1b
+	 add     %o5, 8, %o5
+
+	! Now sp1 is block aligned
+.blkwr:
+	andn    %o2, 63, %o4            ! calculate size of blocks in bytes
+	brz,pn  %o1, .wrzero            ! special case if c == 0
+	 and     %o2, 63, %o3            ! %o3 = bytes left after blk stores.
+
+	set     MIN_LOOP, %g1
+	cmp     %o4, %g1                ! check there are enough bytes to set
+	blu,pn  %xcc, .short_set        ! to justify cost of membar
+	                                ! must be > pre-cleared lines
+	 nop
+
+	! initial cache-clearing stores
+	! get store pipeline moving
+	rd	%asi, %g3		! save %asi to be restored later
+	wr     %g0, ASI_STBIMRU_P, %asi
+
+	! Primary memset loop for large memsets
+.wr_loop:
+	sub     %o5, 8, %o5		! adjust %o5 for ASI store alignment
+	mov     ST_CHUNK, %g1
+.wr_loop_start:
+	stxa    %o1, [%o5+8]%asi
+	subcc   %g1, 4, %g1
+	stxa    %o1, [%o5+8+64]%asi
+	add     %o5, 256, %o5
+	stxa    %o1, [%o5+8-128]%asi
+	bgu     %xcc, .wr_loop_start
+	 stxa    %o1, [%o5+8-64]%asi
+
+	sub     %o5, ST_CHUNK*64, %o5	! reset %o5
+	mov     ST_CHUNK, %g1
+
+.wr_loop_rest:
+	stxa    %o1, [%o5+8+8]%asi
+	sub     %o4, 64, %o4
+	stxa    %o1, [%o5+16+8]%asi
+	subcc   %g1, 1, %g1
+	stxa    %o1, [%o5+24+8]%asi
+	stxa    %o1, [%o5+32+8]%asi
+	stxa    %o1, [%o5+40+8]%asi
+	add     %o5, 64, %o5
+	stxa    %o1, [%o5-8]%asi
+	bgu     %xcc, .wr_loop_rest
+	 stxa    %o1, [%o5]ASI_STBI_P
+
+	! If more than ST_CHUNK*64 bytes remain to set, continue
+	! setting the first long word of each cache line in advance
+	! to keep the store pipeline moving.
+
+	cmp     %o4, ST_CHUNK*64
+	bge,pt  %xcc, .wr_loop_start
+	 mov     ST_CHUNK, %g1
+
+	brz,a,pn %o4, .asi_done
+	 add     %o5, 8, %o5             ! restore %o5 offset
+
+.wr_loop_small:
+	stxa    %o1, [%o5+8]%asi
+	stxa    %o1, [%o5+8+8]%asi
+	stxa    %o1, [%o5+16+8]%asi
+	stxa    %o1, [%o5+24+8]%asi
+	stxa    %o1, [%o5+32+8]%asi
+	subcc   %o4, 64, %o4
+	stxa    %o1, [%o5+40+8]%asi
+	add     %o5, 64, %o5
+	stxa    %o1, [%o5-8]%asi
+	bgu,pt  %xcc, .wr_loop_small
+	 stxa    %o1, [%o5]ASI_STBI_P
+
+	ba      .asi_done
+	 add     %o5, 8, %o5             ! restore %o5 offset
+
+	! Special case loop for zero fill memsets
+	! For each 64-byte cache line, a single STBI to the first
+	! element clears the whole line
+.wrzero:
+	cmp     %o4, MIN_ZERO           ! check if enough bytes to set
+					! to pay %asi + membar cost
+	blu     %xcc, .short_set
+	 nop
+	sub     %o4, 256, %o4
+
+.wrzero_loop:
+	mov     64, %g3
+	stxa    %o1, [%o5]ASI_STBI_P
+	subcc   %o4, 256, %o4
+	stxa    %o1, [%o5+%g3]ASI_STBI_P
+	add     %o5, 256, %o5
+	sub     %g3, 192, %g3
+	stxa    %o1, [%o5+%g3]ASI_STBI_P
+	add	%g3, 64, %g3
+	bge,pt  %xcc, .wrzero_loop
+	 stxa    %o1, [%o5+%g3]ASI_STBI_P
+	add     %o4, 256, %o4
+
+	brz,pn  %o4, .bsi_done
+	 nop
+
+.wrzero_small:
+	stxa    %o1, [%o5]ASI_STBI_P
+	subcc   %o4, 64, %o4
+	bgu,pt  %xcc, .wrzero_small
+	 add     %o5, 64, %o5
+	ba,a	.bsi_done
+
+.asi_done:
+	wr	%g3, 0x0, %asi		! restore the saved %asi
+.bsi_done:
+	membar  #StoreStore             ! required by use of Block Store Init
+
+.short_set:
+	cmp     %o4, 64                 ! check if 64 bytes to set
+	blu     %xcc, 5f
+	 nop
+4:                                      ! set final blocks of 64 bytes
+	stx     %o1, [%o5]
+	stx     %o1, [%o5+8]
+	stx     %o1, [%o5+16]
+	stx     %o1, [%o5+24]
+	subcc   %o4, 64, %o4
+	stx     %o1, [%o5+32]
+	stx     %o1, [%o5+40]
+	add     %o5, 64, %o5
+	stx     %o1, [%o5-16]
+	bgu,pt  %xcc, 4b
+	 stx     %o1, [%o5-8]
+
+5:
+	! Set the remaining long words
+.wrshort:
+	subcc   %o3, 8, %o3             ! Can we store any long words?
+	blu,pn  %xcc, .wrchars
+	 and     %o2, 7, %o2             ! calc bytes left after long words
+6:
+	subcc   %o3, 8, %o3
+	stx     %o1, [%o5]              ! store the long words
+	bgeu,pt %xcc, 6b
+	 add     %o5, 8, %o5
+
+.wrchars:                               ! check for extra chars
+	brnz    %o2, .wrfin
+	 nop
+	retl
+	 nop
+
+.wdalign:
+	andcc   %o5, 3, %o3             ! is sp1 aligned on a word boundary
+	bz,pn   %xcc, .wrword
+	 andn    %o2, 3, %o3             ! create word sized count in %o3
+
+	dec     %o2                     ! decrement count
+	stb     %o1, [%o5]              ! clear a byte
+	b       .wdalign
+	 inc     %o5                     ! next byte
+
+.wrword:
+	subcc   %o3, 4, %o3
+	st      %o1, [%o5]              ! 4-byte writing loop
+	bnz,pt  %xcc, .wrword
+	 add     %o5, 4, %o5
+
+	and     %o2, 3, %o2             ! leftover count, if any
+
+.wrchar:
+	! Set the remaining bytes, if any
+	brz     %o2, .exit
+	 nop
+.wrfin:
+	deccc   %o2
+	stb     %o1, [%o5]
+	bgu,pt  %xcc, .wrfin
+	 inc     %o5
+.exit:
+	retl                            ! %o0 was preserved
+	 nop
+
+	.size		M7memset,.-M7memset
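
The zero-fill special case described in the header comment is the
interesting part: with the block-init ASI, one 8-byte store to the
first word of a 64-byte cache line clears the entire line. A minimal C
model of the .wrzero loop (illustrative only; the real loop unrolls
four lines per iteration and is followed by membar #StoreStore):

	#include <stdint.h>
	#include <stddef.h>

	/* dst must be 64-byte aligned and bytes a multiple of 64. */
	static void bis_zero_fill_model(void *dst, size_t bytes)
	{
		uint64_t *line = dst;
		size_t lines = bytes / 64;

		while (lines--) {
			line[0] = 0;	/* stands in for "stxa %g0, [line] ASI_STBI_P" */
			line += 8;	/* advance one 64-byte line */
		}
	}
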
diff --git a/arch/sparc/lib/M7patch.S b/arch/sparc/lib/M7patch.S
new file mode 100644
index 0000000..9000b7b
--- /dev/null
+++ b/arch/sparc/lib/M7patch.S
@@ -0,0 +1,51 @@
+/*
+ * M7patch.S: Patch generic routines with M7 variant.
+ *
+ * Copyright (c) 2016, Oracle and/or its affiliates.  All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+ENTRY(m7_patch_copyops)
+	NG_DO_PATCH(memcpy, M7memcpy)
+	NG_DO_PATCH(raw_copy_from_user, M7copy_from_user)
+	NG_DO_PATCH(raw_copy_to_user, M7copy_to_user)
+	retl
+	 nop
+ENDPROC(m7_patch_copyops)
+
+ENTRY(m7_patch_bzero)
+	NG_DO_PATCH(memset, M7memset)
+	NG_DO_PATCH(__bzero, M7bzero)
+	NG_DO_PATCH(__clear_user, NGclear_user)
+	NG_DO_PATCH(tsb_init, NGtsb_init)
+	retl
+	 nop
+ENDPROC(m7_patch_bzero)
+
+ENTRY(m7_patch_pageops)
+	NG_DO_PATCH(copy_user_page, NG4copy_user_page)
+	NG_DO_PATCH(_clear_page, M7clear_page)
+	NG_DO_PATCH(clear_user_page, M7clear_user_page)
+	retl
+	 nop
+ENDPROC(m7_patch_pageops)
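
NG_DO_PATCH rewrites the first two instructions of the generic routine
with "ba,pt %xcc, <new>" plus a delay-slot nop. BRANCH_ALWAYS is that
branch opcode with a zero 19-bit displacement field, and the sll 11 /
srl 13 pair masks the word displacement down to 19 bits. A C sketch of
the same computation (a model for reading the macro, not kernel code):

	#include <stdint.h>

	#define BRANCH_ALWAYS	0x10680000u	/* ba,pt %xcc with disp19 = 0 */
	#define NOP		0x01000000u

	static void patch_branch(uint32_t *old_fn, uint32_t *new_fn)
	{
		intptr_t disp = (intptr_t)new_fn - (intptr_t)old_fn;
		/* 19-bit word displacement, two's complement */
		uint32_t disp19 = ((uint32_t)disp >> 2) & 0x7ffff;

		old_fn[0] = BRANCH_ALWAYS | disp19;	/* "stw %g3, [%g2]" */
		old_fn[1] = NOP;			/* "stw ... [%g2 + 0x4]" */
		/* the assembly then issues "flush %g2" for the I-cache */
	}
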
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
new file mode 100644
index 0000000..063556f
--- /dev/null
+++ b/arch/sparc/lib/Makefile
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for Sparc library files.
+#
+
+asflags-y := -ansi -DST_DIV0=0x02
+ccflags-y := -Werror
+
+lib-$(CONFIG_SPARC32) += ashrdi3.o
+lib-$(CONFIG_SPARC32) += memcpy.o memset.o
+lib-y                 += strlen.o
+lib-y                 += checksum_$(BITS).o
+lib-$(CONFIG_SPARC32) += blockops.o
+lib-y                 += memscan_$(BITS).o memcmp.o strncmp_$(BITS).o
+lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
+lib-$(CONFIG_SPARC32) += copy_user.o locks.o
+lib-$(CONFIG_SPARC64) += atomic_64.o
+lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
+lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
+lib-$(CONFIG_SPARC64) += multi3.o
+lib-$(CONFIG_SPARC64) += fls.o
+lib-$(CONFIG_SPARC64) += fls64.o
+lib-$(CONFIG_SPARC64) += NG4fls.o
+
+lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
+lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
+lib-$(CONFIG_SPARC64) += VISsave.o
+lib-$(CONFIG_SPARC64) += bitops.o
+
+lib-$(CONFIG_SPARC64) += U1memcpy.o U1copy_from_user.o U1copy_to_user.o
+
+lib-$(CONFIG_SPARC64) += U3memcpy.o U3copy_from_user.o U3copy_to_user.o
+lib-$(CONFIG_SPARC64) += U3patch.o
+
+lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o
+lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o
+
+lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o
+lib-$(CONFIG_SPARC64) +=  NG2patch.o
+
+lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o
+lib-$(CONFIG_SPARC64) +=  NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
+
+lib-$(CONFIG_SPARC64) += Memcpy_utils.o
+
+lib-$(CONFIG_SPARC64) += M7memcpy.o M7copy_from_user.o M7copy_to_user.o
+lib-$(CONFIG_SPARC64) += M7patch.o M7memset.o
+
+lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
+lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
+
+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
+lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
+
+obj-$(CONFIG_SPARC64) += iomap.o
+obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
+obj-$(CONFIG_SPARC64) += PeeCeeI.o
diff --git a/arch/sparc/lib/Memcpy_utils.S b/arch/sparc/lib/Memcpy_utils.S
new file mode 100644
index 0000000..64fbac2
--- /dev/null
+++ b/arch/sparc/lib/Memcpy_utils.S
@@ -0,0 +1,345 @@
+#ifndef __ASM_MEMCPY_UTILS
+#define __ASM_MEMCPY_UTILS
+
+#include <linux/linkage.h>
+#include <asm/asi.h>
+#include <asm/visasm.h>
+
+ENTRY(__restore_asi_fp)
+	VISExitHalf
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+ENDPROC(__restore_asi_fp)
+
+ENTRY(__restore_asi)
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+ENDPROC(__restore_asi)
+
+ENTRY(memcpy_retl_o2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%o2, %o0
+ENDPROC(memcpy_retl_o2)
+ENTRY(memcpy_retl_o2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 1, %o0
+ENDPROC(memcpy_retl_o2_plus_1)
+ENTRY(memcpy_retl_o2_plus_3)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 3, %o0
+ENDPROC(memcpy_retl_o2_plus_3)
+ENTRY(memcpy_retl_o2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 4, %o0
+ENDPROC(memcpy_retl_o2_plus_4)
+ENTRY(memcpy_retl_o2_plus_5)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 5, %o0
+ENDPROC(memcpy_retl_o2_plus_5)
+ENTRY(memcpy_retl_o2_plus_6)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 6, %o0
+ENDPROC(memcpy_retl_o2_plus_6)
+ENTRY(memcpy_retl_o2_plus_7)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 7, %o0
+ENDPROC(memcpy_retl_o2_plus_7)
+ENTRY(memcpy_retl_o2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(memcpy_retl_o2_plus_8)
+ENTRY(memcpy_retl_o2_plus_15)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 15, %o0
+ENDPROC(memcpy_retl_o2_plus_15)
+ENTRY(memcpy_retl_o2_plus_15_8)
+	 add	%o2, 15, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(memcpy_retl_o2_plus_15_8)
+ENTRY(memcpy_retl_o2_plus_16)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 16, %o0
+ENDPROC(memcpy_retl_o2_plus_16)
+ENTRY(memcpy_retl_o2_plus_24)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 24, %o0
+ENDPROC(memcpy_retl_o2_plus_24)
+ENTRY(memcpy_retl_o2_plus_31)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 31, %o0
+ENDPROC(memcpy_retl_o2_plus_31)
+ENTRY(memcpy_retl_o2_plus_32)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 32, %o0
+ENDPROC(memcpy_retl_o2_plus_32)
+ENTRY(memcpy_retl_o2_plus_31_32)
+	add	%o2, 31, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 32, %o0
+ENDPROC(memcpy_retl_o2_plus_31_32)
+ENTRY(memcpy_retl_o2_plus_31_24)
+	add	%o2, 31, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 24, %o0
+ENDPROC(memcpy_retl_o2_plus_31_24)
+ENTRY(memcpy_retl_o2_plus_31_16)
+	add	%o2, 31, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 16, %o0
+ENDPROC(memcpy_retl_o2_plus_31_16)
+ENTRY(memcpy_retl_o2_plus_31_8)
+	add	%o2, 31, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(memcpy_retl_o2_plus_31_8)
+ENTRY(memcpy_retl_o2_plus_63)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 63, %o0
+ENDPROC(memcpy_retl_o2_plus_63)
+ENTRY(memcpy_retl_o2_plus_63_64)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 64, %o0
+ENDPROC(memcpy_retl_o2_plus_63_64)
+ENTRY(memcpy_retl_o2_plus_63_56)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 56, %o0
+ENDPROC(memcpy_retl_o2_plus_63_56)
+ENTRY(memcpy_retl_o2_plus_63_48)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 48, %o0
+ENDPROC(memcpy_retl_o2_plus_63_48)
+ENTRY(memcpy_retl_o2_plus_63_40)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 40, %o0
+ENDPROC(memcpy_retl_o2_plus_63_40)
+ENTRY(memcpy_retl_o2_plus_63_32)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 32, %o0
+ENDPROC(memcpy_retl_o2_plus_63_32)
+ENTRY(memcpy_retl_o2_plus_63_24)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 24, %o0
+ENDPROC(memcpy_retl_o2_plus_63_24)
+ENTRY(memcpy_retl_o2_plus_63_16)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 16, %o0
+ENDPROC(memcpy_retl_o2_plus_63_16)
+ENTRY(memcpy_retl_o2_plus_63_8)
+	 add	%o2, 63, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(memcpy_retl_o2_plus_63_8)
+ENTRY(memcpy_retl_o2_plus_o5)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5)
+ENTRY(memcpy_retl_o2_plus_o5_plus_1)
+	add	%o5, 1, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_1)
+ENTRY(memcpy_retl_o2_plus_o5_plus_4)
+	add	%o5, 4, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_4)
+ENTRY(memcpy_retl_o2_plus_o5_plus_8)
+	add	%o5, 8, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_8)
+ENTRY(memcpy_retl_o2_plus_o5_plus_16)
+	add	%o5, 16, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_16)
+ENTRY(memcpy_retl_o2_plus_o5_plus_24)
+	add	%o5, 24, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_24)
+ENTRY(memcpy_retl_o2_plus_o5_plus_32)
+	add	%o5, 32, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_32)
+ENTRY(memcpy_retl_o2_plus_o5_64)
+	add	%o5, 64, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_64)
+ENTRY(memcpy_retl_o2_plus_g1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(memcpy_retl_o2_plus_g1)
+ENTRY(memcpy_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(memcpy_retl_o2_plus_g1_plus_1)
+ENTRY(memcpy_retl_o2_plus_g1_plus_8)
+	add	%g1, 8, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(memcpy_retl_o2_plus_g1_plus_8)
+ENTRY(memcpy_retl_o2_plus_o4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4)
+ENTRY(memcpy_retl_o2_plus_o4_plus_8)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_8)
+ENTRY(memcpy_retl_o2_plus_o4_plus_16)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_16)
+ENTRY(memcpy_retl_o2_plus_o4_plus_24)
+	add	%o4, 24, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_24)
+ENTRY(memcpy_retl_o2_plus_o4_plus_32)
+	add	%o4, 32, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_32)
+ENTRY(memcpy_retl_o2_plus_o4_plus_40)
+	add	%o4, 40, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_40)
+ENTRY(memcpy_retl_o2_plus_o4_plus_48)
+	add	%o4, 48, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_48)
+ENTRY(memcpy_retl_o2_plus_o4_plus_56)
+	add	%o4, 56, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_56)
+ENTRY(memcpy_retl_o2_plus_o4_plus_64)
+	add	%o4, 64, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_64)
+ENTRY(memcpy_retl_o2_plus_o5_plus_64)
+	add	%o5, 64, %o5
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_64)
+ENTRY(memcpy_retl_o2_plus_o3_fp)
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o3, %o0
+ENDPROC(memcpy_retl_o2_plus_o3_fp)
+ENTRY(memcpy_retl_o2_plus_o3_plus_1_fp)
+	add	%o3, 1, %o3
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o3, %o0
+ENDPROC(memcpy_retl_o2_plus_o3_plus_1_fp)
+ENTRY(memcpy_retl_o2_plus_o3_plus_4_fp)
+	add	%o3, 4, %o3
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o3, %o0
+ENDPROC(memcpy_retl_o2_plus_o3_plus_4_fp)
+ENTRY(memcpy_retl_o2_plus_o4_fp)
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_8_fp)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_8_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_16_fp)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_16_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_24_fp)
+	add	%o4, 24, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_24_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_32_fp)
+	add	%o4, 32, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_32_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_40_fp)
+	add	%o4, 40, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_40_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_48_fp)
+	add	%o4, 48, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_48_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_56_fp)
+	add	%o4, 56, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_56_fp)
+ENTRY(memcpy_retl_o2_plus_o4_plus_64_fp)
+	add	%o4, 64, %o4
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o4, %o0
+ENDPROC(memcpy_retl_o2_plus_o4_plus_64_fp)
+ENTRY(memcpy_retl_o2_plus_o5_fp)
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_64_fp)
+	add	%o5, 64, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_64_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_56_fp)
+	add	%o5, 56, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_56_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_48_fp)
+	add	%o5, 48, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_48_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_40_fp)
+	add	%o5, 40, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_40_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_32_fp)
+	add	%o5, 32, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_32_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_24_fp)
+	add	%o5, 24, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_24_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_16_fp)
+	add	%o5, 16, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_16_fp)
+ENTRY(memcpy_retl_o2_plus_o5_plus_8_fp)
+	add	%o5, 8, %o5
+	ba,pt	%xcc, __restore_asi_fp
+	 add	%o2, %o5, %o0
+ENDPROC(memcpy_retl_o2_plus_o5_plus_8_fp)
+
+#endif
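
The suffix of each stub above names the adjustment that must be folded
back into the residual count when a load or store faults: %o2 holds
the bytes not yet entered into the current loop, and the suffix adds
whatever is still in flight (a constant, %o4/%o5/%g1, or both). In C
terms the convention is simply (a model, not a kernel interface):

	/* e.g. memcpy_retl_o2_plus_o5_plus_8: o2 and o5 mirror the
	 * registers of the same name at the faulting instruction.
	 */
	static unsigned long bytes_not_copied(unsigned long o2,
					      unsigned long o5,
					      unsigned long in_flight)
	{
		return o2 + o5 + in_flight;	/* in_flight == 8 here */
	}
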
diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S
new file mode 100644
index 0000000..e57bc51
--- /dev/null
+++ b/arch/sparc/lib/NG2copy_from_user.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG2copy_from_user.S: Niagara-2 optimized copy from userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_LD_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y##_fp;	\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#ifndef ASI_BLK_AIUS_4V
+#define ASI_BLK_AIUS_4V	0x17
+#endif
+
+#define FUNC_NAME		NG2copy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
+#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_AIUS_4V, dest
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "NG2memcpy.S"
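
The EX_LD/EX_LD_FP wrappers in this file emit one (faulting PC, fixup)
pair into __ex_table per guarded access. Conceptually, the trap
handler resolves a fault like this (a simplified model of ours; the
kernel keeps the table sorted and walks it via
search_exception_tables()):

	#include <stdint.h>
	#include <stddef.h>

	struct ex_entry {		/* one ".word 98b, y" pair */
		uint32_t insn;		/* PC of the guarded load/store */
		uint32_t fixup;		/* e.g. memcpy_retl_o2 */
	};

	static const struct ex_entry *find_fixup(const struct ex_entry *tbl,
						 size_t n, uint32_t trap_pc)
	{
		for (size_t i = 0; i < n; i++)
			if (tbl[i].insn == trap_pc)
				return &tbl[i];	/* resume at fixup */
		return NULL;			/* genuine fault */
	}
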
diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S
new file mode 100644
index 0000000..367c0bf
--- /dev/null
+++ b/arch/sparc/lib/NG2copy_to_user.S
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG2copy_to_user.S: Niagara-2 optimized copy to userspace.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_ST_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y##_fp;	\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#ifndef ASI_BLK_AIUS_4V
+#define ASI_BLK_AIUS_4V	0x17
+#endif
+
+#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
+#endif
+
+#define FUNC_NAME		NG2copy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
+#define STORE_ASI		ASI_BLK_INIT_QUAD_LDD_AIUS
+#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS_4V
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+	/* Writing to %asi is _expensive_ so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "NG2memcpy.S"
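
The PREAMBLE above is the dispatch point for mismatched address
spaces: a cheap read of %asi tells us whether the caller really set up
a userspace access; anything else is routed to raw_copy_in_user. A
hypothetical C rendering (asi_read and ng2_user_copy_body are
stand-ins of ours, not real kernel interfaces):

	#define ASI_AIUS 0x11

	extern unsigned int asi_read(void);	/* models "rd %asi, %g1" */
	extern unsigned long raw_copy_in_user(void *, const void *,
					      unsigned long);
	extern unsigned long ng2_user_copy_body(void *, const void *,
						unsigned long);

	static unsigned long ng2_copy_to_user_model(void *dst,
						    const void *src,
						    unsigned long len)
	{
		if (asi_read() != ASI_AIUS)	/* not a user buffer */
			return raw_copy_in_user(dst, src, len);
		return ng2_user_copy_body(dst, src, len);
	}
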
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S
new file mode 100644
index 0000000..bcb21b3
--- /dev/null
+++ b/arch/sparc/lib/NG2memcpy.S
@@ -0,0 +1,594 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG2memcpy.S: Niagara-2 optimized memcpy.
+ *
+ * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#define GLOBAL_SPARE	%g7
+#else
+#define ASI_PNF 0x82
+#define ASI_BLK_P 0xf0
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define FPRS_FEF  0x04
+#ifdef MEMCPY_DEBUG
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
+		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#else
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#endif
+#define GLOBAL_SPARE	%g5
+#endif
+
+#ifndef STORE_ASI
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
+#else
+#define STORE_ASI	0x80		/* ASI_P */
+#endif
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x,y)	x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef LOAD_BLK
+#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
+#endif
+
+#ifndef STORE
+#ifndef MEMCPY_DEBUG
+#define STORE(type,src,addr)	type src, [addr]
+#else
+#define STORE(type,src,addr)	type##a src, [addr] 0x80
+#endif
+#endif
+
+#ifndef STORE_BLK
+#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
+#endif
+
+#ifndef STORE_INIT
+#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	NG2memcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+#define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
+	faligndata	%x0, %x1, %f0; \
+	faligndata	%x1, %x2, %f2; \
+	faligndata	%x2, %x3, %f4; \
+	faligndata	%x3, %x4, %f6; \
+	faligndata	%x4, %x5, %f8; \
+	faligndata	%x5, %x6, %f10; \
+	faligndata	%x6, %x7, %f12; \
+	faligndata	%x7, %x8, %f14;
+
+#define FREG_MOVE_1(x0) \
+	fsrc2		%x0, %f0;
+#define FREG_MOVE_2(x0, x1) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2;
+#define FREG_MOVE_3(x0, x1, x2) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4;
+#define FREG_MOVE_4(x0, x1, x2, x3) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6;
+#define FREG_MOVE_5(x0, x1, x2, x3, x4) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8;
+#define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8; \
+	fsrc2		%x5, %f10;
+#define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8; \
+	fsrc2		%x5, %f10; \
+	fsrc2		%x6, %f12;
+#define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8; \
+	fsrc2		%x5, %f10; \
+	fsrc2		%x6, %f12; \
+	fsrc2		%x7, %f14;
+#define FREG_LOAD_1(base, x0) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
+#define FREG_LOAD_2(base, x0, x1) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
+#define FREG_LOAD_3(base, x0, x1, x2) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
+#define FREG_LOAD_4(base, x0, x1, x2, x3) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
+#define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
+#define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
+#define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
+	EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
+	EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_fp:
+	VISExitHalf
+__restore_asi:
+	retl
+	 wr	%g0, ASI_AIUS, %asi
+ENTRY(NG2_retl_o2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%o2, %o0
+ENDPROC(NG2_retl_o2)
+ENTRY(NG2_retl_o2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 1, %o0
+ENDPROC(NG2_retl_o2_plus_1)
+ENTRY(NG2_retl_o2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 4, %o0
+ENDPROC(NG2_retl_o2_plus_4)
+ENTRY(NG2_retl_o2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, 8, %o0
+ENDPROC(NG2_retl_o2_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_1)
+	add	%o4, 1, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_1)
+ENTRY(NG2_retl_o2_plus_o4_plus_8)
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_16)
+	add	%o4, 16, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_16)
+ENTRY(NG2_retl_o2_plus_g1_fp)
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
+	add	%g1, 64, %g1
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_1)
+ENTRY(NG2_retl_o2_and_7_plus_o4)
+	and	%o2, 7, %o2
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4)
+ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
+	and	%o2, 7, %o2
+	add	%o4, 8, %o4
+	ba,pt	%xcc, __restore_asi
+	 add	%o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
+#endif
+
+	.align		64
+
+	.globl	FUNC_NAME
+	.type	FUNC_NAME,#function
+FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
+	srlx		%o2, 31, %g2
+	cmp		%g2, 0
+	tne		%xcc, 5
+	PREAMBLE
+	mov		%o0, %o3
+	cmp		%o2, 0
+	be,pn		%XCC, 85f
+	 or		%o0, %o1, GLOBAL_SPARE
+	cmp		%o2, 16
+	blu,a,pn	%XCC, 80f
+	 or		GLOBAL_SPARE, %o2, GLOBAL_SPARE
+
+	/* 2 blocks (128 bytes) is the minimum we can do the block
+	 * copy with.  We need to ensure that we'll iterate at least
+	 * once in the block copy loop.  At worst we'll need to align
+	 * the destination to a 64-byte boundary which can chew up
+	 * to (64 - 1) bytes from the length before we perform the
+	 * block copy loop.
+	 *
+	 * However, the cut-off point, performance wise, is around
+	 * 4 64-byte blocks.
+	 */
+	cmp		%o2, (4 * 64)
+	blu,pt		%XCC, 75f
+	 andcc		GLOBAL_SPARE, 0x7, %g0
+
+	/* %o0:	dst
+	 * %o1:	src
+	 * %o2:	len  (known to be >= 128)
+	 *
+	 * The block copy loops can use %o4, %g2, %g3 as
+	 * temporaries while copying the data.  %o5 must
+	 * be preserved between VISEntryHalf and VISExitHalf
+	 */
+
+	LOAD(prefetch, %o1 + 0x000, #one_read)
+	LOAD(prefetch, %o1 + 0x040, #one_read)
+	LOAD(prefetch, %o1 + 0x080, #one_read)
+
+	/* Align destination on 64-byte boundary.  */
+	andcc		%o0, (64 - 1), %o4
+	be,pt		%XCC, 2f
+	 sub		%o4, 64, %o4
+	sub		%g0, %o4, %o4	! bytes to align dst
+	sub		%o2, %o4, %o2
+1:	subcc		%o4, 1, %o4
+	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
+	EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
+	add		%o1, 1, %o1
+	bne,pt		%XCC, 1b
+	add		%o0, 1, %o0
+
+2:
+	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  We must preserve
+	 * o5 from here until we hit VISExitHalf.
+	 */
+	VISEntryHalf
+
+	membar		#Sync
+	alignaddr	%o1, %g0, %g0
+
+	add		%o1, (64 - 1), %o4
+	andn		%o4, (64 - 1), %o4
+	andn		%o2, (64 - 1), %g1
+	sub		%o2, %g1, %o2
+
+	and		%o1, (64 - 1), %g2
+	add		%o1, %g1, %o1
+	sub		%o0, %o4, %g3
+	brz,pt		%g2, 190f
+	 cmp		%g2, 32
+	blu,a		5f
+	 cmp		%g2, 16
+	cmp		%g2, 48
+	blu,a		4f
+	 cmp		%g2, 40
+	cmp		%g2, 56
+	blu		170f
+	 nop
+	ba,a,pt		%xcc, 180f
+	 nop
+
+4:	/* 32 <= low bits < 48 */
+	blu		150f
+	 nop
+	ba,a,pt		%xcc, 160f
+	 nop
+5:	/* 0 < low bits < 32 */
+	blu,a		6f
+	 cmp		%g2, 8
+	cmp		%g2, 24
+	blu		130f
+	 nop
+	ba,a,pt		%xcc, 140f
+	 nop
+6:	/* 0 < low bits < 16 */
+	bgeu		120f
+	 nop
+	/* fall through for 0 < low bits < 8 */
+110:	sub		%o4, 64, %g2
+	EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+120:	sub		%o4, 56, %g2
+	FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+130:	sub		%o4, 48, %g2
+	FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+140:	sub		%o4, 40, %g2
+	FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_5(f22, f24, f26, f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+150:	sub		%o4, 32, %g2
+	FREG_LOAD_4(%g2, f0, f2, f4, f6)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_4(f24, f26, f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+160:	sub		%o4, 24, %g2
+	FREG_LOAD_3(%g2, f0, f2, f4)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_3(f26, f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+170:	sub		%o4, 16, %g2
+	FREG_LOAD_2(%g2, f0, f2)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_2(f28, f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+180:	sub		%o4, 8, %g2
+	FREG_LOAD_1(%g2, f0)
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
+	FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	FREG_MOVE_1(f30)
+	subcc		%g1, 64, %g1
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+	ba,pt		%xcc, 195f
+	 nop
+
+190:
+1:	EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+	subcc		%g1, 64, %g1
+	EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
+	EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
+	add		%o4, 64, %o4
+	bne,pt		%xcc, 1b
+	 LOAD(prefetch, %o4 + 64, #one_read)
+
+195:
+	add		%o4, %g3, %o0
+	membar		#Sync
+
+	VISExitHalf
+
+	/* %o2 contains any final bytes still needed to be copied
+	 * over. If anything is left, we copy it one byte at a time.
+	 */
+	brz,pt		%o2, 85f
+	 sub		%o0, %o1, GLOBAL_SPARE
+	ba,a,pt		%XCC, 90f
+	 nop
+
+	.align		64
+75: /* 16 <= len < 256 */
+	bne,pn		%XCC, 75f
+	 sub		%o0, %o1, GLOBAL_SPARE
+
+72:
+	andn		%o2, 0xf, %o4
+	and		%o2, 0xf, %o2
+1:	subcc		%o4, 0x10, %o4
+	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
+	add		%o1, 0x08, %o1
+	EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
+	sub		%o1, 0x08, %o1
+	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
+	add		%o1, 0x8, %o1
+	EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 0x8, %o1
+73:	andcc		%o2, 0x8, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x8, %o2
+	EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
+	EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
+	add		%o1, 0x8, %o1
+1:	andcc		%o2, 0x4, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x4, %o2
+	EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
+	EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
+	add		%o1, 0x4, %o1
+1:	cmp		%o2, 0
+	be,pt		%XCC, 85f
+	 nop
+	ba,pt		%xcc, 90f
+	 nop
+
+75:
+	andcc		%o0, 0x7, %g1
+	sub		%g1, 0x8, %g1
+	be,pn		%icc, 2f
+	 sub		%g0, %g1, %g1
+	sub		%o2, %g1, %o2
+
+1:	subcc		%g1, 1, %g1
+	EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
+	bgu,pt		%icc, 1b
+	 add		%o1, 1, %o1
+
+2:	add		%o1, GLOBAL_SPARE, %o0
+	andcc		%o1, 0x7, %g1
+	bne,pt		%icc, 8f
+	 sll		%g1, 3, %g1
+
+	cmp		%o2, 16
+	bgeu,pt		%icc, 72b
+	 nop
+	ba,a,pt		%xcc, 73b
+
+8:	mov		64, GLOBAL_SPARE
+	andn		%o1, 0x7, %o1
+	EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
+	sub		GLOBAL_SPARE, %g1, GLOBAL_SPARE
+	andn		%o2, 0x7, %o4
+	sllx		%g2, %g1, %g2
+1:	add		%o1, 0x8, %o1
+	EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
+	subcc		%o4, 0x8, %o4
+	srlx		%g3, GLOBAL_SPARE, %o5
+	or		%o5, %g2, %o5
+	EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
+	add		%o0, 0x8, %o0
+	bgu,pt		%icc, 1b
+	 sllx		%g3, %g1, %g2
+
+	srl		%g1, 3, %g1
+	andcc		%o2, 0x7, %o2
+	be,pn		%icc, 85f
+	 add		%o1, %g1, %o1
+	ba,pt		%xcc, 90f
+	 sub		%o0, %o1, GLOBAL_SPARE
+
+	.align		64
+80: /* 0 < len <= 16 */
+	andcc		GLOBAL_SPARE, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 sub		%o0, %o1, GLOBAL_SPARE
+
+1:
+	subcc		%o2, 4, %o2
+	EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
+	EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 4, %o1
+
+85:	retl
+	 mov		EX_RETVAL(%o3), %o0
+
+	.align		32
+90:
+	subcc		%o2, 1, %o2
+	EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
+	EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
+	bgu,pt		%XCC, 90b
+	 add		%o1, 1, %o1
+	retl
+	 mov		EX_RETVAL(%o3), %o0
+
+	.size		FUNC_NAME, .-FUNC_NAME
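
The "2 blocks (128 bytes) is the minimum" comment in this file can be
checked with a little arithmetic: aligning the destination consumes at
most 63 bytes, so len >= 128 leaves at least 65, and the 64-byte block
loop still runs at least once. A throwaway check (ours, not kernel
code):

	#include <assert.h>

	int main(void)
	{
		for (unsigned int len = 128; len < 4096; len++)
			for (unsigned int waste = 0; waste < 64; waste++)
				assert((len - waste) / 64 >= 1);
		return 0;
	}
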
diff --git a/arch/sparc/lib/NG2patch.S b/arch/sparc/lib/NG2patch.S
new file mode 100644
index 0000000..72431b2
--- /dev/null
+++ b/arch/sparc/lib/NG2patch.S
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG2patch.S: Patch Ultra-I routines with Niagara-2 variant.
+ *
+ * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara2_patch_copyops
+	.type	niagara2_patch_copyops,#function
+niagara2_patch_copyops:
+	NG_DO_PATCH(memcpy, NG2memcpy)
+	NG_DO_PATCH(raw_copy_from_user, NG2copy_from_user)
+	NG_DO_PATCH(raw_copy_to_user, NG2copy_to_user)
+	retl
+	 nop
+	.size	niagara2_patch_copyops,.-niagara2_patch_copyops
diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S
new file mode 100644
index 0000000..97e2678
--- /dev/null
+++ b/arch/sparc/lib/NG4clear_page.S
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4clear_page.S: Niagara-4 optimized clear page.
+ *
+ * Copyright (C) 2012 (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+	.text
+
+	.register	%g3, #scratch
+
+	.align		32
+	.globl		NG4clear_page
+	.globl		NG4clear_user_page
+NG4clear_page:		/* %o0=dest */
+NG4clear_user_page:	/* %o0=dest, %o1=vaddr */
+	set		PAGE_SIZE, %g7
+	mov		0x20, %g3
+1:	stxa		%g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P
+	subcc		%g7, 0x40, %g7
+	stxa		%g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P
+	bne,pt		%xcc, 1b
+	 add		%o0, 0x40, %o0
+	membar		#StoreLoad|#StoreStore
+	retl
+	 nop
+	.size		NG4clear_page,.-NG4clear_page
+	.size		NG4clear_user_page,.-NG4clear_user_page
\ No newline at end of file
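
The loop above clears 64 bytes per iteration with only two
initializing stores, at offsets 0x00 and 0x20 of each stride; the MRU
block-init ASI takes care of the remaining bytes. A C model of the
store pattern (the 32-byte init granule is our reading of the
two-store pattern, not something stated in this file):

	#include <stdint.h>

	#define PAGE_BYTES 8192		/* base sparc64 page size */

	static void ng4_clear_page_model(uint64_t *p)	/* p page-aligned */
	{
		for (unsigned int dw = 0; dw < PAGE_BYTES / 8; dw += 8) {
			p[dw + 0] = 0;	/* stxa %g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P */
			p[dw + 4] = 0;	/* stxa %g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P */
		}
		/* the assembly ends with membar #StoreLoad|#StoreStore */
	}
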
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S
new file mode 100644
index 0000000..0cac15a
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_from_user.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4copy_from_user.S: Niagara-4 optimized copy from userspace.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x, y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_LD_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y##_fp;	\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#define FUNC_NAME		NG4copy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "NG4memcpy.S"
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S
new file mode 100644
index 0000000..581062f
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_page.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4copy_page.S: Niagara-4 optimized copy page.
+ *
+ * Copyright (C) 2012 (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+	.text
+	.align		32
+
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+	.globl		NG4copy_user_page
+NG4copy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */
+	prefetch	[%o1 + 0x000], #n_reads_strong
+	prefetch	[%o1 + 0x040], #n_reads_strong
+	prefetch	[%o1 + 0x080], #n_reads_strong
+	prefetch	[%o1 + 0x0c0], #n_reads_strong
+	set		PAGE_SIZE, %g7
+	prefetch	[%o1 + 0x100], #n_reads_strong
+	prefetch	[%o1 + 0x140], #n_reads_strong
+	prefetch	[%o1 + 0x180], #n_reads_strong
+	prefetch	[%o1 + 0x1c0], #n_reads_strong
+1:
+	ldx		[%o1 + 0x00], %o2
+	subcc		%g7, 0x40, %g7
+	ldx		[%o1 + 0x08], %o3
+	ldx		[%o1 + 0x10], %o4
+	ldx		[%o1 + 0x18], %o5
+	ldx		[%o1 + 0x20], %g1
+	stxa		%o2, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	ldx		[%o1 + 0x28], %g2
+	stxa		%o3, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	ldx		[%o1 + 0x30], %g3
+	stxa		%o4, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	ldx		[%o1 + 0x38], %o2
+	add		%o1, 0x40, %o1
+	stxa		%o5, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	stxa		%g1, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	stxa		%g2, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	stxa		%g3, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	stxa		%o2, [%o0] ASI_ST_BLKINIT_MRU_P
+	add		%o0, 0x08, %o0
+	bne,pt		%icc, 1b
+	 prefetch	[%o1 + 0x200], #n_reads_strong
+	retl
+	 membar		#StoreLoad | #StoreStore
+	.size		NG4copy_user_page,.-NG4copy_user_page
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S
new file mode 100644
index 0000000..c5c9abb
--- /dev/null
+++ b/arch/sparc/lib/NG4copy_to_user.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4copy_to_user.S: Niagara-4 optimized copy to userspace.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_ST_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y##_fp;	\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#ifndef ASI_BLK_INIT_QUAD_LDD_AIUS
+#define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23
+#endif
+
+#define FUNC_NAME		NG4copy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] %asi
+#define STORE_ASI		ASI_BLK_INIT_QUAD_LDD_AIUS
+#define EX_RETVAL(x)		0
+
+#ifdef __KERNEL__
+	/* Writing to %asi is _expensive_ so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "NG4memcpy.S"
diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S
new file mode 100644
index 0000000..2d0991e
--- /dev/null
+++ b/arch/sparc/lib/NG4fls.S
@@ -0,0 +1,30 @@
+/* NG4fls.S: SPARC optimized fls and __fls for T4 and above.
+ *
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+#define LZCNT_O0_G2	\
+	.word	0x85b002e8
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+ENTRY(NG4fls)
+	LZCNT_O0_G2	!lzcnt	%o0, %g2
+	mov	64, %g3
+	retl
+	 sub	%g3, %g2, %o0
+ENDPROC(NG4fls)
+
+ENTRY(__NG4fls)
+	brz,pn	%o0, 1f
+	LZCNT_O0_G2	!lzcnt	%o0, %g2
+	mov	63, %g3
+	sub	%g3, %g2, %o0
+1:
+	retl
+	 nop
+ENDPROC(__NG4fls)
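
The .word 0x85b002e8 encodes the T4 lzcnt instruction (assemblers of
the era did not accept the mnemonic), so the two entry points compute
64 - lzcnt(x) and 63 - lzcnt(x). C equivalents, for reference (our
models; like __fls, the second is undefined for 0):

	static int ng4_fls_model(unsigned long x)
	{
		return x ? 64 - __builtin_clzl(x) : 0;	/* lzcnt(0) == 64 */
	}

	static unsigned long ng4___fls_model(unsigned long x)
	{
		return 63 - __builtin_clzl(x);	/* caller guarantees x != 0 */
	}
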
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
new file mode 100644
index 0000000..7ad58eb
--- /dev/null
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4memcpy.S: Niagara-4 optimized memcpy.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#define GLOBAL_SPARE	%g7
+#else
+#define ASI_BLK_INIT_QUAD_LDD_P 0xe2
+#define FPRS_FEF  0x04
+
+/* On T4 it is very expensive to access ASRs like %fprs and
+ * %asi; avoiding a read or a write can save ~50 cycles.
+ */
+#define FPU_ENTER			\
+	rd	%fprs, %o5;		\
+	andcc	%o5, FPRS_FEF, %g0;	\
+	be,a,pn	%icc, 999f;		\
+	 wr	%g0, FPRS_FEF, %fprs;	\
+	999:
+
+#ifdef MEMCPY_DEBUG
+#define VISEntryHalf FPU_ENTER; \
+		     clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#else
+#define VISEntryHalf FPU_ENTER
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#endif
+
+#define GLOBAL_SPARE	%g5
+#endif
+
+#ifndef STORE_ASI
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
+#else
+#define STORE_ASI	0x80		/* ASI_P */
+#endif
+#endif
+
+#if !defined(EX_LD) && !defined(EX_ST)
+#define NON_USER_COPY
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x,y)	x
+#endif
+
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef STORE
+#ifndef MEMCPY_DEBUG
+#define STORE(type,src,addr)	type src, [addr]
+#else
+#define STORE(type,src,addr)	type##a src, [addr] %asi
+#endif
+#endif
+
+#ifndef STORE_INIT
+#define STORE_INIT(src,addr)	stxa src, [addr] STORE_ASI
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	NG4memcpy
+#endif
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+#endif
+	.align		64
+
+	.globl	FUNC_NAME
+	.type	FUNC_NAME,#function
+FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
+#ifdef MEMCPY_DEBUG
+	wr		%g0, 0x80, %asi
+#endif
+	srlx		%o2, 31, %g2
+	cmp		%g2, 0
+	tne		%XCC, 5
+	PREAMBLE
+	mov		%o0, %o3
+	brz,pn		%o2, .Lexit
+	 cmp		%o2, 3
+	ble,pn		%icc, .Ltiny
+	 cmp		%o2, 19
+	ble,pn		%icc, .Lsmall
+	 or		%o0, %o1, %g2
+	cmp		%o2, 128
+	bl,pn		%icc, .Lmedium
+	 nop
+
+.Llarge:/* len >= 0x80 */
+	/* First get dest 8 byte aligned.  */
+	sub		%g0, %o0, %g1
+	and		%g1, 0x7, %g1
+	brz,pt		%g1, 51f
+	 sub		%o2, %g1, %o2
+
+
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
+	add		%o1, 1, %o1
+	subcc		%g1, 1, %g1
+	add		%o0, 1, %o0
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
+
+51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
+	LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+
+	/* Check if we can use the straight fully aligned
+	 * loop, or we require the alignaddr/faligndata variant.
+	 */
+	andcc		%o1, 0x7, %o5
+	bne,pn		%icc, .Llarge_src_unaligned
+	 sub		%g0, %o0, %g1
+
+	/* Legitimize the use of initializing stores by getting dest
+	 * to be 64-byte aligned.
+	 */
+	and		%g1, 0x3f, %g1
+	brz,pt		%g1, .Llarge_aligned
+	 sub		%o2, %g1, %o2
+
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
+	add		%o1, 8, %o1
+	subcc		%g1, 8, %g1
+	add		%o0, 8, %o0
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8)
+
+.Llarge_aligned:
+	/* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
+	andn		%o2, 0x3f, %o4
+	sub		%o2, %o4, %o2
+
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4)
+	add		%o1, 0x40, %o1
+	EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4)
+	subcc		%o4, 0x40, %o4
+	EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64)
+	EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64)
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56)
+	add		%o0, 0x08, %o0
+	EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48)
+	EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48)
+	add		%o0, 0x08, %o0
+	EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40)
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40)
+	add		%o0, 0x08, %o0
+	EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32)
+	EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32)
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24)
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16)
+	add		%o0, 0x08, %o0
+	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8)
+	add		%o0, 0x08, %o0
+	bne,pt		%icc, 1b
+	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
+
+	membar		#StoreLoad | #StoreStore
+
+	brz,pn		%o2, .Lexit
+	 cmp		%o2, 19
+	ble,pn		%icc, .Lsmall_unaligned
+	 nop
+	ba,a,pt		%icc, .Lmedium_noprefetch
+
+.Lexit:	retl
+	 mov		EX_RETVAL(%o3), %o0
+
+.Llarge_src_unaligned:
+#ifdef NON_USER_COPY
+	VISEntryHalfFast(.Lmedium_vis_entry_fail)
+#else
+	VISEntryHalf
+#endif
+	andn		%o2, 0x3f, %o4
+	sub		%o2, %o4, %o2
+	alignaddr	%o1, %g0, %g1
+	add		%o1, %o4, %o1
+	EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4)
+1:	EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4)
+	subcc		%o4, 0x40, %o4
+	EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64)
+	EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64)
+	faligndata	%f0, %f2, %f16
+	EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64)
+	faligndata	%f2, %f4, %f18
+	add		%g1, 0x40, %g1
+	faligndata	%f4, %f6, %f20
+	faligndata	%f6, %f8, %f22
+	faligndata	%f8, %f10, %f24
+	faligndata	%f10, %f12, %f26
+	faligndata	%f12, %f14, %f28
+	faligndata	%f14, %f0, %f30
+	EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64)
+	EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56)
+	EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48)
+	EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40)
+	EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32)
+	EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24)
+	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16)
+	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8)
+	add		%o0, 0x40, %o0
+	bne,pt		%icc, 1b
+	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
+#ifdef NON_USER_COPY
+	VISExitHalfFast
+#else
+	VISExitHalf
+#endif
+	brz,pn		%o2, .Lexit
+	 cmp		%o2, 19
+	ble,pn		%icc, .Lsmall_unaligned
+	 nop
+	ba,a,pt		%icc, .Lmedium_unaligned
+
+#ifdef NON_USER_COPY
+.Lmedium_vis_entry_fail:
+	 or		%o0, %o1, %g2
+#endif
+.Lmedium:
+	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
+	andcc		%g2, 0x7, %g0
+	bne,pn		%icc, .Lmedium_unaligned
+	 nop
+.Lmedium_noprefetch:
+	andncc		%o2, 0x20 - 1, %o5
+	be,pn		%icc, 2f
+	 sub		%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5)
+	EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
+	add		%o1, 0x20, %o1
+	subcc		%o5, 0x20, %o5
+	EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
+	EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24)
+	EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
+	bne,pt		%icc, 1b
+	 add		%o0, 0x20, %o0
+2:	andcc		%o2, 0x18, %o5
+	be,pt		%icc, 3f
+	 sub		%o2, %o5, %o2
+
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
+	add		%o1, 0x08, %o1
+	add		%o0, 0x08, %o0
+	subcc		%o5, 0x08, %o5
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8)
+3:	brz,pt		%o2, .Lexit
+	 cmp		%o2, 0x04
+	bl,pn		%icc, .Ltiny
+	 nop
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2)
+	add		%o1, 0x04, %o1
+	add		%o0, 0x04, %o0
+	subcc		%o2, 0x04, %o2
+	bne,pn		%icc, .Ltiny
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4)
+	ba,a,pt		%icc, .Lexit
+.Lmedium_unaligned:
+	/* First get dest 8 byte aligned.  */
+	sub		%g0, %o0, %g1
+	and		%g1, 0x7, %g1
+	brz,pt		%g1, 2f
+	 sub		%o2, %g1, %o2
+
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
+	add		%o1, 1, %o1
+	subcc		%g1, 1, %g1
+	add		%o0, 1, %o0
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1)
+2:
+	and		%o1, 0x7, %g1
+	brz,pn		%g1, .Lmedium_noprefetch
+	 sll		%g1, 3, %g1
+	mov		64, %g2
+	sub		%g2, %g1, %g2
+	andn		%o1, 0x7, %o1
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
+	sllx		%o4, %g1, %o4
+	andn		%o2, 0x08 - 1, %o5
+	sub		%o2, %o5, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
+	add		%o1, 0x08, %o1
+	subcc		%o5, 0x08, %o5
+	srlx		%g3, %g2, GLOBAL_SPARE
+	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
+	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
+	add		%o0, 0x08, %o0
+	bne,pt		%icc, 1b
+	 sllx		%g3, %g1, %o4
+	srl		%g1, 3, %g1
+	add		%o1, %g1, %o1
+	brz,pn		%o2, .Lexit
+	 nop
+	ba,pt		%icc, .Lsmall_unaligned
+
+.Ltiny:
+	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
+	subcc		%o2, 1, %o2
+	be,pn		%icc, .Lexit
+	 EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2)
+	subcc		%o2, 1, %o2
+	be,pn		%icc, .Lexit
+	 EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1)
+	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2)
+	ba,pt		%icc, .Lexit
+	 EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2)
+
+.Lsmall:
+	andcc		%g2, 0x3, %g0
+	bne,pn		%icc, .Lsmall_unaligned
+	 andn		%o2, 0x4 - 1, %o5
+	sub		%o2, %o5, %o2
+1:
+	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
+	add		%o1, 0x04, %o1
+	subcc		%o5, 0x04, %o5
+	add		%o0, 0x04, %o0
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4)
+	brz,pt		%o2, .Lexit
+	 nop
+	ba,a,pt		%icc, .Ltiny
+
+.Lsmall_unaligned:
+1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
+	add		%o1, 1, %o1
+	add		%o0, 1, %o0
+	subcc		%o2, 1, %o2
+	bne,pt		%icc, 1b
+	 EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1)
+	ba,a,pt		%icc, .Lexit
+	 nop
+	.size		FUNC_NAME, .-FUNC_NAME
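The dest-alignment preambles above (.Llarge and .Lmedium_unaligned both open
with "sub %g0, %o0, %g1; and %g1, 0x7, %g1") compute the number of head bytes
to copy singly before the destination reaches an 8-byte boundary, as the
negated address masked to its low bits. A minimal C sketch of the same idiom
(the helper name is illustrative):

    #include <stdint.h>
    #include <stddef.h>

    /* Bytes to copy one at a time before 'dst' is 8-byte aligned.
     * (-addr) & 7 == (8 - (addr & 7)) & 7, i.e. 0 when already aligned.
     */
    static size_t head_bytes(const void *dst)
    {
        return (size_t)(-(uintptr_t)dst) & 7;
    }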
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S
new file mode 100644
index 0000000..f81ee54
--- /dev/null
+++ b/arch/sparc/lib/NG4memset.S
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4memset.S: Niagara-4 optimized memset/bzero.
+ *
+ * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+	.text
+	.align		32
+	.globl		NG4memset
+NG4memset:
+	andcc		%o1, 0xff, %o4
+	be,pt		%icc, 1f
+	 mov		%o2, %o1
+	sllx		%o4, 8, %g1
+	or		%g1, %o4, %o2
+	sllx		%o2, 16, %g1
+	or		%g1, %o2, %o2
+	sllx		%o2, 32, %g1
+	ba,pt		%icc, 1f
+	 or		%g1, %o2, %o4
+	.size		NG4memset,.-NG4memset
+
+	.align		32
+	.globl		NG4bzero
+NG4bzero:
+	clr		%o4
+1:	cmp		%o1, 16
+	ble		%icc, .Ltiny
+	 mov		%o0, %o3
+	sub		%g0, %o0, %g1
+	and		%g1, 0x7, %g1
+	brz,pt		%g1, .Laligned8
+	 sub		%o1, %g1, %o1
+1:	stb		%o4, [%o0 + 0x00]
+	subcc		%g1, 1, %g1
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+.Laligned8:
+	cmp		%o1, 64 + (64 - 8)
+	ble		.Lmedium
+	 sub		%g0, %o0, %g1
+	andcc		%g1, (64 - 1), %g1
+	brz,pn		%g1, .Laligned64
+	 sub		%o1, %g1, %o1
+1:	stx		%o4, [%o0 + 0x00]
+	subcc		%g1, 8, %g1
+	bne,pt		%icc, 1b
+	 add		%o0, 0x8, %o0
+.Laligned64:
+	andn		%o1, 64 - 1, %g1
+	sub		%o1, %g1, %o1
+	brnz,pn		%o4, .Lnon_bzero_loop
+	 mov		0x20, %g2
+1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	subcc		%g1, 0x40, %g1
+	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	bne,pt		%icc, 1b
+	 add		%o0, 0x40, %o0
+.Lpostloop:
+	cmp		%o1, 8
+	bl,pn		%icc, .Ltiny
+	 membar		#StoreStore|#StoreLoad
+.Lmedium:
+	andn		%o1, 0x7, %g1
+	sub		%o1, %g1, %o1
+1:	stx		%o4, [%o0 + 0x00]
+	subcc		%g1, 0x8, %g1
+	bne,pt		%icc, 1b
+	 add		%o0, 0x08, %o0
+	andcc		%o1, 0x4, %g1
+	be,pt		%icc, .Ltiny
+	 sub		%o1, %g1, %o1
+	stw		%o4, [%o0 + 0x00]
+	add		%o0, 0x4, %o0
+.Ltiny:
+	cmp		%o1, 0
+	be,pn		%icc, .Lexit
+1:	 subcc		%o1, 1, %o1
+	stb		%o4, [%o0 + 0x00]
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+.Lexit:
+	retl
+	 mov		%o3, %o0
+.Lnon_bzero_loop:
+	mov		0x08, %g3
+	mov		0x28, %o5
+1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	subcc		%g1, 0x40, %g1
+	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
+	add		%o0, 0x10, %o0
+	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
+	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
+	bne,pt		%icc, 1b
+	 add		%o0, 0x30, %o0
+	ba,a,pt		%icc, .Lpostloop
+	 nop
+	.size		NG4bzero,.-NG4bzero
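NG4memset's prologue replicates the fill byte across all 64 bits with three
sllx/or pairs (by 8, 16, then 32) before dropping into NG4bzero's store loops.
The same computation in C, as a sketch:

    #include <stdint.h>

    /* Replicate the low byte of 'c' into every byte of a 64-bit
     * pattern word, mirroring the sllx/or pairs in NG4memset.
     */
    static uint64_t splat_byte(uint64_t c)
    {
        uint64_t pat = c & 0xff;

        pat |= pat << 8;    /* 1 byte  -> 2 bytes */
        pat |= pat << 16;   /* 2 bytes -> 4 bytes */
        pat |= pat << 32;   /* 4 bytes -> 8 bytes */
        return pat;
    }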
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S
new file mode 100644
index 0000000..3786617
--- /dev/null
+++ b/arch/sparc/lib/NG4patch.S
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant.
+ *
+ * Copyright (C) 2012 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/linkage.h>
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara4_patch_copyops
+	.type	niagara4_patch_copyops,#function
+niagara4_patch_copyops:
+	NG_DO_PATCH(memcpy, NG4memcpy)
+	NG_DO_PATCH(raw_copy_from_user, NG4copy_from_user)
+	NG_DO_PATCH(raw_copy_to_user, NG4copy_to_user)
+	retl
+	 nop
+	.size	niagara4_patch_copyops,.-niagara4_patch_copyops
+
+	.globl	niagara4_patch_bzero
+	.type	niagara4_patch_bzero,#function
+niagara4_patch_bzero:
+	NG_DO_PATCH(memset, NG4memset)
+	NG_DO_PATCH(__bzero, NG4bzero)
+	NG_DO_PATCH(__clear_user, NGclear_user)
+	NG_DO_PATCH(tsb_init, NGtsb_init)
+	retl
+	 nop
+	.size	niagara4_patch_bzero,.-niagara4_patch_bzero
+
+	.globl	niagara4_patch_pageops
+	.type	niagara4_patch_pageops,#function
+niagara4_patch_pageops:
+	NG_DO_PATCH(copy_user_page, NG4copy_user_page)
+	NG_DO_PATCH(_clear_page, NG4clear_page)
+	NG_DO_PATCH(clear_user_page, NG4clear_user_page)
+	retl
+	 nop
+	.size	niagara4_patch_pageops,.-niagara4_patch_pageops
+
+ENTRY(niagara4_patch_fls)
+	NG_DO_PATCH(fls, NG4fls)
+	NG_DO_PATCH(__fls, __NG4fls)
+	retl
+	 nop
+ENDPROC(niagara4_patch_fls)
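NG_DO_PATCH overwrites the first two instructions of OLD with a "ba,pt %xcc"
to NEW and a nop in its delay slot. The sll-by-11/srl-by-13 pair truncates the
byte displacement to the 19-bit signed word displacement of the BPcc
instruction format (srl operates on the low 32 bits). A C sketch of the two
instruction words being assembled (the function name is illustrative):

    #include <stdint.h>

    #define BRANCH_ALWAYS 0x10680000  /* ba,pt %xcc, zero displacement */
    #define NOP           0x01000000

    /* Build the two words NG_DO_PATCH stores over 'old': a branch-always
     * whose 19-bit word displacement reaches 'new', then a nop.
     */
    static void encode_patch(uint32_t insn[2], uintptr_t old, uintptr_t new)
    {
        uint32_t disp19 = (uint32_t)((new - old) >> 2) & 0x7ffff;

        insn[0] = BRANCH_ALWAYS | disp19;
        insn[1] = NOP;
    }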
diff --git a/arch/sparc/lib/NGbzero.S b/arch/sparc/lib/NGbzero.S
new file mode 100644
index 0000000..1932761
--- /dev/null
+++ b/arch/sparc/lib/NGbzero.S
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NGbzero.S: Niagara optimized memset/clear_user.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+#include <asm/asi.h>
+
+#define EX_ST(x,y)		\
+98:	x,y;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_o1_asi;\
+	.text;			\
+	.align 4;
+
+	.text
+
+	.globl		NGmemset
+	.type		NGmemset, #function
+NGmemset:		/* %o0=buf, %o1=pat, %o2=len */
+	and		%o1, 0xff, %o3
+	mov		%o2, %o1
+	sllx		%o3, 8, %g1
+	or		%g1, %o3, %o2
+	sllx		%o2, 16, %g1
+	or		%g1, %o2, %o2
+	sllx		%o2, 32, %g1
+	ba,pt		%xcc, 1f
+	 or		%g1, %o2, %o2
+
+	.globl		NGbzero
+	.type		NGbzero, #function
+NGbzero:
+	clr		%o2
+1:	brz,pn		%o1, NGbzero_return
+	 mov		%o0, %o3
+
+	/* %o5: saved %asi, restored at NGbzero_done
+	 * %g7: store-init %asi to use
+	 * %o4:	non-store-init %asi to use
+	 */
+	rd		%asi, %o5
+	mov		ASI_BLK_INIT_QUAD_LDD_P, %g7
+	mov		ASI_P, %o4
+	wr		%o4, 0x0, %asi
+
+NGbzero_from_clear_user:
+	cmp		%o1, 15
+	bl,pn		%icc, NGbzero_tiny
+	 andcc		%o0, 0x7, %g1
+	be,pt		%xcc, 2f
+	 mov		8, %g2
+	sub		%g2, %g1, %g1
+	sub		%o1, %g1, %o1
+1:	EX_ST(stba %o2, [%o0 + 0x00] %asi)
+	subcc		%g1, 1, %g1
+	bne,pt		%xcc, 1b
+	 add		%o0, 1, %o0
+2:	cmp		%o1, 128
+	bl,pn		%icc, NGbzero_medium
+	 andcc		%o0, (64 - 1), %g1
+	be,pt		%xcc, NGbzero_pre_loop
+	 mov		64, %g2
+	sub		%g2, %g1, %g1
+	sub		%o1, %g1, %o1
+1:	EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+	subcc		%g1, 8, %g1
+	bne,pt		%xcc, 1b
+	 add		%o0, 8, %o0
+
+NGbzero_pre_loop:
+	wr		%g7, 0x0, %asi
+	andn		%o1, (64 - 1), %g1
+	sub		%o1, %g1, %o1
+NGbzero_loop:
+	EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x08] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x10] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x18] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x20] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x28] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x30] %asi)
+	EX_ST(stxa %o2, [%o0 + 0x38] %asi)
+	subcc		%g1, 64, %g1
+	bne,pt		%xcc, NGbzero_loop
+	 add		%o0, 64, %o0
+
+	membar		#Sync
+	wr		%o4, 0x0, %asi
+	brz,pn		%o1, NGbzero_done
+NGbzero_medium:
+	 andncc		%o1, 0x7, %g1
+	be,pn		%xcc, 2f
+	 sub		%o1, %g1, %o1
+1:	EX_ST(stxa %o2, [%o0 + 0x00] %asi)
+	subcc		%g1, 8, %g1
+	bne,pt		%xcc, 1b
+	 add		%o0, 8, %o0
+2:	brz,pt		%o1, NGbzero_done
+	 nop
+
+NGbzero_tiny:
+1:	EX_ST(stba %o2, [%o0 + 0x00] %asi)
+	subcc		%o1, 1, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+
+	/* fallthrough */
+
+NGbzero_done:
+	wr		%o5, 0x0, %asi
+
+NGbzero_return:
+	retl
+	 mov		%o3, %o0
+	.size		NGbzero, .-NGbzero
+	.size		NGmemset, .-NGmemset
+
+	.globl		NGclear_user
+	.type		NGclear_user, #function
+NGclear_user:		/* %o0=buf, %o1=len */
+	rd		%asi, %o5
+	brz,pn		%o1, NGbzero_done
+	 clr		%o3
+	cmp		%o5, ASI_AIUS
+	bne,pn		%icc, NGbzero
+	 clr		%o2
+	mov		ASI_BLK_INIT_QUAD_LDD_AIUS, %g7
+	ba,pt		%xcc, NGbzero_from_clear_user
+	 mov		ASI_AIUS, %o4
+	.size		NGclear_user, .-NGclear_user
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara_patch_bzero
+	.type	niagara_patch_bzero,#function
+niagara_patch_bzero:
+	NG_DO_PATCH(memset, NGmemset)
+	NG_DO_PATCH(__bzero, NGbzero)
+	NG_DO_PATCH(__clear_user, NGclear_user)
+	NG_DO_PATCH(tsb_init, NGtsb_init)
+	retl
+	 nop
+	.size	niagara_patch_bzero,.-niagara_patch_bzero
diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S
new file mode 100644
index 0000000..9abc49f
--- /dev/null
+++ b/arch/sparc/lib/NGcopy_from_user.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NGcopy_from_user.S: Niagara optimized copy from userspace.
+ *
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#define FUNC_NAME		NGcopy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] ASI_AIUS, dest
+#define LOAD_TWIN(addr_reg,dest0,dest1)	\
+	ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0
+#define EX_RETVAL(x)		%g0
+
+#ifdef __KERNEL__
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "NGmemcpy.S"
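The EX_LD wrapper above tags each faultable load: label 98 marks the
instruction, and the __ex_table section records the pair (fault site, fixup).
On a fault during a user access, the trap handler looks the faulting PC up in
this table and resumes at the fixup, which computes the count of bytes not
copied. Each pair corresponds to a record like the following sketch (sparc64
emits two 32-bit words per entry; the struct name here is illustrative):

    /* One __ex_table record as emitted by EX_LD/EX_ST:
     * 'insn' is the address of the potentially faulting instruction,
     * 'fixup' is where the fault handler redirects execution.
     */
    struct ex_entry {
        unsigned int insn;   /* .word 98b */
        unsigned int fixup;  /* .word y   */
    };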
diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S
new file mode 100644
index 0000000..9cbe2f1
--- /dev/null
+++ b/arch/sparc/lib/NGcopy_to_user.S
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NGcopy_to_user.S: Niagara optimized copy to userspace.
+ *
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#ifndef ASI_AIUS
+#define ASI_AIUS	0x11
+#endif
+
+#define FUNC_NAME		NGcopy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
+#define STORE_ASI		ASI_BLK_INIT_QUAD_LDD_AIUS
+#define EX_RETVAL(x)		%g0
+
+#ifdef __KERNEL__
+	/* Writing to %asi is _expensive_, so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop
+#endif
+
+#include "NGmemcpy.S"
diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S
new file mode 100644
index 0000000..8e4d22a
--- /dev/null
+++ b/arch/sparc/lib/NGmemcpy.S
@@ -0,0 +1,510 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NGmemcpy.S: Niagara optimized memcpy.
+ *
+ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#ifdef __KERNEL__
+#include <linux/linkage.h>
+#include <asm/asi.h>
+#include <asm/thread_info.h>
+#define GLOBAL_SPARE	%g7
+#define RESTORE_ASI(TMP)	\
+	ldub	[%g6 + TI_CURRENT_DS], TMP;  \
+	wr	TMP, 0x0, %asi;
+#else
+#define GLOBAL_SPARE	%g5
+#define RESTORE_ASI(TMP)	\
+	wr	%g0, ASI_PNF, %asi
+#endif
+
+#ifdef __sparc_v9__
+#define SAVE_AMOUNT	128
+#else
+#define SAVE_AMOUNT	64
+#endif
+
+#ifndef STORE_ASI
+#define STORE_ASI	ASI_BLK_INIT_QUAD_LDD_P
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+
+#ifndef LOAD
+#ifndef MEMCPY_DEBUG
+#define LOAD(type,addr,dest)	type [addr], dest
+#else
+#define LOAD(type,addr,dest)	type##a [addr] 0x80, dest
+#endif
+#endif
+
+#ifndef LOAD_TWIN
+#define LOAD_TWIN(addr_reg,dest0,dest1)	\
+	ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr)	type src, [addr]
+#endif
+
+#ifndef STORE_INIT
+#ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
+#define STORE_INIT(src,addr)	stxa src, [addr] %asi
+#else
+#define STORE_INIT(src,addr)	stx src, [addr + 0x00]
+#endif
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	NGmemcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_asi:
+	wr	%g0, ASI_AIUS, %asi
+	ret
+	 restore
+ENTRY(NG_ret_i2_plus_i4_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i5, %i0
+ENDPROC(NG_ret_i2_plus_i4_plus_1)
+ENTRY(NG_ret_i2_plus_g1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1)
+ENTRY(NG_ret_i2_plus_g1_minus_8)
+	sub	%g1, 8, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_8)
+ENTRY(NG_ret_i2_plus_g1_minus_16)
+	sub	%g1, 16, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_16)
+ENTRY(NG_ret_i2_plus_g1_minus_24)
+	sub	%g1, 24, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_24)
+ENTRY(NG_ret_i2_plus_g1_minus_32)
+	sub	%g1, 32, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_32)
+ENTRY(NG_ret_i2_plus_g1_minus_40)
+	sub	%g1, 40, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_40)
+ENTRY(NG_ret_i2_plus_g1_minus_48)
+	sub	%g1, 48, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_48)
+ENTRY(NG_ret_i2_plus_g1_minus_56)
+	sub	%g1, 56, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_56)
+ENTRY(NG_ret_i2_plus_i4)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4)
+ENTRY(NG_ret_i2_plus_i4_minus_8)
+	sub	%i4, 8, %i4
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4_minus_8)
+ENTRY(NG_ret_i2_plus_8)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 8, %i0
+ENDPROC(NG_ret_i2_plus_8)
+ENTRY(NG_ret_i2_plus_4)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 4, %i0
+ENDPROC(NG_ret_i2_plus_4)
+ENTRY(NG_ret_i2_plus_1)
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, 1, %i0
+ENDPROC(NG_ret_i2_plus_1)
+ENTRY(NG_ret_i2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_plus_1)
+ENTRY(NG_ret_i2)
+	ba,pt	%xcc, __restore_asi
+	 mov	%i2, %i0
+ENDPROC(NG_ret_i2)
+ENTRY(NG_ret_i2_and_7_plus_i4)
+	and	%i2, 7, %i2
+	ba,pt	%xcc, __restore_asi
+	 add	%i2, %i4, %i0
+ENDPROC(NG_ret_i2_and_7_plus_i4)
+#endif
+
+	.align		64
+
+	.globl	FUNC_NAME
+	.type	FUNC_NAME,#function
+FUNC_NAME:	/* %i0=dst, %i1=src, %i2=len */
+	PREAMBLE
+	save		%sp, -SAVE_AMOUNT, %sp
+	srlx		%i2, 31, %g2
+	cmp		%g2, 0
+	tne		%xcc, 5
+	mov		%i0, %o0
+	cmp		%i2, 0
+	be,pn		%XCC, 85f
+	 or		%o0, %i1, %i3
+	cmp		%i2, 16
+	blu,a,pn	%XCC, 80f
+	 or		%i3, %i2, %i3
+
+	/* 2 blocks (128 bytes) is the minimum we can do the block
+	 * copy with.  We need to ensure that we'll iterate at least
+	 * once in the block copy loop.  At worst we'll need to align
+	 * the destination to a 64-byte boundary, which can chew up
+	 * to (64 - 1) bytes from the length before we perform the
+	 * block copy loop.
+	 */
+	cmp		%i2, (2 * 64)
+	blu,pt		%XCC, 70f
+	 andcc		%i3, 0x7, %g0
+
+	/* %o0:	dst
+	 * %i1:	src
+	 * %i2:	len  (known to be >= 128)
+	 *
+	 * The block copy loops will use %i4/%i5,%g2/%g3 as
+	 * temporaries while copying the data.
+	 */
+
+	LOAD(prefetch, %i1, #one_read)
+	wr		%g0, STORE_ASI, %asi
+
+	/* Align destination on 64-byte boundary.  */
+	andcc		%o0, (64 - 1), %i4
+	be,pt		%XCC, 2f
+	 sub		%i4, 64, %i4
+	sub		%g0, %i4, %i4	! bytes to align dst
+	sub		%i2, %i4, %i2
+1:	subcc		%i4, 1, %i4
+	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
+	EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
+	add		%i1, 1, %i1
+	bne,pt		%XCC, 1b
+	add		%o0, 1, %o0
+
+	/* If the source is on a 16-byte boundary we can do
+	 * the direct block copy loop.  If it is 8-byte aligned
+	 * we can do the 16-byte loads offset by -8 bytes and the
+	 * init stores offset by one register.
+	 *
+	 * If the source is not even 8-byte aligned, we need to do
+	 * shifting and masking (basically integer faligndata).
+	 *
+	 * The careful bit with init stores is that if we store
+	 * to any part of the cache line we have to store the whole
+	 * cache line, else we can end up with corrupt L2 cache line
+	 * contents.  Since the loop works on 64 bytes of 64-byte
+	 * aligned store data at a time, this is easy to ensure.
+	 */
+2:
+	andcc		%i1, (16 - 1), %i4
+	andn		%i2, (64 - 1), %g1	! block copy loop iterator
+	be,pt		%XCC, 50f
+	 sub		%i2, %g1, %i2		! final sub-block copy bytes
+
+	cmp		%i4, 8
+	be,pt		%XCC, 10f
+	 sub		%i1, %i4, %i1
+
+	/* Neither 8-byte nor 16-byte aligned, shift and mask.  */
+	and		%i4, 0x7, GLOBAL_SPARE
+	sll		GLOBAL_SPARE, 3, GLOBAL_SPARE
+	mov		64, %i5
+	EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
+	sub		%i5, GLOBAL_SPARE, %i5
+	mov		16, %o4
+	mov		32, %o5
+	mov		48, %o7
+	mov		64, %i3
+
+	bg,pn	   	%XCC, 9f
+	 nop
+
+#define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \
+	sllx		WORD1, POST_SHIFT, WORD1; \
+	srlx		WORD2, PRE_SHIFT, TMP; \
+	sllx		WORD2, POST_SHIFT, WORD2; \
+	or		WORD1, TMP, WORD1; \
+	srlx		WORD3, PRE_SHIFT, TMP; \
+	or		WORD2, TMP, WORD2;
+
+8:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
+	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+	LOAD(prefetch, %i1 + %i3, #one_read)
+
+	EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
+	EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+
+	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
+	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+
+	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+	MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
+
+	EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+
+	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
+	add		%i1, 64, %i1
+	MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
+
+	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+
+	subcc		%g1, 64, %g1
+	bne,pt		%XCC, 8b
+	 add		%o0, 64, %o0
+
+	ba,pt		%XCC, 60f
+	 add		%i1, %i4, %i1
+
+9:	EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
+	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+	LOAD(prefetch, %i1 + %i3, #one_read)
+
+	EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+
+	EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
+	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+
+	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+	MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
+
+	EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+
+	EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
+	add		%i1, 64, %i1
+	MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
+
+	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+
+	subcc		%g1, 64, %g1
+	bne,pt		%XCC, 9b
+	 add		%o0, 64, %o0
+
+	ba,pt		%XCC, 60f
+	 add		%i1, %i4, %i1
+
+10:	/* Destination is 64-byte aligned, source was only 8-byte
+	 * aligned but it has been subtracted by 8 and we perform
+	 * one twin load ahead, then add 8 back into source when
+	 * we finish the loop.
+	 */
+	EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
+	mov	16, %o7
+	mov	32, %g2
+	mov	48, %g3
+	mov	64, %o1
+1:	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
+	LOAD(prefetch, %i1 + %o1, #one_read)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
+	EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+	EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
+	add		%i1, 64, %i1
+	EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+	subcc		%g1, 64, %g1
+	bne,pt		%XCC, 1b
+	 add		%o0, 64, %o0
+
+	ba,pt		%XCC, 60f
+	 add		%i1, 0x8, %i1
+
+50:	/* Destination is 64-byte aligned, and source is 16-byte
+	 * aligned.
+	 */
+	mov	16, %o7
+	mov	32, %g2
+	mov	48, %g3
+	mov	64, %o1
+1:	EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
+	EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
+	LOAD(prefetch, %i1 + %o1, #one_read)
+	EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)	! initializes cache line
+	EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+	EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+	EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+	add	%i1, 64, %i1
+	EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+	EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+	EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+	EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
+	subcc	%g1, 64, %g1
+	bne,pt	%XCC, 1b
+	 add	%o0, 64, %o0
+	/* fall through */
+
+60:	
+	membar		#Sync
+
+	/* %i2 contains any final bytes still needed to be copied
+	 * over. If anything is left, we copy it one byte at a time.
+	 */
+	RESTORE_ASI(%i3)
+	brz,pt		%i2, 85f
+	 sub		%o0, %i1, %i3
+	ba,a,pt		%XCC, 90f
+	 nop
+
+	.align		64
+70: /* 16 < len <= 64 */
+	bne,pn		%XCC, 75f
+	 sub		%o0, %i1, %i3
+
+72:
+	andn		%i2, 0xf, %i4
+	and		%i2, 0xf, %i2
+1:	subcc		%i4, 0x10, %i4
+	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
+	add		%i1, 0x08, %i1
+	EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
+	sub		%i1, 0x08, %i1
+	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
+	add		%i1, 0x8, %i1
+	EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
+	bgu,pt		%XCC, 1b
+	 add		%i1, 0x8, %i1
+73:	andcc		%i2, 0x8, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%i2, 0x8, %i2
+	EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
+	EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
+	add		%i1, 0x8, %i1
+1:	andcc		%i2, 0x4, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%i2, 0x4, %i2
+	EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
+	EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
+	add		%i1, 0x4, %i1
+1:	cmp		%i2, 0
+	be,pt		%XCC, 85f
+	 nop
+	ba,pt		%xcc, 90f
+	 nop
+
+75:
+	andcc		%o0, 0x7, %g1
+	sub		%g1, 0x8, %g1
+	be,pn		%icc, 2f
+	 sub		%g0, %g1, %g1
+	sub		%i2, %g1, %i2
+
+1:	subcc		%g1, 1, %g1
+	EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
+	bgu,pt		%icc, 1b
+	 add		%i1, 1, %i1
+
+2:	add		%i1, %i3, %o0
+	andcc		%i1, 0x7, %g1
+	bne,pt		%icc, 8f
+	 sll		%g1, 3, %g1
+
+	cmp		%i2, 16
+	bgeu,pt		%icc, 72b
+	 nop
+	ba,a,pt		%xcc, 73b
+
+8:	mov		64, %i3
+	andn		%i1, 0x7, %i1
+	EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
+	sub		%i3, %g1, %i3
+	andn		%i2, 0x7, %i4
+	sllx		%g2, %g1, %g2
+1:	add		%i1, 0x8, %i1
+	EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
+	subcc		%i4, 0x8, %i4
+	srlx		%g3, %i3, %i5
+	or		%i5, %g2, %i5
+	EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
+	add		%o0, 0x8, %o0
+	bgu,pt		%icc, 1b
+	 sllx		%g3, %g1, %g2
+
+	srl		%g1, 3, %g1
+	andcc		%i2, 0x7, %i2
+	be,pn		%icc, 85f
+	 add		%i1, %g1, %i1
+	ba,pt		%xcc, 90f
+	 sub		%o0, %i1, %i3
+
+	.align		64
+80: /* 0 < len <= 16 */
+	andcc		%i3, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 sub		%o0, %i1, %i3
+
+1:
+	subcc		%i2, 4, %i2
+	EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
+	EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
+	bgu,pt		%XCC, 1b
+	 add		%i1, 4, %i1
+
+85:	ret
+	 restore	EX_RETVAL(%i0), %g0, %o0
+
+	.align		32
+90:
+	subcc		%i2, 1, %i2
+	EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
+	EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
+	bgu,pt		%XCC, 90b
+	 add		%i1, 1, %i1
+	ret
+	 restore	EX_RETVAL(%i0), %g0, %o0
+
+	.size		FUNC_NAME, .-FUNC_NAME
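The label-8 tail of NGmemcpy is the "integer faligndata" mentioned in the
comment above: the source pointer is rounded down to an 8-byte boundary and
each aligned output doubleword is assembled from two adjacent loads with
complementary shifts (%g1 holds the left shift in bits, %i3 its complement
out of 64). One step of that merge in C, assuming the big-endian layout of
sparc64 (the helper name is illustrative):

    #include <stdint.h>

    /* Merge one aligned 64-bit output word for a source offset
     * 'off' bytes (1..7) past an 8-byte boundary: low bytes of the
     * previous doubleword, then high bytes of the next.
     */
    static uint64_t merge_step(uint64_t prev, uint64_t next, unsigned int off)
    {
        unsigned int lshift = off * 8;      /* %g1 in the code */
        unsigned int rshift = 64 - lshift;  /* %i3 in the code */

        return (prev << lshift) | (next >> rshift);
    }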
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S
new file mode 100644
index 0000000..88fec78
--- /dev/null
+++ b/arch/sparc/lib/NGpage.S
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NGpage.S: Niagara optimized clear and copy page.
+ *
+ * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/page.h>
+
+	.text
+	.align	32
+
+	/* This is heavily simplified from the sun4u variants
+	 * because Niagara does not have any D-cache aliasing issues,
+	 * and we do not need the FPU to implement an optimal page
+	 * copy/clear.
+	 */
+
+NGcopy_user_page:	/* %o0=dest, %o1=src, %o2=vaddr */
+	save		%sp, -192, %sp
+	rd		%asi, %g3
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	set		PAGE_SIZE, %g7
+	prefetch	[%i1 + 0x00], #one_read
+	prefetch	[%i1 + 0x40], #one_read
+
+1:	prefetch	[%i1 + 0x80], #one_read
+	prefetch	[%i1 + 0xc0], #one_read
+	ldda		[%i1 + 0x00] %asi, %o2
+	ldda		[%i1 + 0x10] %asi, %o4
+	ldda		[%i1 + 0x20] %asi, %l2
+	ldda		[%i1 + 0x30] %asi, %l4
+	stxa		%o2, [%i0 + 0x00] %asi
+	stxa		%o3, [%i0 + 0x08] %asi
+	stxa		%o4, [%i0 + 0x10] %asi
+	stxa		%o5, [%i0 + 0x18] %asi
+	stxa		%l2, [%i0 + 0x20] %asi
+	stxa		%l3, [%i0 + 0x28] %asi
+	stxa		%l4, [%i0 + 0x30] %asi
+	stxa		%l5, [%i0 + 0x38] %asi
+	ldda		[%i1 + 0x40] %asi, %o2
+	ldda		[%i1 + 0x50] %asi, %o4
+	ldda		[%i1 + 0x60] %asi, %l2
+	ldda		[%i1 + 0x70] %asi, %l4
+	stxa		%o2, [%i0 + 0x40] %asi
+	stxa		%o3, [%i0 + 0x48] %asi
+	stxa		%o4, [%i0 + 0x50] %asi
+	stxa		%o5, [%i0 + 0x58] %asi
+	stxa		%l2, [%i0 + 0x60] %asi
+	stxa		%l3, [%i0 + 0x68] %asi
+	stxa		%l4, [%i0 + 0x70] %asi
+	stxa		%l5, [%i0 + 0x78] %asi
+	add		%i1, 128, %i1
+	subcc		%g7, 128, %g7
+	bne,pt		%xcc, 1b
+	 add		%i0, 128, %i0
+	wr		%g3, 0x0, %asi
+	membar		#Sync
+	ret
+	 restore
+
+	.align		32
+	.globl		NGclear_page
+	.globl		NGclear_user_page
+NGclear_page:		/* %o0=dest */
+NGclear_user_page:	/* %o0=dest, %o1=vaddr */
+	rd		%asi, %g3
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	set		PAGE_SIZE, %g7
+
+1:	stxa		%g0, [%o0 + 0x00] %asi
+	stxa		%g0, [%o0 + 0x08] %asi
+	stxa		%g0, [%o0 + 0x10] %asi
+	stxa		%g0, [%o0 + 0x18] %asi
+	stxa		%g0, [%o0 + 0x20] %asi
+	stxa		%g0, [%o0 + 0x28] %asi
+	stxa		%g0, [%o0 + 0x30] %asi
+	stxa		%g0, [%o0 + 0x38] %asi
+	stxa		%g0, [%o0 + 0x40] %asi
+	stxa		%g0, [%o0 + 0x48] %asi
+	stxa		%g0, [%o0 + 0x50] %asi
+	stxa		%g0, [%o0 + 0x58] %asi
+	stxa		%g0, [%o0 + 0x60] %asi
+	stxa		%g0, [%o0 + 0x68] %asi
+	stxa		%g0, [%o0 + 0x70] %asi
+	stxa		%g0, [%o0 + 0x78] %asi
+	stxa		%g0, [%o0 + 0x80] %asi
+	stxa		%g0, [%o0 + 0x88] %asi
+	stxa		%g0, [%o0 + 0x90] %asi
+	stxa		%g0, [%o0 + 0x98] %asi
+	stxa		%g0, [%o0 + 0xa0] %asi
+	stxa		%g0, [%o0 + 0xa8] %asi
+	stxa		%g0, [%o0 + 0xb0] %asi
+	stxa		%g0, [%o0 + 0xb8] %asi
+	stxa		%g0, [%o0 + 0xc0] %asi
+	stxa		%g0, [%o0 + 0xc8] %asi
+	stxa		%g0, [%o0 + 0xd0] %asi
+	stxa		%g0, [%o0 + 0xd8] %asi
+	stxa		%g0, [%o0 + 0xe0] %asi
+	stxa		%g0, [%o0 + 0xe8] %asi
+	stxa		%g0, [%o0 + 0xf0] %asi
+	stxa		%g0, [%o0 + 0xf8] %asi
+	subcc		%g7, 256, %g7
+	bne,pt		%xcc, 1b
+	 add		%o0, 256, %o0
+	wr		%g3, 0x0, %asi
+	membar		#Sync
+	retl
+	 nop
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara_patch_pageops
+	.type	niagara_patch_pageops,#function
+niagara_patch_pageops:
+	NG_DO_PATCH(copy_user_page, NGcopy_user_page)
+	NG_DO_PATCH(_clear_page, NGclear_page)
+	NG_DO_PATCH(clear_user_page, NGclear_user_page)
+	retl
+	 nop
+	.size	niagara_patch_pageops,.-niagara_patch_pageops
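Stripped of the ASI selection and the trailing membar, NGclear_page is a
simple loop: PAGE_SIZE is walked in 256-byte batches of 32 eight-byte
initializing stores. The control flow in C, as a sketch (sparc64's base
PAGE_SIZE is 8K):

    #include <stdint.h>

    #define PAGE_SIZE 8192UL  /* sparc64 base page size */

    /* Shape of the NGclear_page loop: 32 eight-byte stores per
     * iteration, PAGE_SIZE / 256 iterations.
     */
    static void clear_page_sketch(void *page)
    {
        uint64_t *p = page;
        unsigned long left;

        for (left = PAGE_SIZE; left != 0; left -= 256) {
            int i;

            for (i = 0; i < 32; i++)
                *p++ = 0;
        }
    }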
diff --git a/arch/sparc/lib/NGpatch.S b/arch/sparc/lib/NGpatch.S
new file mode 100644
index 0000000..e9f843f
--- /dev/null
+++ b/arch/sparc/lib/NGpatch.S
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* NGpatch.S: Patch Ultra-I routines with Niagara variant.
+ *
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define NG_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
+	.globl	niagara_patch_copyops
+	.type	niagara_patch_copyops,#function
+niagara_patch_copyops:
+	NG_DO_PATCH(memcpy, NGmemcpy)
+	NG_DO_PATCH(raw_copy_from_user, NGcopy_from_user)
+	NG_DO_PATCH(raw_copy_to_user, NGcopy_to_user)
+	retl
+	 nop
+	.size	niagara_patch_copyops,.-niagara_patch_copyops
diff --git a/arch/sparc/lib/PeeCeeI.c b/arch/sparc/lib/PeeCeeI.c
new file mode 100644
index 0000000..cde4c9a
--- /dev/null
+++ b/arch/sparc/lib/PeeCeeI.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PeeCeeI.c: The emerging standard...
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/module.h>
+
+#include <asm/io.h>
+#include <asm/byteorder.h>
+
+void outsb(unsigned long __addr, const void *src, unsigned long count)
+{
+	void __iomem *addr = (void __iomem *) __addr;
+	const u8 *p = src;
+
+	while (count--)
+		__raw_writeb(*p++, addr);
+}
+EXPORT_SYMBOL(outsb);
+
+void outsw(unsigned long __addr, const void *src, unsigned long count)
+{
+	void __iomem *addr = (void __iomem *) __addr;
+
+	while (count--) {
+		__raw_writew(*(u16 *)src, addr);
+		src += sizeof(u16);
+	}
+}
+EXPORT_SYMBOL(outsw);
+
+void outsl(unsigned long __addr, const void *src, unsigned long count)
+{
+	void __iomem *addr = (void __iomem *) __addr;
+	u32 l, l2;
+
+	if (!count)
+		return;
+
+	switch (((unsigned long)src) & 0x3) {
+	case 0x0:
+		/* src is naturally aligned */
+		while (count--) {
+			__raw_writel(*(u32 *)src, addr);
+			src += sizeof(u32);
+		}
+		break;
+	case 0x2:
+		/* 2-byte alignment */
+		while (count--) {
+			l = (*(u16 *)src) << 16;
+			l |= *(u16 *)(src + sizeof(u16));
+			__raw_writel(l, addr);
+			src += sizeof(u32);
+		}
+		break;
+	case 0x1:
+		/* Hold three bytes in l each time, grab a byte from l2 */
+		l = (*(u8 *)src) << 24;
+		l |= (*(u16 *)(src + sizeof(u8))) << 8;
+		src += sizeof(u8) + sizeof(u16);
+		while (count--) {
+			l2 = *(u32 *)src;
+			l |= (l2 >> 24);
+			__raw_writel(l, addr);
+			l = l2 << 8;
+			src += sizeof(u32);
+		}
+		break;
+	case 0x3:
+		/* Hold a byte in l each time, grab 3 bytes from l2 */
+		l = (*(u8 *)src) << 24;
+		src += sizeof(u8);
+		while (count--) {
+			l2 = *(u32 *)src;
+			l |= (l2 >> 8);
+			__raw_writel(l, addr);
+			l = l2 << 24;
+			src += sizeof(u32);
+		}
+		break;
+	}
+}
+EXPORT_SYMBOL(outsl);
+
+void insb(unsigned long __addr, void *dst, unsigned long count)
+{
+	void __iomem *addr = (void __iomem *) __addr;
+
+	if (count) {
+		u32 *pi;
+		u8 *pb = dst;
+
+		while ((((unsigned long)pb) & 0x3) && count) {
+			*pb++ = __raw_readb(addr);
+			count--;
+		}
+		pi = (u32 *)pb;
+		while (count >= 4) {
+			u32 w;
+
+			w  = (__raw_readb(addr) << 24);
+			w |= (__raw_readb(addr) << 16);
+			w |= (__raw_readb(addr) << 8);
+			w |= (__raw_readb(addr) << 0);
+			*pi++ = w;
+			count -= 4;
+		}
+		pb = (u8 *)pi;
+		while (count--)
+			*pb++ = __raw_readb(addr);
+	}
+}
+EXPORT_SYMBOL(insb);
+
+void insw(unsigned long __addr, void *dst, unsigned long count)
+{
+	void __iomem *addr = (void __iomem *) __addr;
+
+	if (count) {
+		u16 *ps = dst;
+		u32 *pi;
+
+		if (((unsigned long)ps) & 0x2) {
+			*ps++ = __raw_readw(addr);
+			count--;
+		}
+		pi = (u32 *)ps;
+		while (count >= 2) {
+			u32 w;
+
+			w  = __raw_readw(addr) << 16;
+			w |= __raw_readw(addr) << 0;
+			*pi++ = w;
+			count -= 2;
+		}
+		ps = (u16 *)pi;
+		if (count)
+			*ps = __raw_readw(addr);
+	}
+}
+EXPORT_SYMBOL(insw);
+
+void insl(unsigned long __addr, void *dst, unsigned long count)
+{
+	void __iomem *addr = (void __iomem *) __addr;
+
+	if (count) {
+		if ((((unsigned long)dst) & 0x3) == 0) {
+			u32 *pi = dst;
+			while (count--)
+				*pi++ = __raw_readl(addr);
+		} else {
+			u32 l = 0, l2, *pi;
+			u16 *ps;
+			u8 *pb;
+
+			switch (((unsigned long)dst) & 3) {
+			case 0x2:
+				ps = dst;
+				count -= 1;
+				l = __raw_readl(addr);
+				*ps++ = l >> 16;
+				pi = (u32 *)ps;
+				while (count--) {
+					l2 = __raw_readl(addr);
+					*pi++ = (l << 16) | (l2 >> 16);
+					l = l2;
+				}
+				ps = (u16 *)pi;
+				*ps = l;
+				break;
+
+			case 0x1:
+				pb = dst;
+				count -= 1;
+				l = __raw_readl(addr);
+				*pb++ = l >> 24;
+				ps = (u16 *)pb;
+				*ps++ = ((l >> 8) & 0xffff);
+				pi = (u32 *)ps;
+				while (count--) {
+					l2 = __raw_readl(addr);
+					*pi++ = (l << 24) | (l2 >> 8);
+					l = l2;
+				}
+				pb = (u8 *)pi;
+				*pb = l;
+				break;
+
+			case 0x3:
+				pb = (u8 *)dst;
+				count -= 1;
+				l = __raw_readl(addr);
+				*pb++ = l >> 24;
+				pi = (u32 *)pb;
+				while (count--) {
+					l2 = __raw_readl(addr);
+					*pi++ = (l << 8) | (l2 >> 24);
+					l = l2;
+				}
+				ps = (u16 *)pi;
+				*ps++ = ((l >> 8) & 0xffff);
+				pb = (u8 *)ps;
+				*pb = l;
+				break;
+			}
+		}
+	}
+}
+EXPORT_SYMBOL(insl);
+
diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S
new file mode 100644
index 0000000..bf08d1c
--- /dev/null
+++ b/arch/sparc/lib/U1copy_from_user.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U1copy_from_user.S: UltraSparc-I/II/IIi/IIe optimized copy from userspace.
+ *
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+#define EX_LD(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_LD_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define FUNC_NAME		raw_copy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
+#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_AIUS, dest
+#define EX_RETVAL(x)		0
+
+	/* Writing to %asi is _expensive_, so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop;						\
+
+#include "U1memcpy.S"
diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S
new file mode 100644
index 0000000..1516985
--- /dev/null
+++ b/arch/sparc/lib/U1copy_to_user.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U1copy_to_user.S: UltraSparc-I/II/IIi/IIe optimized copy to userspace.
+ *
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+#define EX_ST(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_ST_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define FUNC_NAME		raw_copy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
+#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS
+#define EX_RETVAL(x)		0
+
+	/* Writing to %asi is _expensive_, so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop;						\
+
+#include "U1memcpy.S"
diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S
new file mode 100644
index 0000000..a6f4ee3
--- /dev/null
+++ b/arch/sparc/lib/U1memcpy.S
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
+ *
+ * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#ifdef __KERNEL__
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#include <asm/export.h>
+#define GLOBAL_SPARE	g7
+#else
+#define GLOBAL_SPARE	g5
+#define ASI_BLK_P 0xf0
+#define FPRS_FEF  0x04
+#ifdef MEMCPY_DEBUG
+#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
+		 clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
+#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#else
+#define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
+#define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#endif
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x,y)	x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef LOAD_BLK
+#define LOAD_BLK(addr,dest)	ldda [addr] ASI_BLK_P, dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr)	type src, [addr]
+#endif
+
+#ifndef STORE_BLK
+#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	memcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)		\
+	faligndata		%f1, %f2, %f48;			\
+	faligndata		%f2, %f3, %f50;			\
+	faligndata		%f3, %f4, %f52;			\
+	faligndata		%f4, %f5, %f54;			\
+	faligndata		%f5, %f6, %f56;			\
+	faligndata		%f6, %f7, %f58;			\
+	faligndata		%f7, %f8, %f60;			\
+	faligndata		%f8, %f9, %f62;
+
+#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)			\
+	EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);			\
+	add			%src, 0x40, %src;			\
+	subcc			%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE;	\
+	be,pn			%xcc, jmptgt;				\
+	 add			%dest, 0x40, %dest;			\
+
+#define LOOP_CHUNK1(src, dest, branch_dest)		\
+	MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
+#define LOOP_CHUNK2(src, dest, branch_dest)		\
+	MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
+#define LOOP_CHUNK3(src, dest, branch_dest)		\
+	MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
+
+#define DO_SYNC			membar	#Sync;
+#define STORE_SYNC(dest, fsrc)				\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);	\
+	add			%dest, 0x40, %dest;	\
+	DO_SYNC
+
+#define STORE_JUMP(dest, fsrc, target)			\
+	EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp);	\
+	add			%dest, 0x40, %dest;	\
+	ba,pt			%xcc, target;		\
+	 nop;
+
+#define FINISH_VISCHUNK(dest, f0, f1)			\
+	subcc			%g3, 8, %g3;		\
+	bl,pn			%xcc, 95f;		\
+	 faligndata		%f0, %f1, %f48;		\
+	EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);	\
+	add			%dest, 8, %dest;
+
+#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
+	subcc			%g3, 8, %g3;	\
+	bl,pn			%xcc, 95f;	\
+	 fsrc2			%f0, %f1;
+
+#define UNEVEN_VISCHUNK(dest, f0, f1)		\
+	UNEVEN_VISCHUNK_LAST(dest, f0, f1)	\
+	ba,a,pt			%xcc, 93f;
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+ENTRY(U1_g1_1_fp)
+	VISExitHalf
+	add		%g1, 1, %g1
+	add		%g1, %g2, %g1
+	retl
+	 add		%g1, %o2, %o0
+ENDPROC(U1_g1_1_fp)
+ENTRY(U1_g2_0_fp)
+	VISExitHalf
+	retl
+	 add		%g2, %o2, %o0
+ENDPROC(U1_g2_0_fp)
+ENTRY(U1_g2_8_fp)
+	VISExitHalf
+	add		%g2, 8, %g2
+	retl
+	 add		%g2, %o2, %o0
+ENDPROC(U1_g2_8_fp)
+ENTRY(U1_gs_0_fp)
+	VISExitHalf
+	add		%GLOBAL_SPARE, %g3, %o0
+	retl
+	 add		%o0, %o2, %o0
+ENDPROC(U1_gs_0_fp)
+ENTRY(U1_gs_80_fp)
+	VISExitHalf
+	add		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+	add		%GLOBAL_SPARE, %g3, %o0
+	retl
+	 add		%o0, %o2, %o0
+ENDPROC(U1_gs_80_fp)
+ENTRY(U1_gs_40_fp)
+	VISExitHalf
+	add		%GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
+	add		%GLOBAL_SPARE, %g3, %o0
+	retl
+	 add		%o0, %o2, %o0
+ENDPROC(U1_gs_40_fp)
+ENTRY(U1_g3_0_fp)
+	VISExitHalf
+	retl
+	 add		%g3, %o2, %o0
+ENDPROC(U1_g3_0_fp)
+ENTRY(U1_g3_8_fp)
+	VISExitHalf
+	add		%g3, 8, %g3
+	retl
+	 add		%g3, %o2, %o0
+ENDPROC(U1_g3_8_fp)
+ENTRY(U1_o2_0_fp)
+	VISExitHalf
+	retl
+	 mov		%o2, %o0
+ENDPROC(U1_o2_0_fp)
+ENTRY(U1_o2_1_fp)
+	VISExitHalf
+	retl
+	 add		%o2, 1, %o0
+ENDPROC(U1_o2_1_fp)
+ENTRY(U1_gs_0)
+	VISExitHalf
+	retl
+	 add		%GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0)
+ENTRY(U1_gs_8)
+	VISExitHalf
+	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+	retl
+	 add		%GLOBAL_SPARE, 0x8, %o0
+ENDPROC(U1_gs_8)
+ENTRY(U1_gs_10)
+	VISExitHalf
+	add		%GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+	retl
+	 add		%GLOBAL_SPARE, 0x10, %o0
+ENDPROC(U1_gs_10)
+ENTRY(U1_o2_0)
+	retl
+	 mov		%o2, %o0
+ENDPROC(U1_o2_0)
+ENTRY(U1_o2_8)
+	retl
+	 add		%o2, 8, %o0
+ENDPROC(U1_o2_8)
+ENTRY(U1_o2_4)
+	retl
+	 add		%o2, 4, %o0
+ENDPROC(U1_o2_4)
+ENTRY(U1_o2_1)
+	retl
+	 add		%o2, 1, %o0
+ENDPROC(U1_o2_1)
+ENTRY(U1_g1_0)
+	retl
+	 add		%g1, %o2, %o0
+ENDPROC(U1_g1_0)
+ENTRY(U1_g1_1)
+	add		%g1, 1, %g1
+	retl
+	 add		%g1, %o2, %o0
+ENDPROC(U1_g1_1)
+ENTRY(U1_gs_0_o2_adj)
+	and		%o2, 7, %o2
+	retl
+	 add		%GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0_o2_adj)
+ENTRY(U1_gs_8_o2_adj)
+	and		%o2, 7, %o2
+	add		%GLOBAL_SPARE, 8, %GLOBAL_SPARE
+	retl
+	 add		%GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_8_o2_adj)
+#endif
+
+	.align		64
+
+	.globl		FUNC_NAME
+	.type		FUNC_NAME,#function
+FUNC_NAME:		/* %o0=dst, %o1=src, %o2=len */
+	srlx		%o2, 31, %g2
+	cmp		%g2, 0
+	tne		%xcc, 5
+	PREAMBLE
+	mov		%o0, %o4
+	cmp		%o2, 0
+	be,pn		%XCC, 85f
+	 or		%o0, %o1, %o3
+	cmp		%o2, 16
+	blu,a,pn	%XCC, 80f
+	 or		%o3, %o2, %o3
+
+	cmp		%o2, (5 * 64)
+	blu,pt		%XCC, 70f
+	 andcc		%o3, 0x7, %g0
+
+	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  */
+	VISEntry
+
+	/* Is 'dst' already aligned on a 64-byte boundary? */
+	andcc		%o0, 0x3f, %g2
+	be,pt		%XCC, 2f
+
+	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
+	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
+	 * subtract this from 'len'.
+	 */
+	 sub		%o0, %o1, %GLOBAL_SPARE
+	sub		%g2, 0x40, %g2
+	sub		%g0, %g2, %g2
+	sub		%o2, %g2, %o2
+	andcc		%g2, 0x7, %g1
+	be,pt		%icc, 2f
+	 and		%g2, 0x38, %g2
+
+1:	subcc		%g1, 0x1, %g1
+	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
+	EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 0x1, %o1
+
+	add		%o1, %GLOBAL_SPARE, %o0
+
+2:	cmp		%g2, 0x0
+	and		%o1, 0x7, %g1
+	be,pt		%icc, 3f
+	 alignaddr	%o1, %g0, %o1
+
+	EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
+	add		%o1, 0x8, %o1
+	subcc		%g2, 0x8, %g2
+	faligndata	%f4, %f6, %f0
+	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
+	be,pn		%icc, 3f
+	 add		%o0, 0x8, %o0
+
+	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
+	add		%o1, 0x8, %o1
+	subcc		%g2, 0x8, %g2
+	faligndata	%f6, %f4, %f0
+	EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
+	bne,pt		%icc, 1b
+	 add		%o0, 0x8, %o0
+
+	/* Destination is 64-byte aligned.  */
+3:	
+	membar		  #LoadStore | #StoreStore | #StoreLoad
+
+	subcc		%o2, 0x40, %GLOBAL_SPARE
+	add		%o1, %g1, %g1
+	andncc		%GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
+	srl		%g1, 3, %g2
+	sub		%o2, %GLOBAL_SPARE, %g3
+	andn		%o1, (0x40 - 1), %o1
+	and		%g2, 7, %g2
+	andncc		%g3, 0x7, %g3
+	fsrc2		%f0, %f2
+	sub		%g3, 0x8, %g3
+	sub		%o2, %GLOBAL_SPARE, %o2
+
+	add		%g1, %GLOBAL_SPARE, %g1
+	subcc		%o2, %g3, %o2
+
+	EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
+	add		%o1, 0x40, %o1
+	add		%g1, %g3, %g1
+	EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
+	add		%o1, 0x40, %o1
+	sub		%GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+	EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
+	add		%o1, 0x40, %o1
+
+	/* There are 8 instances of the unrolled loop,
+	 * one for each possible alignment of the
+	 * source buffer.  Each loop instance is 452
+	 * bytes.
+	 */
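+	/* The sll/sub/sllx/add/sllx sequence below is a shift-and-add
+	 * multiply selecting the right instance:
+	 * ((%g2 * 8 - %g2) * 16 + %g2) * 4 = %g2 * 452.
+	 */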
+	sll		%g2, 3, %o3
+	sub		%o3, %g2, %o3
+	sllx		%o3, 4, %o3
+	add		%o3, %g2, %o3
+	sllx		%o3, 2, %g2
+1:	rd		%pc, %o3
+	add		%o3, %lo(1f - 1b), %o3
+	jmpl		%o3 + %g2, %g0
+	 nop
+
+	.align		64
+1:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f0, %f2, %f48
+1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
+	STORE_JUMP(o0, f48, 40f)
+2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
+	STORE_JUMP(o0, f48, 48f)
+3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
+	STORE_JUMP(o0, f48, 56f)
+
+1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f2, %f4, %f48
+1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
+	STORE_JUMP(o0, f48, 41f)
+2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
+	STORE_JUMP(o0, f48, 49f)
+3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
+	STORE_JUMP(o0, f48, 57f)
+
+1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f4, %f6, %f48
+1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
+	STORE_JUMP(o0, f48, 42f)
+2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
+	STORE_JUMP(o0, f48, 50f)
+3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
+	STORE_JUMP(o0, f48, 58f)
+
+1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f6, %f8, %f48
+1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
+	STORE_JUMP(o0, f48, 43f)
+2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
+	STORE_JUMP(o0, f48, 51f)
+3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
+	STORE_JUMP(o0, f48, 59f)
+
+1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f8, %f10, %f48
+1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
+	STORE_JUMP(o0, f48, 44f)
+2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
+	STORE_JUMP(o0, f48, 52f)
+3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
+	STORE_JUMP(o0, f48, 60f)
+
+1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f10, %f12, %f48
+1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
+	STORE_JUMP(o0, f48, 45f)
+2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
+	STORE_JUMP(o0, f48, 53f)
+3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
+	STORE_JUMP(o0, f48, 61f)
+
+1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f12, %f14, %f48
+1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
+	STORE_JUMP(o0, f48, 46f)
+2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
+	STORE_JUMP(o0, f48, 54f)
+3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
+	STORE_JUMP(o0, f48, 62f)
+
+1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
+	LOOP_CHUNK1(o1, o0, 1f)
+	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+	LOOP_CHUNK2(o1, o0, 2f)
+	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
+	LOOP_CHUNK3(o1, o0, 3f)
+	ba,pt		%xcc, 1b+4
+	 faligndata	%f14, %f16, %f48
+1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
+	STORE_JUMP(o0, f48, 47f)
+2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
+	STORE_JUMP(o0, f48, 55f)
+3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
+	STORE_SYNC(o0, f48)
+	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
+	STORE_JUMP(o0, f48, 63f)
+
+40:	FINISH_VISCHUNK(o0, f0,  f2)
+41:	FINISH_VISCHUNK(o0, f2,  f4)
+42:	FINISH_VISCHUNK(o0, f4,  f6)
+43:	FINISH_VISCHUNK(o0, f6,  f8)
+44:	FINISH_VISCHUNK(o0, f8,  f10)
+45:	FINISH_VISCHUNK(o0, f10, f12)
+46:	FINISH_VISCHUNK(o0, f12, f14)
+47:	UNEVEN_VISCHUNK(o0, f14, f0)
+48:	FINISH_VISCHUNK(o0, f16, f18)
+49:	FINISH_VISCHUNK(o0, f18, f20)
+50:	FINISH_VISCHUNK(o0, f20, f22)
+51:	FINISH_VISCHUNK(o0, f22, f24)
+52:	FINISH_VISCHUNK(o0, f24, f26)
+53:	FINISH_VISCHUNK(o0, f26, f28)
+54:	FINISH_VISCHUNK(o0, f28, f30)
+55:	UNEVEN_VISCHUNK(o0, f30, f0)
+56:	FINISH_VISCHUNK(o0, f32, f34)
+57:	FINISH_VISCHUNK(o0, f34, f36)
+58:	FINISH_VISCHUNK(o0, f36, f38)
+59:	FINISH_VISCHUNK(o0, f38, f40)
+60:	FINISH_VISCHUNK(o0, f40, f42)
+61:	FINISH_VISCHUNK(o0, f42, f44)
+62:	FINISH_VISCHUNK(o0, f44, f46)
+63:	UNEVEN_VISCHUNK_LAST(o0, f46, f0)
+
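+	/* Copy tail: stream the remaining doublewords through
+	 * faligndata, then fall through to a byte-at-a-time loop,
+	 * with a final membar before VISExit.
+	 */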
+93:	EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
+	add		%o1, 8, %o1
+	subcc		%g3, 8, %g3
+	faligndata	%f0, %f2, %f8
+	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
+	bl,pn		%xcc, 95f
+	 add		%o0, 8, %o0
+	EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
+	add		%o1, 8, %o1
+	subcc		%g3, 8, %g3
+	faligndata	%f2, %f0, %f8
+	EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
+	bge,pt		%xcc, 93b
+	 add		%o0, 8, %o0
+
+95:	brz,pt		%o2, 2f
+	 mov		%g1, %o1
+
+1:	EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
+	add		%o1, 1, %o1
+	subcc		%o2, 1, %o2
+	EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
+	bne,pt		%xcc, 1b
+	 add		%o0, 1, %o0
+
+2:	membar		#StoreLoad | #StoreStore
+	VISExit
+	retl
+	 mov		EX_RETVAL(%o4), %o0
+
+	.align		64
+70:	/* 16 < len <= (5 * 64) */
+	bne,pn		%XCC, 75f
+	 sub		%o0, %o1, %o3
+
+72:	andn		%o2, 0xf, %GLOBAL_SPARE
+	and		%o2, 0xf, %o2
+1:	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
+	subcc		%GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
+	add		%o1, 0x8, %o1
+	EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 0x8, %o1
+73:	andcc		%o2, 0x8, %g0
+	be,pt		%XCC, 1f
+	 nop
+	EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
+	sub		%o2, 0x8, %o2
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
+	add		%o1, 0x8, %o1
+1:	andcc		%o2, 0x4, %g0
+	be,pt		%XCC, 1f
+	 nop
+	EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
+	sub		%o2, 0x4, %o2
+	EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
+	add		%o1, 0x4, %o1
+1:	cmp		%o2, 0
+	be,pt		%XCC, 85f
+	 nop
+	ba,pt		%xcc, 90f
+	 nop
+
+75:	andcc		%o0, 0x7, %g1
+	sub		%g1, 0x8, %g1
+	be,pn		%icc, 2f
+	 sub		%g0, %g1, %g1
+	sub		%o2, %g1, %o2
+
+1:	EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
+	subcc		%g1, 1, %g1
+	EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
+	bgu,pt		%icc, 1b
+	 add		%o1, 1, %o1
+
+2:	add		%o1, %o3, %o0
+	andcc		%o1, 0x7, %g1
+	bne,pt		%icc, 8f
+	 sll		%g1, 3, %g1
+
+	cmp		%o2, 16
+	bgeu,pt		%icc, 72b
+	 nop
+	ba,a,pt		%xcc, 73b
+
+8:	mov		64, %o3
+	andn		%o1, 0x7, %o1
+	EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
+	sub		%o3, %g1, %o3
+	andn		%o2, 0x7, %GLOBAL_SPARE
+	sllx		%g2, %g1, %g2
+1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
+	subcc		%GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
+	add		%o1, 0x8, %o1
+	srlx		%g3, %o3, %o5
+	or		%o5, %g2, %o5
+	EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
+	add		%o0, 0x8, %o0
+	bgu,pt		%icc, 1b
+	 sllx		%g3, %g1, %g2
+
+	srl		%g1, 3, %g1
+	andcc		%o2, 0x7, %o2
+	be,pn		%icc, 85f
+	 add		%o1, %g1, %o1
+	ba,pt		%xcc, 90f
+	 sub		%o0, %o1, %o3
+
+	.align		64
+80:	/* 0 < len <= 16 */
+	andcc		%o3, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 sub		%o0, %o1, %o3
+
+1:	EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
+	subcc		%o2, 4, %o2
+	EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 4, %o1
+
+85:	retl
+	 mov		EX_RETVAL(%o4), %o0
+
+	.align		32
+90:	EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
+	subcc		%o2, 1, %o2
+	EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
+	bgu,pt		%XCC, 90b
+	 add		%o1, 1, %o1
+	retl
+	 mov		EX_RETVAL(%o4), %o0
+
+	.size		FUNC_NAME, .-FUNC_NAME
+EXPORT_SYMBOL(FUNC_NAME)
diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S
new file mode 100644
index 0000000..9c891e9
--- /dev/null
+++ b/arch/sparc/lib/U3copy_from_user.S
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U3copy_from_user.S: UltraSparc-III optimized copy from userspace.
+ *
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+#define EX_LD(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_LD_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y##_fp;	\
+	.text;			\
+	.align 4;
+
+#define FUNC_NAME		U3copy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
+#define EX_RETVAL(x)		0
+
+#include "U3memcpy.S"
diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S
new file mode 100644
index 0000000..da42460
--- /dev/null
+++ b/arch/sparc/lib/U3copy_to_user.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U3copy_to_user.S: UltraSparc-III optimized copy to userspace.
+ *
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+#define EX_ST(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y;		\
+	.text;			\
+	.align 4;
+
+#define EX_ST_FP(x,y)		\
+98:	x;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, y##_fp;	\
+	.text;			\
+	.align 4;
+
+#define FUNC_NAME		U3copy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] ASI_AIUS
+#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_AIUS
+#define EX_RETVAL(x)		0
+
+	/* Writing to %asi is _expensive_ so we hardcode it.
+	 * Reading %asi to check for KERNEL_DS is comparatively
+	 * cheap.
+	 */
+#define PREAMBLE					\
+	rd		%asi, %g1;			\
+	cmp		%g1, ASI_AIUS;			\
+	bne,pn		%icc, raw_copy_in_user;		\
+	 nop;						\
+
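+	/* The actual copy body lives in U3memcpy.S; the FUNC_NAME,
+	 * STORE and EX_ST* overrides above specialize it into the
+	 * user-space variant without duplicating the copy loops.
+	 */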
+#include "U3memcpy.S"
diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S
new file mode 100644
index 0000000..9248d59
--- /dev/null
+++ b/arch/sparc/lib/U3memcpy.S
@@ -0,0 +1,520 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U3memcpy.S: UltraSparc-III optimized memcpy.
+ *
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+#ifdef __KERNEL__
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#define GLOBAL_SPARE	%g7
+#else
+#define ASI_BLK_P 0xf0
+#define FPRS_FEF  0x04
+#ifdef MEMCPY_DEBUG
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
+		     clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#else
+#define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
+#define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
+#endif
+#define GLOBAL_SPARE	%g5
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x,y)	x
+#endif
+#ifndef EX_LD_FP
+#define EX_LD_FP(x,y)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x,y)	x
+#endif
+#ifndef EX_ST_FP
+#define EX_ST_FP(x,y)	x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr)	type src, [addr]
+#endif
+
+#ifndef STORE_BLK
+#define STORE_BLK(src,addr)	stda src, [addr] ASI_BLK_P
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	U3memcpy
+#endif
+
+#ifndef PREAMBLE
+#define PREAMBLE
+#endif
+
+#ifndef XCC
+#define XCC xcc
+#endif
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	/* Special/non-trivial issues of this code:
+	 *
+	 * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
+	 * 2) Only low 32 FPU registers are used so that only the
+	 *    lower half of the FPU register set is dirtied by this
+	 *    code.  This is especially important in the kernel.
+	 * 3) This code never prefetches cachelines past the end
+	 *    of the source buffer.
+	 */
+
+	.text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+__restore_fp:
+	VISExitHalf
+	retl
+	 nop
+ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+	add	%g1, 1, %g1
+	add	%g2, %g1, %g2
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+ENTRY(U3_retl_o2_plus_g2_fp)
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_fp)
+ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
+	add	%g2, 8, %g2
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %g2, %o0
+ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
+ENTRY(U3_retl_o2)
+	retl
+	 mov	%o2, %o0
+ENDPROC(U3_retl_o2)
+ENTRY(U3_retl_o2_plus_1)
+	retl
+	 add	%o2, 1, %o0
+ENDPROC(U3_retl_o2_plus_1)
+ENTRY(U3_retl_o2_plus_4)
+	retl
+	 add	%o2, 4, %o0
+ENDPROC(U3_retl_o2_plus_4)
+ENTRY(U3_retl_o2_plus_8)
+	retl
+	 add	%o2, 8, %o0
+ENDPROC(U3_retl_o2_plus_8)
+ENTRY(U3_retl_o2_plus_g1_plus_1)
+	add	%g1, 1, %g1
+	retl
+	 add	%o2, %g1, %o0
+ENDPROC(U3_retl_o2_plus_g1_plus_1)
+ENTRY(U3_retl_o2_fp)
+	ba,pt	%xcc, __restore_fp
+	 mov	%o2, %o0
+ENDPROC(U3_retl_o2_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+	sll	%o3, 6, %o3
+	add	%o3, 0x80, %o3
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %o3, %o0
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+	sll	%o3, 6, %o3
+	add	%o3, 0x40, %o3
+	ba,pt	%xcc, __restore_fp
+	 add	%o2, %o3, %o0
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+ENTRY(U3_retl_o2_plus_GS_plus_0x10)
+	add	GLOBAL_SPARE, 0x10, GLOBAL_SPARE
+	retl
+	 add	%o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
+ENTRY(U3_retl_o2_plus_GS_plus_0x08)
+	add	GLOBAL_SPARE, 0x08, GLOBAL_SPARE
+	retl
+	 add	%o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
+ENTRY(U3_retl_o2_and_7_plus_GS)
+	and	%o2, 7, %o2
+	retl
+	 add	%o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_and_7_plus_GS)
+ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
+	add	GLOBAL_SPARE, 8, GLOBAL_SPARE
+	and	%o2, 7, %o2
+	retl
+	 add	%o2, GLOBAL_SPARE, %o0
+ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
+#endif
+
+	.align		64
+
+	/* The cheetah's flexible spine, oversized liver, enlarged heart,
+	 * slender muscular body, and claws make it the swiftest hunter
+	 * in Africa and the fastest animal on land.  Can reach speeds
+	 * of up to 2.4GB per second.
+	 */
+
+	.globl	FUNC_NAME
+	.type	FUNC_NAME,#function
+FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
+	srlx		%o2, 31, %g2
+	cmp		%g2, 0
+
+	/* software trap 5 "Range Check" if len >= 0x80000000 */
+	tne		%xcc, 5
+	PREAMBLE
+	mov		%o0, %o4
+
+	/* if len == 0 */
+	cmp		%o2, 0
+	be,pn		%XCC, end_return
+	 or		%o0, %o1, %o3
+
+	/* if len < 16 */
+	cmp		%o2, 16
+	blu,a,pn	%XCC, less_than_16
+	 or		%o3, %o2, %o3
+
+	/* if len < 192 */
+	cmp		%o2, (3 * 64)
+	blu,pt		%XCC, less_than_192
+	 andcc		%o3, 0x7, %g0
+
+	/* Clobbers o5/g1/g2/g3/g7/icc/xcc.  We must preserve
+	 * o5 from here until we hit VISExitHalf.
+	 */
+	VISEntryHalf
+
+	/* Is 'dst' already aligned on a 64-byte boundary? */
+	andcc		%o0, 0x3f, %g2
+	be,pt		%XCC, 2f
+
+	/* Compute abs((dst & 0x3f) - 0x40) into %g2.  This is the number
+	 * of bytes to copy to make 'dst' 64-byte aligned.  We pre-
+	 * subtract this from 'len'.
+	 */
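+	/* For example, dst ending in 0x18 gives %g2 = 0x18 - 0x40 = -0x28,
+	 * negated to 0x28, so 40 bytes are copied (and pre-subtracted
+	 * from 'len') before the 64-byte block loop starts.
+	 */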
+	 sub		%o0, %o1, GLOBAL_SPARE
+	sub		%g2, 0x40, %g2
+	sub		%g0, %g2, %g2
+	sub		%o2, %g2, %o2
+	andcc		%g2, 0x7, %g1
+	be,pt		%icc, 2f
+	 and		%g2, 0x38, %g2
+
+1:	subcc		%g1, 0x1, %g1
+	EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
+	EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 0x1, %o1
+
+	add		%o1, GLOBAL_SPARE, %o0
+
+2:	cmp		%g2, 0x0
+	and		%o1, 0x7, %g1
+	be,pt		%icc, 3f
+	 alignaddr	%o1, %g0, %o1
+
+	EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
+	add		%o1, 0x8, %o1
+	subcc		%g2, 0x8, %g2
+	faligndata	%f4, %f6, %f0
+	EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
+	be,pn		%icc, 3f
+	 add		%o0, 0x8, %o0
+
+	EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
+	add		%o1, 0x8, %o1
+	subcc		%g2, 0x8, %g2
+	faligndata	%f6, %f4, %f2
+	EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
+	bne,pt		%icc, 1b
+	 add		%o0, 0x8, %o0
+
+3:	LOAD(prefetch, %o1 + 0x000, #one_read)
+	LOAD(prefetch, %o1 + 0x040, #one_read)
+	andn		%o2, (0x40 - 1), GLOBAL_SPARE
+	LOAD(prefetch, %o1 + 0x080, #one_read)
+	LOAD(prefetch, %o1 + 0x0c0, #one_read)
+	LOAD(prefetch, %o1 + 0x100, #one_read)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
+	LOAD(prefetch, %o1 + 0x140, #one_read)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
+	LOAD(prefetch, %o1 + 0x180, #one_read)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
+	LOAD(prefetch, %o1 + 0x1c0, #one_read)
+	faligndata	%f0, %f2, %f16
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
+	faligndata	%f2, %f4, %f18
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
+	faligndata	%f4, %f6, %f20
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
+	faligndata	%f6, %f8, %f22
+
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
+	faligndata	%f8, %f10, %f24
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
+	faligndata	%f10, %f12, %f26
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
+
+	subcc		GLOBAL_SPARE, 0x80, GLOBAL_SPARE
+	add		%o1, 0x40, %o1
+	bgu,pt		%XCC, 1f
+	 srl		GLOBAL_SPARE, 6, %o3
+	ba,pt		%xcc, 2f
+	 nop
+
+	.align		64
+1:
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	faligndata	%f12, %f14, %f28
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	faligndata	%f14, %f0, %f30
+	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f0, %f2, %f16
+	add		%o0, 0x40, %o0
+
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f2, %f4, %f18
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f4, %f6, %f20
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	subcc		%o3, 0x01, %o3
+	faligndata	%f6, %f8, %f22
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+
+	faligndata	%f8, %f10, %f24
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	LOAD(prefetch, %o1 + 0x1c0, #one_read)
+	faligndata	%f10, %f12, %f26
+	bg,pt		%XCC, 1b
+	 add		%o1, 0x40, %o1
+
+	/* Finally we copy the last full 64-byte block. */
+2:
+	EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	faligndata	%f12, %f14, %f28
+	EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	faligndata	%f14, %f0, %f30
+	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+	EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f0, %f2, %f16
+	EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f2, %f4, %f18
+	EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f4, %f6, %f20
+	EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f6, %f8, %f22
+	EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	faligndata	%f8, %f10, %f24
+	cmp		%g1, 0
+	be,pt		%XCC, 1f
+	 add		%o0, 0x40, %o0
+	EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+1:	faligndata	%f10, %f12, %f26
+	faligndata	%f12, %f14, %f28
+	faligndata	%f14, %f0, %f30
+	EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
+	add		%o0, 0x40, %o0
+	add		%o1, 0x40, %o1
+	membar		#Sync
+
+	/* Now we copy the (len modulo 64) bytes at the end.
+	 * Note how we borrow the %f0 loaded above.
+	 *
+	 * Also notice how this code is careful not to perform a
+	 * load past the end of the src buffer.
+	 */
+	and		%o2, 0x3f, %o2
+	andcc		%o2, 0x38, %g2
+	be,pn		%XCC, 2f
+	 subcc		%g2, 0x8, %g2
+	be,pn		%XCC, 2f
+	 cmp		%g1, 0
+
+	sub		%o2, %g2, %o2
+	be,a,pt		%XCC, 1f
+	 EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
+
+1:	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
+	add		%o1, 0x8, %o1
+	subcc		%g2, 0x8, %g2
+	faligndata	%f0, %f2, %f8
+	EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
+	be,pn		%XCC, 2f
+	 add		%o0, 0x8, %o0
+	EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
+	add		%o1, 0x8, %o1
+	subcc		%g2, 0x8, %g2
+	faligndata	%f2, %f0, %f8
+	EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
+	bne,pn		%XCC, 1b
+	 add		%o0, 0x8, %o0
+
+	/* If anything is left, we copy it one byte at a time.
+	 * Note that %g1 is (src & 0x7) saved above before the
+	 * alignaddr was performed.
+	 */
+2:
+	cmp		%o2, 0
+	add		%o1, %g1, %o1
+	VISExitHalf
+	be,pn		%XCC, end_return
+	 sub		%o0, %o1, %o3
+
+	andcc		%g1, 0x7, %g0
+	bne,pn		%icc, 90f
+	 andcc		%o2, 0x8, %g0
+	be,pt		%icc, 1f
+	 nop
+	EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
+	add		%o1, 0x8, %o1
+	sub		%o2, 8, %o2
+
+1:	andcc		%o2, 0x4, %g0
+	be,pt		%icc, 1f
+	 nop
+	EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
+	EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
+	add		%o1, 0x4, %o1
+	sub		%o2, 4, %o2
+
+1:	andcc		%o2, 0x2, %g0
+	be,pt		%icc, 1f
+	 nop
+	EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
+	EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
+	add		%o1, 0x2, %o1
+	sub		%o2, 2, %o2
+
+1:	andcc		%o2, 0x1, %g0
+	be,pt		%icc, end_return
+	 nop
+	EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
+	ba,pt		%xcc, end_return
+	 EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
+
+	.align		64
+	/* 16 <= len < 192 */
+less_than_192:
+	bne,pn		%XCC, 75f
+	 sub		%o0, %o1, %o3
+
+72:
+	andn		%o2, 0xf, GLOBAL_SPARE
+	and		%o2, 0xf, %o2
+1:	subcc		GLOBAL_SPARE, 0x10, GLOBAL_SPARE
+	EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
+	EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
+	add		%o1, 0x8, %o1
+	EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 0x8, %o1
+73:	andcc		%o2, 0x8, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x8, %o2
+	EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
+	EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
+	add		%o1, 0x8, %o1
+1:	andcc		%o2, 0x4, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x4, %o2
+	EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
+	EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
+	add		%o1, 0x4, %o1
+1:	cmp		%o2, 0
+	be,pt		%XCC, end_return
+	 nop
+	ba,pt		%xcc, 90f
+	 nop
+
+75:
+	andcc		%o0, 0x7, %g1
+	sub		%g1, 0x8, %g1
+	be,pn		%icc, 2f
+	 sub		%g0, %g1, %g1
+	sub		%o2, %g1, %o2
+
+1:	subcc		%g1, 1, %g1
+	EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
+	EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
+	bgu,pt		%icc, 1b
+	 add		%o1, 1, %o1
+
+2:	add		%o1, %o3, %o0
+	andcc		%o1, 0x7, %g1
+	bne,pt		%icc, 8f
+	 sll		%g1, 3, %g1
+
+	cmp		%o2, 16
+	bgeu,pt		%icc, 72b
+	 nop
+	ba,a,pt		%xcc, 73b
+
+8:	mov		64, %o3
+	andn		%o1, 0x7, %o1
+	EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
+	sub		%o3, %g1, %o3
+	andn		%o2, 0x7, GLOBAL_SPARE
+	sllx		%g2, %g1, %g2
+1:	EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
+	subcc		GLOBAL_SPARE, 0x8, GLOBAL_SPARE
+	add		%o1, 0x8, %o1
+	srlx		%g3, %o3, %o5
+	or		%o5, %g2, %o5
+	EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
+	add		%o0, 0x8, %o0
+	bgu,pt		%icc, 1b
+	 sllx		%g3, %g1, %g2
+
+	srl		%g1, 3, %g1
+	andcc		%o2, 0x7, %o2
+	be,pn		%icc, end_return
+	 add		%o1, %g1, %o1
+	ba,pt		%xcc, 90f
+	 sub		%o0, %o1, %o3
+
+	.align		64
+	/* 0 < len < 16 */
+less_than_16:
+	andcc		%o3, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 sub		%o0, %o1, %o3
+
+1:
+	subcc		%o2, 4, %o2
+	EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
+	EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
+	bgu,pt		%XCC, 1b
+	 add		%o1, 4, %o1
+
+end_return:
+	retl
+	 mov		EX_RETVAL(%o4), %o0
+
+	.align		32
+90:
+	subcc		%o2, 1, %o2
+	EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
+	EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
+	bgu,pt		%XCC, 90b
+	 add		%o1, 1, %o1
+	retl
+	 mov		EX_RETVAL(%o4), %o0
+
+	.size		FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/U3patch.S b/arch/sparc/lib/U3patch.S
new file mode 100644
index 0000000..9a88808
--- /dev/null
+++ b/arch/sparc/lib/U3patch.S
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* U3patch.S: Patch Ultra-I routines with Ultra-III variant.
+ *
+ * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+ */
+
+#define BRANCH_ALWAYS	0x10680000
+#define NOP		0x01000000
+#define ULTRA3_DO_PATCH(OLD, NEW)	\
+	sethi	%hi(NEW), %g1; \
+	or	%g1, %lo(NEW), %g1; \
+	sethi	%hi(OLD), %g2; \
+	or	%g2, %lo(OLD), %g2; \
+	sub	%g1, %g2, %g1; \
+	sethi	%hi(BRANCH_ALWAYS), %g3; \
+	sll	%g1, 11, %g1; \
+	srl	%g1, 11 + 2, %g1; \
+	or	%g3, %lo(BRANCH_ALWAYS), %g3; \
+	or	%g3, %g1, %g3; \
+	stw	%g3, [%g2]; \
+	sethi	%hi(NOP), %g3; \
+	or	%g3, %lo(NOP), %g3; \
+	stw	%g3, [%g2 + 0x4]; \
+	flush	%g2;
+
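+	/* Each patch overwrites the first two instructions of OLD with a
+	 * "branch always" to NEW (the 19-bit word displacement computed
+	 * from NEW - OLD) plus a nop for the delay slot, then flushes
+	 * the modified instructions from the I-cache.
+	 */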
+	.globl	cheetah_patch_copyops
+	.type	cheetah_patch_copyops,#function
+cheetah_patch_copyops:
+	ULTRA3_DO_PATCH(memcpy, U3memcpy)
+	ULTRA3_DO_PATCH(raw_copy_from_user, U3copy_from_user)
+	ULTRA3_DO_PATCH(raw_copy_to_user, U3copy_to_user)
+	retl
+	 nop
+	.size	cheetah_patch_copyops,.-cheetah_patch_copyops
diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S
new file mode 100644
index 0000000..9c8eb20
--- /dev/null
+++ b/arch/sparc/lib/VISsave.S
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * VISsave.S: Code for saving FPU register state for
+ *            VIS routines. One should not call this directly,
+ *            but use macros provided in <asm/visasm.h>.
+ *
+ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/visasm.h>
+#include <asm/thread_info.h>
+#include <asm/export.h>
+
+	/* On entry: %o5=current FPRS value, %g7 is caller's address */
+	/* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
+
+	/* Nothing special need be done here to handle preemption;
+	 * this FPU save/restore mechanism is already preemption safe.
+	 */
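+	/* TI_FPDEPTH tracks how deeply FPU usage is nested; non-zero
+	 * depths below get their own GSR/FSR slots and their own block
+	 * of FP register save space in the thread_info area.
+	 */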
+	.text
+	.align		32
+ENTRY(VISenter)
+	ldub		[%g6 + TI_FPDEPTH], %g1
+	brnz,a,pn	%g1, 1f
+	 cmp		%g1, 1
+	stb		%g0, [%g6 + TI_FPSAVED]
+	stx		%fsr, [%g6 + TI_XFSR]
+9:	jmpl		%g7 + %g0, %g0
+	 nop
+1:	bne,pn		%icc, 2f
+
+	 srl		%g1, 1, %g1
+vis1:	ldub		[%g6 + TI_FPSAVED], %g3
+	stx		%fsr, [%g6 + TI_XFSR]
+	or		%g3, %o5, %g3
+	stb		%g3, [%g6 + TI_FPSAVED]
+	rd		%gsr, %g3
+	clr		%g1
+	ba,pt		%xcc, 3f
+
+	 stx		%g3, [%g6 + TI_GSR]
+2:	add		%g6, %g1, %g3
+	mov		FPRS_DU | FPRS_DL | FPRS_FEF, %o5
+	sll		%g1, 3, %g1
+	stb		%o5, [%g3 + TI_FPSAVED]
+	rd		%gsr, %g2
+	add		%g6, %g1, %g3
+	stx		%g2, [%g3 + TI_GSR]
+
+	add		%g6, %g1, %g2
+	stx		%fsr, [%g2 + TI_XFSR]
+	sll		%g1, 5, %g1
+3:	andcc		%o5, FPRS_DL|FPRS_DU, %g0
+	be,pn		%icc, 9b
+	 add		%g6, TI_FPREGS, %g2
+	andcc		%o5, FPRS_DL, %g0
+
+	be,pn		%icc, 4f
+	 add		%g6, TI_FPREGS+0x40, %g3
+	membar		#Sync
+	stda		%f0, [%g2 + %g1] ASI_BLK_P
+	stda		%f16, [%g3 + %g1] ASI_BLK_P
+	membar		#Sync
+	andcc		%o5, FPRS_DU, %g0
+	be,pn		%icc, 5f
+4:	 add		%g1, 128, %g1
+	membar		#Sync
+	stda		%f32, [%g2 + %g1] ASI_BLK_P
+
+	stda		%f48, [%g3 + %g1] ASI_BLK_P
+5:	membar		#Sync
+	ba,pt		%xcc, 80f
+	 nop
+
+	.align		32
+80:	jmpl		%g7 + %g0, %g0
+	 nop
+ENDPROC(VISenter)
+EXPORT_SYMBOL(VISenter)
diff --git a/arch/sparc/lib/ashldi3.S b/arch/sparc/lib/ashldi3.S
new file mode 100644
index 0000000..2d72de8
--- /dev/null
+++ b/arch/sparc/lib/ashldi3.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ashldi3.S:	GCC emits these for certain drivers playing
+ *		with long longs.
+ *
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
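+	/* The 64-bit operand arrives as a register pair: %o0 holds the
+	 * high word, %o1 the low word.  Shift counts of 32 or more move
+	 * shifted low-word bits straight into the high word; smaller
+	 * counts combine bits from both words.
+	 */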
+ENTRY(__ashldi3)
+	cmp	%o2, 0
+	be	9f
+	 mov	0x20, %g2
+
+	sub	%g2, %o2, %g2
+	cmp	%g2, 0
+	bg	7f
+	 sll	%o0, %o2, %g3
+
+	neg	%g2
+	clr	%o5
+	b	8f
+	 sll	%o1, %g2, %o4
+7:
+	srl	%o1, %g2, %g2
+	sll	%o1, %o2, %o5
+	or	%g3, %g2, %o4
+8:
+	mov	%o4, %o0
+	mov	%o5, %o1
+9:
+	retl
+	 nop
+ENDPROC(__ashldi3)
+EXPORT_SYMBOL(__ashldi3)
diff --git a/arch/sparc/lib/ashrdi3.S b/arch/sparc/lib/ashrdi3.S
new file mode 100644
index 0000000..05dfda9
--- /dev/null
+++ b/arch/sparc/lib/ashrdi3.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ashrdi3.S:	The filesystem code creates all kinds of references to
+ *              this little routine on the sparc with gcc.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+ENTRY(__ashrdi3)
+	tst	%o2
+	be	3f
+	 or	%g0, 32, %g2
+
+	sub	%g2, %o2, %g2
+
+	tst	%g2
+	bg	1f
+	 sra	%o0, %o2, %o4
+
+	sra	%o0, 31, %o4
+	sub	%g0, %g2, %g2
+	ba	2f
+	 sra	%o0, %g2, %o5
+
+1:
+	sll	%o0, %g2, %g3
+	srl	%o1, %o2, %g2
+	or	%g2, %g3, %o5
+2:
+	or	%g0, %o4, %o0
+	or	%g0, %o5, %o1
+3:
+	jmpl	%o7 + 8, %g0
+	 nop
+ENDPROC(__ashrdi3)
+EXPORT_SYMBOL(__ashrdi3)
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
new file mode 100644
index 0000000..281fa63
--- /dev/null
+++ b/arch/sparc/lib/atomic32.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * atomic32.c: 32-bit atomic_t implementation
+ *
+ * Copyright (C) 2004 Keith M Wesolowski
+ * Copyright (C) 2007 Kyle McMartin
+ * 
+ * Based on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf
+ */
+
+#include <linux/atomic.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+
+#ifdef CONFIG_SMP
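+/* sparc32 has no compare-and-swap instruction, so the atomics are
+ * emulated with a hashed array of spinlocks: the lock guarding a given
+ * atomic_t is selected by hashing its address, which keeps unrelated
+ * atomics from serializing on a single global lock.
+ */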
+#define ATOMIC_HASH_SIZE	4
+#define ATOMIC_HASH(a)	(&__atomic_hash[(((unsigned long)a)>>8) & (ATOMIC_HASH_SIZE-1)])
+
+spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] = {
+	[0 ... (ATOMIC_HASH_SIZE-1)] = __SPIN_LOCK_UNLOCKED(__atomic_hash)
+};
+
+#else /* SMP */
+
+static DEFINE_SPINLOCK(dummy);
+#define ATOMIC_HASH_SIZE	1
+#define ATOMIC_HASH(a)		(&dummy)
+
+#endif /* SMP */
+
+#define ATOMIC_FETCH_OP(op, c_op)					\
+int atomic_fetch_##op(int i, atomic_t *v)				\
+{									\
+	int ret;							\
+	unsigned long flags;						\
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);			\
+									\
+	ret = v->counter;						\
+	v->counter c_op i;						\
+									\
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
+	return ret;							\
+}									\
+EXPORT_SYMBOL(atomic_fetch_##op);
+
+#define ATOMIC_OP_RETURN(op, c_op)					\
+int atomic_##op##_return(int i, atomic_t *v)				\
+{									\
+	int ret;							\
+	unsigned long flags;						\
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);			\
+									\
+	ret = (v->counter c_op i);					\
+									\
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
+	return ret;							\
+}									\
+EXPORT_SYMBOL(atomic_##op##_return);
+
+ATOMIC_OP_RETURN(add, +=)
+
+ATOMIC_FETCH_OP(add, +=)
+ATOMIC_FETCH_OP(and, &=)
+ATOMIC_FETCH_OP(or, |=)
+ATOMIC_FETCH_OP(xor, ^=)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+
+int atomic_xchg(atomic_t *v, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	ret = v->counter;
+	v->counter = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+	return ret;
+}
+EXPORT_SYMBOL(atomic_xchg);
+
+int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	ret = v->counter;
+	if (likely(ret == old))
+		v->counter = new;
+
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+	return ret;
+}
+EXPORT_SYMBOL(atomic_cmpxchg);
+
+int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	ret = v->counter;
+	if (ret != u)
+		v->counter += a;
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+	return ret;
+}
+EXPORT_SYMBOL(atomic_fetch_add_unless);
+
+/* Atomic operations are already serializing */
+void atomic_set(atomic_t *v, int i)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(v), flags);
+	v->counter = i;
+	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
+}
+EXPORT_SYMBOL(atomic_set);
+
+unsigned long ___set_bit(unsigned long *addr, unsigned long mask)
+{
+	unsigned long old, flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(addr), flags);
+	old = *addr;
+	*addr = old | mask;
+	spin_unlock_irqrestore(ATOMIC_HASH(addr), flags);
+
+	return old & mask;
+}
+EXPORT_SYMBOL(___set_bit);
+
+unsigned long ___clear_bit(unsigned long *addr, unsigned long mask)
+{
+	unsigned long old, flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(addr), flags);
+	old = *addr;
+	*addr = old & ~mask;
+	spin_unlock_irqrestore(ATOMIC_HASH(addr), flags);
+
+	return old & mask;
+}
+EXPORT_SYMBOL(___clear_bit);
+
+unsigned long ___change_bit(unsigned long *addr, unsigned long mask)
+{
+	unsigned long old, flags;
+
+	spin_lock_irqsave(ATOMIC_HASH(addr), flags);
+	old = *addr;
+	*addr = old ^ mask;
+	spin_unlock_irqrestore(ATOMIC_HASH(addr), flags);
+
+	return old & mask;
+}
+EXPORT_SYMBOL(___change_bit);
+
+unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new)
+{
+	unsigned long flags;
+	u32 prev;
+
+	spin_lock_irqsave(ATOMIC_HASH(ptr), flags);
+	if ((prev = *ptr) == old)
+		*ptr = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);
+
+	return (unsigned long)prev;
+}
+EXPORT_SYMBOL(__cmpxchg_u32);
+
+u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new)
+{
+	unsigned long flags;
+	u64 prev;
+
+	spin_lock_irqsave(ATOMIC_HASH(ptr), flags);
+	if ((prev = *ptr) == old)
+		*ptr = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);
+
+	return prev;
+}
+EXPORT_SYMBOL(__cmpxchg_u64);
+
+unsigned long __xchg_u32(volatile u32 *ptr, u32 new)
+{
+	unsigned long flags;
+	u32 prev;
+
+	spin_lock_irqsave(ATOMIC_HASH(ptr), flags);
+	prev = *ptr;
+	*ptr = new;
+	spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags);
+
+	return (unsigned long)prev;
+}
+EXPORT_SYMBOL(__xchg_u32);
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
new file mode 100644
index 0000000..456b65a
--- /dev/null
+++ b/arch/sparc/lib/atomic_64.S
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* atomic.S: These things are too big to do inline.
+ *
+ * Copyright (C) 1999, 2007, 2012 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/linkage.h>
+#include <asm/asi.h>
+#include <asm/backoff.h>
+#include <asm/export.h>
+
+	.text
+
+	/* Three versions of the atomic routines: one that
+	 * does not return a value and does not perform
+	 * memory barriers, and two that return a value (the
+	 * new and the old value, respectively) and do
+	 * perform the barriers.
+	 */
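+	/* All of them share the same lock-free pattern: load the old
+	 * value, compute the new one, attempt a cas, and on failure
+	 * retry through BACKOFF_SPIN's exponential delay.
+	 */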
+
+#define ATOMIC_OP(op)							\
+ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
+	BACKOFF_SETUP(%o2);						\
+1:	lduw	[%o1], %g1;						\
+	op	%g1, %o0, %g7;						\
+	cas	[%o1], %g1, %g7;					\
+	cmp	%g1, %g7;						\
+	bne,pn	%icc, BACKOFF_LABEL(2f, 1b);				\
+	 nop;								\
+	retl;								\
+	 nop;								\
+2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
+ENDPROC(atomic_##op);							\
+EXPORT_SYMBOL(atomic_##op);
+
+#define ATOMIC_OP_RETURN(op)						\
+ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
+	BACKOFF_SETUP(%o2);						\
+1:	lduw	[%o1], %g1;						\
+	op	%g1, %o0, %g7;						\
+	cas	[%o1], %g1, %g7;					\
+	cmp	%g1, %g7;						\
+	bne,pn	%icc, BACKOFF_LABEL(2f, 1b);				\
+	 op	%g1, %o0, %g1;						\
+	retl;								\
+	 sra	%g1, 0, %o0;						\
+2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
+ENDPROC(atomic_##op##_return);						\
+EXPORT_SYMBOL(atomic_##op##_return);
+
+#define ATOMIC_FETCH_OP(op)						\
+ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
+	BACKOFF_SETUP(%o2);						\
+1:	lduw	[%o1], %g1;						\
+	op	%g1, %o0, %g7;						\
+	cas	[%o1], %g1, %g7;					\
+	cmp	%g1, %g7;						\
+	bne,pn	%icc, BACKOFF_LABEL(2f, 1b);				\
+	 nop;								\
+	retl;								\
+	 sra	%g1, 0, %o0;						\
+2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
+ENDPROC(atomic_fetch_##op);						\
+EXPORT_SYMBOL(atomic_fetch_##op);
+
+ATOMIC_OP(add)
+ATOMIC_OP_RETURN(add)
+ATOMIC_FETCH_OP(add)
+
+ATOMIC_OP(sub)
+ATOMIC_OP_RETURN(sub)
+ATOMIC_FETCH_OP(sub)
+
+ATOMIC_OP(and)
+ATOMIC_FETCH_OP(and)
+
+ATOMIC_OP(or)
+ATOMIC_FETCH_OP(or)
+
+ATOMIC_OP(xor)
+ATOMIC_FETCH_OP(xor)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#define ATOMIC64_OP(op)							\
+ENTRY(atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
+	BACKOFF_SETUP(%o2);						\
+1:	ldx	[%o1], %g1;						\
+	op	%g1, %o0, %g7;						\
+	casx	[%o1], %g1, %g7;					\
+	cmp	%g1, %g7;						\
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b);				\
+	 nop;								\
+	retl;								\
+	 nop;								\
+2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
+ENDPROC(atomic64_##op);							\
+EXPORT_SYMBOL(atomic64_##op);
+
+#define ATOMIC64_OP_RETURN(op)						\
+ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
+	BACKOFF_SETUP(%o2);						\
+1:	ldx	[%o1], %g1;						\
+	op	%g1, %o0, %g7;						\
+	casx	[%o1], %g1, %g7;					\
+	cmp	%g1, %g7;						\
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b);				\
+	 nop;								\
+	retl;								\
+	 op	%g1, %o0, %o0;						\
+2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
+ENDPROC(atomic64_##op##_return);					\
+EXPORT_SYMBOL(atomic64_##op##_return);
+
+#define ATOMIC64_FETCH_OP(op)						\
+ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
+	BACKOFF_SETUP(%o2);						\
+1:	ldx	[%o1], %g1;						\
+	op	%g1, %o0, %g7;						\
+	casx	[%o1], %g1, %g7;					\
+	cmp	%g1, %g7;						\
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b);				\
+	 nop;								\
+	retl;								\
+	 mov	%g1, %o0;						\
+2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
+ENDPROC(atomic64_fetch_##op);						\
+EXPORT_SYMBOL(atomic64_fetch_##op);
+
+ATOMIC64_OP(add)
+ATOMIC64_OP_RETURN(add)
+ATOMIC64_FETCH_OP(add)
+
+ATOMIC64_OP(sub)
+ATOMIC64_OP_RETURN(sub)
+ATOMIC64_FETCH_OP(sub)
+
+ATOMIC64_OP(and)
+ATOMIC64_FETCH_OP(and)
+
+ATOMIC64_OP(or)
+ATOMIC64_FETCH_OP(or)
+
+ATOMIC64_OP(xor)
+ATOMIC64_FETCH_OP(xor)
+
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
+	BACKOFF_SETUP(%o2)
+1:	ldx	[%o0], %g1
+	brlez,pn %g1, 3f
+	 sub	%g1, 1, %g7
+	casx	[%o0], %g1, %g7
+	cmp	%g1, %g7
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 nop
+3:	retl
+	 sub	%g1, 1, %o0
+2:	BACKOFF_SPIN(%o2, %o3, 1b)
+ENDPROC(atomic64_dec_if_positive)
+EXPORT_SYMBOL(atomic64_dec_if_positive)
diff --git a/arch/sparc/lib/bitext.c b/arch/sparc/lib/bitext.c
new file mode 100644
index 0000000..32a5c1d
--- /dev/null
+++ b/arch/sparc/lib/bitext.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bitext.c: kernel little helper (of bit shuffling variety).
+ *
+ * Copyright (C) 2002 Pete Zaitcev <zaitcev@yahoo.com>
+ *
+ * The algorithm for searching a string of zero bits is geared towards its
+ * application.  We expect a couple of fixed request sizes, so a rotating
+ * counter, reset by align size, should provide a fast enough search while
+ * maintaining low fragmentation.
+ */
+
+#include <linux/string.h>
+#include <linux/bitmap.h>
+
+#include <asm/bitext.h>
+
+/**
+ * bit_map_string_get - find and set a bit string in bit map.
+ * @t: the bit map.
+ * @len: requested string length
+ * @align: requested alignment
+ *
+ * Returns offset in the map or -1 if out of space.
+ *
+ * Not safe to call from an interrupt (uses spin_lock).
+ */
+int bit_map_string_get(struct bit_map *t, int len, int align)
+{
+	int offset, count;	/* siamese twins */
+	int off_new;
+	int align1;
+	int i, color;
+
+	if (t->num_colors) {
+		/* align is overloaded to be the page color */
+		color = align;
+		align = t->num_colors;
+	} else {
+		color = 0;
+		if (align == 0)
+			align = 1;
+	}
+	align1 = align - 1;
+	if ((align & align1) != 0)
+		BUG();
+	if (align < 0 || align >= t->size)
+		BUG();
+	if (len <= 0 || len > t->size)
+		BUG();
+	color &= align1;
+
+	spin_lock(&t->lock);
+	if (len < t->last_size)
+		offset = t->first_free;
+	else
+		offset = t->last_off & ~align1;
+	count = 0;
+	for (;;) {
+		off_new = find_next_zero_bit(t->map, t->size, offset);
+		off_new = ((off_new + align1) & ~align1) + color;
+		count += off_new - offset;
+		offset = off_new;
+		if (offset >= t->size)
+			offset = 0;
+		if (count + len > t->size) {
+			spin_unlock(&t->lock);
+/* P3 */ printk(KERN_ERR
+  "bitmap out: size %d used %d off %d len %d align %d count %d\n",
+  t->size, t->used, offset, len, align, count);
+			return -1;
+		}
+
+		if (offset + len > t->size) {
+			count += t->size - offset;
+			offset = 0;
+			continue;
+		}
+
+		i = 0;
+		while (test_bit(offset + i, t->map) == 0) {
+			i++;
+			if (i == len) {
+				bitmap_set(t->map, offset, len);
+				if (offset == t->first_free)
+					t->first_free = find_next_zero_bit
+							(t->map, t->size,
+							 t->first_free + len);
+				if ((t->last_off = offset + len) >= t->size)
+					t->last_off = 0;
+				t->used += len;
+				t->last_size = len;
+				spin_unlock(&t->lock);
+				return offset;
+			}
+		}
+		count += i + 1;
+		if ((offset += i + 1) >= t->size)
+			offset = 0;
+	}
+}
+
+void bit_map_clear(struct bit_map *t, int offset, int len)
+{
+	int i;
+
+	if (t->used < len)
+		BUG();		/* Much too late to do any good, but alas... */
+	spin_lock(&t->lock);
+	for (i = 0; i < len; i++) {
+		if (test_bit(offset + i, t->map) == 0)
+			BUG();
+		__clear_bit(offset + i, t->map);
+	}
+	if (offset < t->first_free)
+		t->first_free = offset;
+	t->used -= len;
+	spin_unlock(&t->lock);
+}
+
+void bit_map_init(struct bit_map *t, unsigned long *map, int size)
+{
+	bitmap_zero(map, size);
+	memset(t, 0, sizeof *t);
+	spin_lock_init(&t->lock);
+	t->map = map;
+	t->size = size;
+}
diff --git a/arch/sparc/lib/bitops.S b/arch/sparc/lib/bitops.S
new file mode 100644
index 0000000..9d647f9
--- /dev/null
+++ b/arch/sparc/lib/bitops.S
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* bitops.S: Sparc64 atomic bit operations.
+ *
+ * Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/linkage.h>
+#include <asm/asi.h>
+#include <asm/backoff.h>
+#include <asm/export.h>
+
+	.text
+
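+	/* All of these share the same address math: nr >> 6 selects the
+	 * 64-bit word, 1 << (nr & 63) builds the mask, and the word is
+	 * updated with the same cas-plus-backoff loop as atomic_64.S.
+	 */
+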
+ENTRY(test_and_set_bit)	/* %o0=nr, %o1=addr */
+	BACKOFF_SETUP(%o3)
+	srlx	%o0, 6, %g1
+	mov	1, %o2
+	sllx	%g1, 3, %g3
+	and	%o0, 63, %g2
+	sllx	%o2, %g2, %o2
+	add	%o1, %g3, %o1
+1:	ldx	[%o1], %g7
+	or	%g7, %o2, %g1
+	casx	[%o1], %g7, %g1
+	cmp	%g7, %g1
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 and	%g7, %o2, %g2
+	clr	%o0
+	movrne	%g2, 1, %o0
+	retl
+	 nop
+2:	BACKOFF_SPIN(%o3, %o4, 1b)
+ENDPROC(test_and_set_bit)
+EXPORT_SYMBOL(test_and_set_bit)
+
+ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */
+	BACKOFF_SETUP(%o3)
+	srlx	%o0, 6, %g1
+	mov	1, %o2
+	sllx	%g1, 3, %g3
+	and	%o0, 63, %g2
+	sllx	%o2, %g2, %o2
+	add	%o1, %g3, %o1
+1:	ldx	[%o1], %g7
+	andn	%g7, %o2, %g1
+	casx	[%o1], %g7, %g1
+	cmp	%g7, %g1
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 and	%g7, %o2, %g2
+	clr	%o0
+	movrne	%g2, 1, %o0
+	retl
+	 nop
+2:	BACKOFF_SPIN(%o3, %o4, 1b)
+ENDPROC(test_and_clear_bit)
+EXPORT_SYMBOL(test_and_clear_bit)
+
+ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */
+	BACKOFF_SETUP(%o3)
+	srlx	%o0, 6, %g1
+	mov	1, %o2
+	sllx	%g1, 3, %g3
+	and	%o0, 63, %g2
+	sllx	%o2, %g2, %o2
+	add	%o1, %g3, %o1
+1:	ldx	[%o1], %g7
+	xor	%g7, %o2, %g1
+	casx	[%o1], %g7, %g1
+	cmp	%g7, %g1
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 and	%g7, %o2, %g2
+	clr	%o0
+	movrne	%g2, 1, %o0
+	retl
+	 nop
+2:	BACKOFF_SPIN(%o3, %o4, 1b)
+ENDPROC(test_and_change_bit)
+EXPORT_SYMBOL(test_and_change_bit)
+
+ENTRY(set_bit) /* %o0=nr, %o1=addr */
+	BACKOFF_SETUP(%o3)
+	srlx	%o0, 6, %g1
+	mov	1, %o2
+	sllx	%g1, 3, %g3
+	and	%o0, 63, %g2
+	sllx	%o2, %g2, %o2
+	add	%o1, %g3, %o1
+1:	ldx	[%o1], %g7
+	or	%g7, %o2, %g1
+	casx	[%o1], %g7, %g1
+	cmp	%g7, %g1
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 nop
+	retl
+	 nop
+2:	BACKOFF_SPIN(%o3, %o4, 1b)
+ENDPROC(set_bit)
+EXPORT_SYMBOL(set_bit)
+
+ENTRY(clear_bit) /* %o0=nr, %o1=addr */
+	BACKOFF_SETUP(%o3)
+	srlx	%o0, 6, %g1
+	mov	1, %o2
+	sllx	%g1, 3, %g3
+	and	%o0, 63, %g2
+	sllx	%o2, %g2, %o2
+	add	%o1, %g3, %o1
+1:	ldx	[%o1], %g7
+	andn	%g7, %o2, %g1
+	casx	[%o1], %g7, %g1
+	cmp	%g7, %g1
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 nop
+	retl
+	 nop
+2:	BACKOFF_SPIN(%o3, %o4, 1b)
+ENDPROC(clear_bit)
+EXPORT_SYMBOL(clear_bit)
+
+ENTRY(change_bit) /* %o0=nr, %o1=addr */
+	BACKOFF_SETUP(%o3)
+	srlx	%o0, 6, %g1
+	mov	1, %o2
+	sllx	%g1, 3, %g3
+	and	%o0, 63, %g2
+	sllx	%o2, %g2, %o2
+	add	%o1, %g3, %o1
+1:	ldx	[%o1], %g7
+	xor	%g7, %o2, %g1
+	casx	[%o1], %g7, %g1
+	cmp	%g7, %g1
+	bne,pn	%xcc, BACKOFF_LABEL(2f, 1b)
+	 nop
+	retl
+	 nop
+2:	BACKOFF_SPIN(%o3, %o4, 1b)
+ENDPROC(change_bit)
+EXPORT_SYMBOL(change_bit)
diff --git a/arch/sparc/lib/blockops.S b/arch/sparc/lib/blockops.S
new file mode 100644
index 0000000..76ddd1f
--- /dev/null
+++ b/arch/sparc/lib/blockops.S
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * blockops.S: Common block zero optimized routines.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/export.h>
+
+	/* Zero out 64 bytes of memory at (buf + offset).
+	 * Assumes %g1 contains zero.
+	 */
+#define BLAST_BLOCK(buf, offset) \
+	std	%g0, [buf + offset + 0x38]; \
+	std	%g0, [buf + offset + 0x30]; \
+	std	%g0, [buf + offset + 0x28]; \
+	std	%g0, [buf + offset + 0x20]; \
+	std	%g0, [buf + offset + 0x18]; \
+	std	%g0, [buf + offset + 0x10]; \
+	std	%g0, [buf + offset + 0x08]; \
+	std	%g0, [buf + offset + 0x00];
+
+	/* Copy 32 bytes of memory at (src + offset) to
+	 * (dst + offset).
+	 */
+#define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+	ldd	[src + offset + 0x18], t0; \
+	ldd	[src + offset + 0x10], t2; \
+	ldd	[src + offset + 0x08], t4; \
+	ldd	[src + offset + 0x00], t6; \
+	std	t0, [dst + offset + 0x18]; \
+	std	t2, [dst + offset + 0x10]; \
+	std	t4, [dst + offset + 0x08]; \
+	std	t6, [dst + offset + 0x00];
+
+	/* Profiling evidence indicates that memset() is
+	 * commonly called for blocks of size PAGE_SIZE or
+	 * (2 * PAGE_SIZE) (kernel stacks), with a second
+	 * arg of zero.  We assume in all of these cases
+	 * that the buffer is aligned on at least an 8 byte
+	 * boundary.
+	 *
+	 * Therefore we special case them to make them
+	 * as fast as possible.
+	 */
+
+	.text
+ENTRY(bzero_1page)
+/* NOTE: If you change the number of insns of this routine, please check
+ * arch/sparc/mm/hypersparc.S */
+	/* %o0 = buf */
+	or	%g0, %g0, %g1
+	or	%o0, %g0, %o1
+	or	%g0, (PAGE_SIZE >> 8), %g2
+1:
+	BLAST_BLOCK(%o0, 0x00)
+	BLAST_BLOCK(%o0, 0x40)
+	BLAST_BLOCK(%o0, 0x80)
+	BLAST_BLOCK(%o0, 0xc0)
+	subcc	%g2, 1, %g2
+	bne	1b
+	 add	%o0, 0x100, %o0
+
+	retl
+	 nop
+ENDPROC(bzero_1page)
+EXPORT_SYMBOL(bzero_1page)
+
+ENTRY(__copy_1page)
+/* NOTE: If you change the number of insns of this routine, please check
+ * arch/sparc/mm/hypersparc.S */
+	/* %o0 = dst, %o1 = src */
+	or	%g0, (PAGE_SIZE >> 8), %g1
+1:
+	MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5, %g2, %g3, %g4, %g5)
+	subcc	%g1, 1, %g1
+	add	%o0, 0x100, %o0
+	bne	1b
+	 add	%o1, 0x100, %o1
+
+	retl
+	 nop
+ENDPROC(__copy_1page)
+EXPORT_SYMBOL(__copy_1page)
diff --git a/arch/sparc/lib/bzero.S b/arch/sparc/lib/bzero.S
new file mode 100644
index 0000000..87fec4c
--- /dev/null
+++ b/arch/sparc/lib/bzero.S
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* bzero.S: Simple prefetching memset, bzero, and clear_user
+ *          implementations.
+ *
+ * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+
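+	/* memset first replicates the pattern byte into all eight bytes
+	 * of %o2 (b -> bb -> bbbb -> bbbbbbbb) so the unrolled loops can
+	 * use 64-bit stx stores; __bzero enters the same path with a
+	 * zero pattern.
+	 */
+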
+ENTRY(memset) /* %o0=buf, %o1=pat, %o2=len */
+	and		%o1, 0xff, %o3
+	mov		%o2, %o1
+	sllx		%o3, 8, %g1
+	or		%g1, %o3, %o2
+	sllx		%o2, 16, %g1
+	or		%g1, %o2, %o2
+	sllx		%o2, 32, %g1
+	ba,pt		%xcc, 1f
+	 or		%g1, %o2, %o2
+
+ENTRY(__bzero) /* %o0=buf, %o1=len */
+	clr		%o2
+1:	mov		%o0, %o3
+	brz,pn		%o1, __bzero_done
+	 cmp		%o1, 16
+	bl,pn		%icc, __bzero_tiny
+	 prefetch	[%o0 + 0x000], #n_writes
+	andcc		%o0, 0x3, %g0
+	be,pt		%icc, 2f
+1:	 stb		%o2, [%o0 + 0x00]
+	add		%o0, 1, %o0
+	andcc		%o0, 0x3, %g0
+	bne,pn		%icc, 1b
+	 sub		%o1, 1, %o1
+2:	andcc		%o0, 0x7, %g0
+	be,pt		%icc, 3f
+	 stw		%o2, [%o0 + 0x00]
+	sub		%o1, 4, %o1
+	add		%o0, 4, %o0
+3:	and		%o1, 0x38, %g1
+	cmp		%o1, 0x40
+	andn		%o1, 0x3f, %o4
+	bl,pn		%icc, 5f
+	 and		%o1, 0x7, %o1
+	prefetch	[%o0 + 0x040], #n_writes
+	prefetch	[%o0 + 0x080], #n_writes
+	prefetch	[%o0 + 0x0c0], #n_writes
+	prefetch	[%o0 + 0x100], #n_writes
+	prefetch	[%o0 + 0x140], #n_writes
+4:	prefetch	[%o0 + 0x180], #n_writes
+	stx		%o2, [%o0 + 0x00]
+	stx		%o2, [%o0 + 0x08]
+	stx		%o2, [%o0 + 0x10]
+	stx		%o2, [%o0 + 0x18]
+	stx		%o2, [%o0 + 0x20]
+	stx		%o2, [%o0 + 0x28]
+	stx		%o2, [%o0 + 0x30]
+	stx		%o2, [%o0 + 0x38]
+	subcc		%o4, 0x40, %o4
+	bne,pt		%icc, 4b
+	 add		%o0, 0x40, %o0
+	brz,pn		%g1, 6f
+	 nop
+5:	stx		%o2, [%o0 + 0x00]
+	subcc		%g1, 8, %g1
+	bne,pt		%icc, 5b
+	 add		%o0, 0x8, %o0
+6:	brz,pt		%o1, __bzero_done
+	 nop
+__bzero_tiny:
+1:	stb		%o2, [%o0 + 0x00]
+	subcc		%o1, 1, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+__bzero_done:
+	retl
+	 mov		%o3, %o0
+ENDPROC(__bzero)
+ENDPROC(memset)
+EXPORT_SYMBOL(__bzero)
+EXPORT_SYMBOL(memset)
+
+#define EX_ST(x,y)		\
+98:	x,y;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, __retl_o1;	\
+	.text;			\
+	.align 4;
+
+ENTRY(__clear_user) /* %o0=buf, %o1=len */
+	brz,pn		%o1, __clear_user_done
+	 cmp		%o1, 16
+	bl,pn		%icc, __clear_user_tiny
+	 EX_ST(prefetcha [%o0 + 0x00] %asi, #n_writes)
+	andcc		%o0, 0x3, %g0
+	be,pt		%icc, 2f
+1:	 EX_ST(stba	%g0, [%o0 + 0x00] %asi)
+	add		%o0, 1, %o0
+	andcc		%o0, 0x3, %g0
+	bne,pn		%icc, 1b
+	 sub		%o1, 1, %o1
+2:	andcc		%o0, 0x7, %g0
+	be,pt		%icc, 3f
+	 EX_ST(stwa	%g0, [%o0 + 0x00] %asi)
+	sub		%o1, 4, %o1
+	add		%o0, 4, %o0
+3:	and		%o1, 0x38, %g1
+	cmp		%o1, 0x40
+	andn		%o1, 0x3f, %o4
+	bl,pn		%icc, 5f
+	 and		%o1, 0x7, %o1
+	EX_ST(prefetcha	[%o0 + 0x040] %asi, #n_writes)
+	EX_ST(prefetcha	[%o0 + 0x080] %asi, #n_writes)
+	EX_ST(prefetcha	[%o0 + 0x0c0] %asi, #n_writes)
+	EX_ST(prefetcha	[%o0 + 0x100] %asi, #n_writes)
+	EX_ST(prefetcha	[%o0 + 0x140] %asi, #n_writes)
+4:	EX_ST(prefetcha	[%o0 + 0x180] %asi, #n_writes)
+	EX_ST(stxa	%g0, [%o0 + 0x00] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x08] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x10] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x18] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x20] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x28] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x30] %asi)
+	EX_ST(stxa	%g0, [%o0 + 0x38] %asi)
+	subcc		%o4, 0x40, %o4
+	bne,pt		%icc, 4b
+	 add		%o0, 0x40, %o0
+	brz,pn		%g1, 6f
+	 nop
+5:	EX_ST(stxa	%g0, [%o0 + 0x00] %asi)
+	subcc		%g1, 8, %g1
+	bne,pt		%icc, 5b
+	 add		%o0, 0x8, %o0
+6:	brz,pt		%o1, __clear_user_done
+	 nop
+__clear_user_tiny:
+1:	EX_ST(stba	%g0, [%o0 + 0x00] %asi)
+	subcc		%o1, 1, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 1, %o0
+__clear_user_done:
+	retl
+	 clr		%o0
+ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
new file mode 100644
index 0000000..6a5469c
--- /dev/null
+++ b/arch/sparc/lib/checksum_32.S
@@ -0,0 +1,593 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* checksum.S: Sparc optimized checksum code.
+ *
+ *  Copyright(C) 1995 Linus Torvalds
+ *  Copyright(C) 1995 Miguel de Icaza
+ *  Copyright(C) 1996 David S. Miller
+ *  Copyright(C) 1997 Jakub Jelinek
+ *
+ * derived from:
+ *	Linux/Alpha checksum c-code
+ *      Linux/ix86 inline checksum assembly
+ *      RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
+ *	David Mosberger-Tang for optimized reference c-code
+ *	BSD4.4 portable checksum routine
+ */
+
+#include <asm/errno.h>
+#include <asm/export.h>
+
+#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)	\
+	ldd	[buf + offset + 0x00], t0;			\
+	ldd	[buf + offset + 0x08], t2;			\
+	addxcc	t0, sum, sum;					\
+	addxcc	t1, sum, sum;					\
+	ldd	[buf + offset + 0x10], t4;			\
+	addxcc	t2, sum, sum;					\
+	addxcc	t3, sum, sum;					\
+	ldd	[buf + offset + 0x18], t0;			\
+	addxcc	t4, sum, sum;					\
+	addxcc	t5, sum, sum;					\
+	addxcc	t0, sum, sum;					\
+	addxcc	t1, sum, sum;
+
+#define CSUM_LASTCHUNK(buf, offset, sum, t0, t1, t2, t3)	\
+	ldd	[buf - offset - 0x08], t0;			\
+	ldd	[buf - offset - 0x00], t2;			\
+	addxcc	t0, sum, sum;					\
+	addxcc	t1, sum, sum;					\
+	addxcc	t2, sum, sum;					\
+	addxcc	t3, sum, sum;
+
+	/* Do end cruft out of band to get better cache patterns. */
+csum_partial_end_cruft:
+	be	1f				! caller asks %o1 & 0x8
+	 andcc	%o1, 4, %g0			! nope, check for word remaining
+	ldd	[%o0], %g2			! load two
+	addcc	%g2, %o2, %o2			! add first word to sum
+	addxcc	%g3, %o2, %o2			! add second word as well
+	add	%o0, 8, %o0			! advance buf ptr
+	addx	%g0, %o2, %o2			! add in final carry
+	andcc	%o1, 4, %g0			! check again for word remaining
+1:	be	1f				! nope, skip this code
+	 andcc	%o1, 3, %o1			! check for trailing bytes
+	ld	[%o0], %g2			! load it
+	addcc	%g2, %o2, %o2			! add to sum
+	add	%o0, 4, %o0			! advance buf ptr
+	addx	%g0, %o2, %o2			! add in final carry
+	andcc	%o1, 3, %g0			! check again for trailing bytes
+1:	be	1f				! no trailing bytes, return
+	 addcc	%o1, -1, %g0			! only one byte remains?
+	bne	2f				! at least two bytes more
+	 subcc	%o1, 2, %o1			! only two bytes more?
+	b	4f				! only one byte remains
+	 or	%g0, %g0, %o4			! clear fake hword value
+2:	lduh	[%o0], %o4			! get hword
+	be	6f				! jmp if only hword remains
+	 add	%o0, 2, %o0			! advance buf ptr either way
+	sll	%o4, 16, %o4			! create upper hword
+4:	ldub	[%o0], %o5			! get final byte
+	sll	%o5, 8, %o5			! put into place
+	or	%o5, %o4, %o4			! coalesce with hword (if any)
+6:	addcc	%o4, %o2, %o2			! add to sum
+1:	retl					! get outta here
+	 addx	%g0, %o2, %o0			! add final carry into retval
+
+	/* Also do alignment out of band to get better cache patterns. */
+csum_partial_fix_alignment:
+	cmp	%o1, 6
+	bl	cpte - 0x4
+	 andcc	%o0, 0x2, %g0
+	be	1f
+	 andcc	%o0, 0x4, %g0
+	lduh	[%o0 + 0x00], %g2
+	sub	%o1, 2, %o1
+	add	%o0, 2, %o0
+	sll	%g2, 16, %g2
+	addcc	%g2, %o2, %o2
+	srl	%o2, 16, %g3
+	addx	%g0, %g3, %g2
+	sll	%o2, 16, %o2
+	sll	%g2, 16, %g3
+	srl	%o2, 16, %o2
+	andcc	%o0, 0x4, %g0
+	or	%g3, %o2, %o2
+1:	be	cpa
+	 andcc	%o1, 0xffffff80, %o3
+	ld	[%o0 + 0x00], %g2
+	sub	%o1, 4, %o1
+	addcc	%g2, %o2, %o2
+	add	%o0, 4, %o0
+	addx	%g0, %o2, %o2
+	b	cpa
+	 andcc	%o1, 0xffffff80, %o3
+
+	/* The common case is to get called with a nicely aligned
+	 * buffer of size 0x20.  Follow the code path for that case.
+	 */
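+	/* The accumulation relies on addxcc feeding each add's carry
+	 * into the next, with a trailing addx to fold the final carry
+	 * back in: the end-around carry of ones' complement arithmetic.
+	 */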
+	.globl	csum_partial
+	EXPORT_SYMBOL(csum_partial)
+csum_partial:			/* %o0=buf, %o1=len, %o2=sum */
+	andcc	%o0, 0x7, %g0				! alignment problems?
+	bne	csum_partial_fix_alignment		! yep, handle it
+	 sethi	%hi(cpte - 8), %g7			! prepare table jmp ptr
+	andcc	%o1, 0xffffff80, %o3			! num loop iterations
+cpa:	be	3f					! none to do
+	 andcc	%o1, 0x70, %g1				! clears carry flag too
+5:	CSUM_BIGCHUNK(%o0, 0x00, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
+	CSUM_BIGCHUNK(%o0, 0x20, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
+	CSUM_BIGCHUNK(%o0, 0x40, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
+	CSUM_BIGCHUNK(%o0, 0x60, %o2, %o4, %o5, %g2, %g3, %g4, %g5)
+	addx	%g0, %o2, %o2				! sink in final carry
+	subcc	%o3, 128, %o3				! detract from loop iters
+	bne	5b					! more to do
+	 add	%o0, 128, %o0				! advance buf ptr
+	andcc	%o1, 0x70, %g1				! clears carry flag too
+3:	be	cpte					! nope
+	 andcc	%o1, 0xf, %g0				! anything left at all?
+	srl	%g1, 1, %o4				! compute offset
+	sub	%g7, %g1, %g7				! adjust jmp ptr
+	sub	%g7, %o4, %g7				! final jmp ptr adjust
+	jmp	%g7 + %lo(cpte - 8)			! enter the table
+	 add	%o0, %g1, %o0				! advance buf ptr
+cptbl:	CSUM_LASTCHUNK(%o0, 0x68, %o2, %g2, %g3, %g4, %g5)
+	CSUM_LASTCHUNK(%o0, 0x58, %o2, %g2, %g3, %g4, %g5)
+	CSUM_LASTCHUNK(%o0, 0x48, %o2, %g2, %g3, %g4, %g5)
+	CSUM_LASTCHUNK(%o0, 0x38, %o2, %g2, %g3, %g4, %g5)
+	CSUM_LASTCHUNK(%o0, 0x28, %o2, %g2, %g3, %g4, %g5)
+	CSUM_LASTCHUNK(%o0, 0x18, %o2, %g2, %g3, %g4, %g5)
+	CSUM_LASTCHUNK(%o0, 0x08, %o2, %g2, %g3, %g4, %g5)
+	addx	%g0, %o2, %o2				! fetch final carry
+	andcc	%o1, 0xf, %g0				! anything left at all?
+cpte:	bne	csum_partial_end_cruft			! yep, handle it
+	 andcc	%o1, 8, %g0				! check how much
+cpout:	retl						! get outta here
+	 mov	%o2, %o0				! return computed csum
+
+	.globl __csum_partial_copy_start, __csum_partial_copy_end
+__csum_partial_copy_start:
+
+/* Work around cpp -rob */
+#define ALLOC #alloc
+#define EXECINSTR #execinstr
+#define EX(x,y,a,b)				\
+98:     x,y;                                    \
+        .section .fixup,ALLOC,EXECINSTR;	\
+        .align  4;                              \
+99:     ba 30f;                                 \
+         a, b, %o3;                             \
+        .section __ex_table,ALLOC;		\
+        .align  4;                              \
+        .word   98b, 99b;                       \
+        .text;                                  \
+        .align  4
+
+#define EX2(x,y)				\
+98:     x,y;                                    \
+        .section __ex_table,ALLOC;		\
+        .align  4;                              \
+        .word   98b, 30f;                       \
+        .text;                                  \
+        .align  4
+
+#define EX3(x,y)				\
+98:     x,y;                                    \
+        .section __ex_table,ALLOC;		\
+        .align  4;                              \
+        .word   98b, 96f;                       \
+        .text;                                  \
+        .align  4
+
+#define EXT(start,end,handler)			\
+        .section __ex_table,ALLOC;		\
+        .align  4;                              \
+        .word   start, 0, end, handler;         \
+        .text;                                  \
+        .align  4
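+
+/* EXT emits a ranged exception entry: ".word start, 0, end, handler"
+ * marks every instruction from start to end, so a single table entry
+ * can cover a whole unrolled copy loop.
+ */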
+
+	/* This aligned version typically executes in 8.5 superscalar cycles; this
+	 * is the best I can do.  I say 8.5 because the final add will pair with
+	 * the next ldd in the main unrolled loop, so the pipe is always full.
+	 * If you change these macros (including the order of instructions),
+	 * please check the fixup code below as well.
+	 */
+#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
+	ldd	[src + off + 0x00], t0;							\
+	ldd	[src + off + 0x08], t2;							\
+	addxcc	t0, sum, sum;								\
+	ldd	[src + off + 0x10], t4;							\
+	addxcc	t1, sum, sum;								\
+	ldd	[src + off + 0x18], t6;							\
+	addxcc	t2, sum, sum;								\
+	std	t0, [dst + off + 0x00];							\
+	addxcc	t3, sum, sum;								\
+	std	t2, [dst + off + 0x08];							\
+	addxcc	t4, sum, sum;								\
+	std	t4, [dst + off + 0x10];							\
+	addxcc	t5, sum, sum;								\
+	std	t6, [dst + off + 0x18];							\
+	addxcc	t6, sum, sum;								\
+	addxcc	t7, sum, sum;
+
+	/* 12 superscalar cycles seems to be the limit for this case;
+	 * because of this we do all the ldd's together to get the
+	 * Viking MXCC into streaming mode.  Ho hum...
+	 */
+#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
+	ldd	[src + off + 0x00], t0;						\
+	ldd	[src + off + 0x08], t2;						\
+	ldd	[src + off + 0x10], t4;						\
+	ldd	[src + off + 0x18], t6;						\
+	st	t0, [dst + off + 0x00];						\
+	addxcc	t0, sum, sum;							\
+	st	t1, [dst + off + 0x04];						\
+	addxcc	t1, sum, sum;							\
+	st	t2, [dst + off + 0x08];						\
+	addxcc	t2, sum, sum;							\
+	st	t3, [dst + off + 0x0c];						\
+	addxcc	t3, sum, sum;							\
+	st	t4, [dst + off + 0x10];						\
+	addxcc	t4, sum, sum;							\
+	st	t5, [dst + off + 0x14];						\
+	addxcc	t5, sum, sum;							\
+	st	t6, [dst + off + 0x18];						\
+	addxcc	t6, sum, sum;							\
+	st	t7, [dst + off + 0x1c];						\
+	addxcc	t7, sum, sum;
+
+	/* Yuck, 6 superscalar cycles... */
+#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
+	ldd	[src - off - 0x08], t0;				\
+	ldd	[src - off - 0x00], t2;				\
+	addxcc	t0, sum, sum;					\
+	st	t0, [dst - off - 0x08];				\
+	addxcc	t1, sum, sum;					\
+	st	t1, [dst - off - 0x04];				\
+	addxcc	t2, sum, sum;					\
+	st	t2, [dst - off - 0x00];				\
+	addxcc	t3, sum, sum;					\
+	st	t3, [dst - off + 0x04];
+
+	/* Handle the end cruft code out of band for better cache patterns. */
+cc_end_cruft:
+	be	1f
+	 andcc	%o3, 4, %g0
+	EX(ldd	[%o0 + 0x00], %g2, and %o3, 0xf)
+	add	%o1, 8, %o1
+	addcc	%g2, %g7, %g7
+	add	%o0, 8, %o0
+	addxcc	%g3, %g7, %g7
+	EX2(st	%g2, [%o1 - 0x08])
+	addx	%g0, %g7, %g7
+	andcc	%o3, 4, %g0
+	EX2(st	%g3, [%o1 - 0x04])
+1:	be	1f
+	 andcc	%o3, 3, %o3
+	EX(ld	[%o0 + 0x00], %g2, add %o3, 4)
+	add	%o1, 4, %o1
+	addcc	%g2, %g7, %g7
+	EX2(st	%g2, [%o1 - 0x04])
+	addx	%g0, %g7, %g7
+	andcc	%o3, 3, %g0
+	add	%o0, 4, %o0
+1:	be	1f
+	 addcc	%o3, -1, %g0
+	bne	2f
+	 subcc	%o3, 2, %o3
+	b	4f
+	 or	%g0, %g0, %o4
+2:	EX(lduh	[%o0 + 0x00], %o4, add %o3, 2)
+	add	%o0, 2, %o0
+	EX2(sth	%o4, [%o1 + 0x00])
+	be	6f
+	 add	%o1, 2, %o1
+	sll	%o4, 16, %o4
+4:	EX(ldub	[%o0 + 0x00], %o5, add %g0, 1)
+	EX2(stb	%o5, [%o1 + 0x00])
+	sll	%o5, 8, %o5
+	or	%o5, %o4, %o4
+6:	addcc	%o4, %g7, %g7
+1:	retl
+	 addx	%g0, %g7, %o0
+
+	/* Also, handle the alignment code out of band. */
+cc_dword_align:
+	cmp	%g1, 16
+	bge	1f
+	 srl	%g1, 1, %o3
+2:	cmp	%o3, 0
+	be,a	ccte
+	 andcc	%g1, 0xf, %o3
+	andcc	%o3, %o0, %g0	! Check %o0 only (%o1 has the same last 2 bits)
+	be,a	2b
+	 srl	%o3, 1, %o3
+1:	andcc	%o0, 0x1, %g0
+	bne	ccslow
+	 andcc	%o0, 0x2, %g0
+	be	1f
+	 andcc	%o0, 0x4, %g0
+	EX(lduh	[%o0 + 0x00], %g4, add %g1, 0)
+	sub	%g1, 2, %g1
+	EX2(sth	%g4, [%o1 + 0x00])
+	add	%o0, 2, %o0
+	sll	%g4, 16, %g4
+	addcc	%g4, %g7, %g7
+	add	%o1, 2, %o1
+	srl	%g7, 16, %g3
+	addx	%g0, %g3, %g4
+	sll	%g7, 16, %g7
+	sll	%g4, 16, %g3
+	srl	%g7, 16, %g7
+	andcc	%o0, 0x4, %g0
+	or	%g3, %g7, %g7
+1:	be	3f
+	 andcc	%g1, 0xffffff80, %g0
+	EX(ld	[%o0 + 0x00], %g4, add %g1, 0)
+	sub	%g1, 4, %g1
+	EX2(st	%g4, [%o1 + 0x00])
+	add	%o0, 4, %o0
+	addcc	%g4, %g7, %g7
+	add	%o1, 4, %o1
+	addx	%g0, %g7, %g7
+	b	3f
+	 andcc	%g1, 0xffffff80, %g0
+
+	/* Sun, you just can't beat me, you just can't.  Stop trying,
+	 * give up.  I'm serious, I am going to kick the living shit
+	 * out of you, game over, lights out.
+	 */
+	.align	8
+	.globl	__csum_partial_copy_sparc_generic
+	EXPORT_SYMBOL(__csum_partial_copy_sparc_generic)
+__csum_partial_copy_sparc_generic:
+					/* %o0=src, %o1=dest, %g1=len, %g7=sum */
+	xor	%o0, %o1, %o4		! get changing bits
+	andcc	%o4, 3, %g0		! check for mismatched alignment
+	bne	ccslow			! better this than unaligned/fixups
+	 andcc	%o0, 7, %g0		! need to align things?
+	bne	cc_dword_align		! yes, we check for short lengths there
+	 andcc	%g1, 0xffffff80, %g0	! can we use unrolled loop?
+3:	be	3f			! nope, less than one loop remains
+	 andcc	%o1, 4, %g0		! dest aligned on 4 or 8 byte boundary?
+	be	ccdbl + 4		! 8 byte aligned, kick ass
+5:	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+10:	EXT(5b, 10b, 20f)		! note for exception handling
+	sub	%g1, 128, %g1		! detract from length
+	addx	%g0, %g7, %g7		! add in last carry bit
+	andcc	%g1, 0xffffff80, %g0	! more to csum?
+	add	%o0, 128, %o0		! advance src ptr
+	bne	5b			! we did not go negative, continue looping
+	 add	%o1, 128, %o1		! advance dest ptr
+3:	andcc	%g1, 0x70, %o2		! can use table?
+ccmerge:be	ccte			! nope, go and check for end cruft
+	 andcc	%g1, 0xf, %o3		! get low bits of length (clears carry btw)
+	srl	%o2, 1, %o4		! begin negative offset computation
+	sethi	%hi(12f), %o5		! set up table ptr end
+	add	%o0, %o2, %o0		! advance src ptr
+	sub	%o5, %o4, %o5		! continue table calculation
+	sll	%o2, 1, %g2		! constant multiplies are fun...
+	sub	%o5, %g2, %o5		! some more adjustments
+	jmp	%o5 + %lo(12f)		! jump into it, duff style, wheee...
+	 add	%o1, %o2, %o1		! advance dest ptr (carry is clear btw)
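+	/* (Duff-style entry point: each CSUMCOPY_LASTCHUNK below is 10
+	 * instructions covering 16 bytes, i.e. 2.5 bytes of code per
+	 * data byte, which is what the %o4/%g2 arithmetic above skips.) */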
+cctbl:	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
+	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x58,%g2,%g3,%g4,%g5)
+	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x48,%g2,%g3,%g4,%g5)
+	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x38,%g2,%g3,%g4,%g5)
+	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
+	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
+	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
+12:	EXT(cctbl, 12b, 22f)		! note for exception table handling
+	addx	%g0, %g7, %g7
+	andcc	%o3, 0xf, %g0		! check for low bits set
+ccte:	bne	cc_end_cruft		! something left, handle it out of band
+	 andcc	%o3, 8, %g0		! begin checks for that code
+	retl				! return
+	 mov	%g7, %o0		! give em the computed checksum
+ccdbl:	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
+11:	EXT(ccdbl, 11b, 21f)		! note for exception table handling
+	sub	%g1, 128, %g1		! detract from length
+	addx	%g0, %g7, %g7		! add in last carry bit
+	andcc	%g1, 0xffffff80, %g0	! more to csum?
+	add	%o0, 128, %o0		! advance src ptr
+	bne	ccdbl			! we did not go negative, continue looping
+	 add	%o1, 128, %o1		! advance dest ptr
+	b	ccmerge			! finish it off, above
+	 andcc	%g1, 0x70, %o2		! can use table? (clears carry btw)
+
+ccslow:	cmp	%g1, 0
+	mov	0, %g5
+	bleu	4f
+	 andcc	%o0, 1, %o5		
+	be,a	1f
+	 srl	%g1, 1, %g4		
+	sub	%g1, 1, %g1	
+	EX(ldub	[%o0], %g5, add %g1, 1)
+	add	%o0, 1, %o0	
+	EX2(stb	%g5, [%o1])
+	srl	%g1, 1, %g4
+	add	%o1, 1, %o1
+1:	cmp	%g4, 0		
+	be,a	3f
+	 andcc	%g1, 1, %g0
+	andcc	%o0, 2, %g0	
+	be,a	1f
+	 srl	%g4, 1, %g4
+	EX(lduh	[%o0], %o4, add %g1, 0)
+	sub	%g1, 2, %g1	
+	srl	%o4, 8, %g2
+	sub	%g4, 1, %g4	
+	EX2(stb	%g2, [%o1])
+	add	%o4, %g5, %g5
+	EX2(stb	%o4, [%o1 + 1])
+	add	%o0, 2, %o0	
+	srl	%g4, 1, %g4
+	add	%o1, 2, %o1
+1:	cmp	%g4, 0		
+	be,a	2f
+	 andcc	%g1, 2, %g0
+	EX3(ld	[%o0], %o4)
+5:	srl	%o4, 24, %g2
+	srl	%o4, 16, %g3
+	EX2(stb	%g2, [%o1])
+	srl	%o4, 8, %g2
+	EX2(stb	%g3, [%o1 + 1])
+	add	%o0, 4, %o0
+	EX2(stb	%g2, [%o1 + 2])
+	addcc	%o4, %g5, %g5
+	EX2(stb	%o4, [%o1 + 3])
+	addx	%g5, %g0, %g5	! I am now too lazy to optimize this (question
+	add	%o1, 4, %o1	! whether it is worth it). Maybe some day - with
+	subcc	%g4, 1, %g4	! the sll/srl tricks
+	bne,a	5b
+	 EX3(ld	[%o0], %o4)
+	sll	%g5, 16, %g2
+	srl	%g5, 16, %g5
+	srl	%g2, 16, %g2
+	andcc	%g1, 2, %g0
+	add	%g2, %g5, %g5 
+2:	be,a	3f		
+	 andcc	%g1, 1, %g0
+	EX(lduh	[%o0], %o4, and %g1, 3)
+	andcc	%g1, 1, %g0
+	srl	%o4, 8, %g2
+	add	%o0, 2, %o0	
+	EX2(stb	%g2, [%o1])
+	add	%g5, %o4, %g5
+	EX2(stb	%o4, [%o1 + 1])
+	add	%o1, 2, %o1
+3:	be,a	1f		
+	 sll	%g5, 16, %o4
+	EX(ldub	[%o0], %g2, add %g0, 1)
+	sll	%g2, 8, %o4	
+	EX2(stb	%g2, [%o1])
+	add	%g5, %o4, %g5
+	sll	%g5, 16, %o4
+1:	addcc	%o4, %g5, %g5
+	srl	%g5, 16, %o4
+	addx	%g0, %o4, %g5
+	orcc	%o5, %g0, %g0
+	be	4f
+	 srl	%g5, 8, %o4
+	and	%g5, 0xff, %g2
+	and	%o4, 0xff, %o4
+	sll	%g2, 8, %g2
+	or	%g2, %o4, %g5
+4:	addcc	%g7, %g5, %g7
+	retl	
+	 addx	%g0, %g7, %o0
+__csum_partial_copy_end:
+
+/* We do these strange calculations for the csum_*_from_user case only,
+ * i.e. we only bother with faults on loads... */
+
+/* o2 = ((g2%20)&3)*8
+ * o3 = g1 - (g2/20)*32 - o2 */
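+/* (Each CSUMCOPY_BIGCHUNK expands to 20 instructions covering 32
+ * bytes, so g2/20 counts completed chunks and g2%20 locates the
+ * faulting instruction within the current one; this is a sketch of
+ * the intent, the exact byte accounting is the arithmetic below.) */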
+20:
+	cmp	%g2, 20
+	blu,a	1f
+	 and	%g2, 3, %o2
+	sub	%g1, 32, %g1
+	b	20b
+	 sub	%g2, 20, %g2
+1:
+	sll	%o2, 3, %o2
+	b	31f
+	 sub	%g1, %o2, %o3
+
+/* o2 = (!(g2 & 15) ? 0 : (((g2 & 15) + 1) & ~1)*8)
+ * o3 = g1 - (g2/16)*32 - o2 */
+21:
+	andcc	%g2, 15, %o3
+	srl	%g2, 4, %g2
+	be,a	1f
+	 clr	%o2
+	add	%o3, 1, %o3
+	and	%o3, 14, %o3
+	sll	%o3, 3, %o2
+1:
+	sll	%g2, 5, %g2
+	sub	%g1, %g2, %o3
+	b	31f
+	 sub	%o3, %o2, %o3
+
+/* o0 += (g2/10)*16 - 0x70
+ * o1 += (g2/10)*16 - 0x70
+ * o2 = (g2 % 10) ? 8 : 0
+ * o3 += 0x70 - (g2/10)*16 - o2 */
+22:
+	cmp	%g2, 10
+	blu,a	1f
+	 sub	%o0, 0x70, %o0
+	add	%o0, 16, %o0
+	add	%o1, 16, %o1
+	sub	%o3, 16, %o3
+	b	22b
+	 sub	%g2, 10, %g2
+1:
+	sub	%o1, 0x70, %o1
+	add	%o3, 0x70, %o3
+	clr	%o2
+	tst	%g2
+	bne,a	1f
+	 mov	8, %o2
+1:
+	b	31f
+	 sub	%o3, %o2, %o3
+96:
+	and	%g1, 3, %g1
+	sll	%g4, 2, %g4
+	add	%g1, %g4, %o3
+30:
+/* %o1 is dst
+ * %o3 is # bytes to zero out
+ * %o4 is faulting address
+ * %o5 is %pc where fault occurred */
+	clr	%o2
+31:
+/* %o0 is src
+ * %o1 is dst
+ * %o2 is # of bytes to copy from src to dst
+ * %o3 is # bytes to zero out
+ * %o4 is faulting address
+ * %o5 is %pc where fault occurred */
+	save	%sp, -104, %sp
+        mov     %i5, %o0
+        mov     %i7, %o1
+        mov	%i4, %o2
+        call    lookup_fault
+	 mov	%g7, %i4
+	cmp	%o0, 2
+	bne	1f	
+	 add	%g0, -EFAULT, %i5
+	tst	%i2
+	be	2f
+	 mov	%i0, %o1
+	mov	%i1, %o0
+5:
+	call	memcpy
+	 mov	%i2, %o2
+	tst	%o0
+	bne,a	2f
+	 add	%i3, %i2, %i3
+	add	%i1, %i2, %i1
+2:
+	mov	%i1, %o0
+6:
+	call	__bzero
+	 mov	%i3, %o1
+1:
+	ld	[%sp + 168], %o2		! struct_ptr of parent
+	st	%i5, [%o2]
+	ret
+	 restore
+
+        .section __ex_table,#alloc
+        .align 4
+        .word 5b,2
+	.word 6b,2
diff --git a/arch/sparc/lib/checksum_64.S b/arch/sparc/lib/checksum_64.S
new file mode 100644
index 0000000..9700ef1
--- /dev/null
+++ b/arch/sparc/lib/checksum_64.S
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* checksum_64.S: Sparc V9 optimized checksum code.
+ *
+ *  Copyright(C) 1995 Linus Torvalds
+ *  Copyright(C) 1995 Miguel de Icaza
+ *  Copyright(C) 1996, 2000 David S. Miller
+ *  Copyright(C) 1997 Jakub Jelinek
+ *
+ * derived from:
+ *	Linux/Alpha checksum c-code
+ *      Linux/ix86 inline checksum assembly
+ *      RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
+ *	David Mosberger-Tang for optimized reference c-code
+ *	BSD4.4 portable checksum routine
+ */
+
+#include <asm/export.h>
+	.text
+
+csum_partial_fix_alignment:
+	/* We checked for zero length already, so there must be
+	 * at least one byte.
+	 */
+	be,pt		%icc, 1f
+	 nop
+	ldub		[%o0 + 0x00], %o4
+	add		%o0, 1, %o0
+	sub		%o1, 1, %o1
+1:	andcc		%o0, 0x2, %g0
+	be,pn		%icc, csum_partial_post_align
+	 cmp		%o1, 2
+	blu,pn		%icc, csum_partial_end_cruft
+	 nop
+	lduh		[%o0 + 0x00], %o5
+	add		%o0, 2, %o0
+	sub		%o1, 2, %o1
+	ba,pt		%xcc, csum_partial_post_align
+	 add		%o5, %o4, %o4
+
+	.align		32
+	.globl		csum_partial
+	.type		csum_partial,#function
+	EXPORT_SYMBOL(csum_partial)
+csum_partial:		/* %o0=buff, %o1=len, %o2=sum */
+	prefetch	[%o0 + 0x000], #n_reads
+	clr		%o4
+	prefetch	[%o0 + 0x040], #n_reads
+	brz,pn		%o1, csum_partial_finish
+	 andcc		%o0, 0x3, %g0
+
+	/* We "remember" whether the lowest bit in the address
+	 * was set in %g7.  Because if it is, we have to swap
+	 * upper and lower 8 bit fields of the sum we calculate.
+	 */
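+	/* If it was set, the 16-bit result is byte-swapped at the end;
+	 * in C terms (an illustrative sketch):
+	 *
+	 *	sum = ((sum & 0xff) << 8) | ((sum >> 8) & 0xff);
+	 */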
+	bne,pn		%icc, csum_partial_fix_alignment
+	 andcc		%o0, 0x1, %g7
+
+csum_partial_post_align:
+	prefetch	[%o0 + 0x080], #n_reads
+	andncc		%o1, 0x3f, %o3
+
+	prefetch	[%o0 + 0x0c0], #n_reads
+	sub		%o1, %o3, %o1
+	brz,pn		%o3, 2f
+	 prefetch	[%o0 + 0x100], #n_reads
+
+	/* So that we don't need to use the non-pairing
+	 * add-with-carry instructions we accumulate 32-bit
+	 * values into a 64-bit register.  At the end of the
+	 * loop we fold it back down to 32 bits, and then to 16.
+	 */
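+	/* An illustrative C model of this scheme (a sketch, not part
+	 * of the original code):
+	 *
+	 *	u64 acc = 0;
+	 *	while (words--)
+	 *		acc += *buf32++;	// plain adds; carries land
+	 *					// in the upper 32 bits
+	 *	acc = (acc >> 32) + (u32)acc;	// fold 64 -> 32
+	 *	acc = (acc >> 32) + (u32)acc;	// absorb the last carry
+	 */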
+	prefetch	[%o0 + 0x140], #n_reads
+1:	lduw		[%o0 + 0x00], %o5
+	lduw		[%o0 + 0x04], %g1
+	lduw		[%o0 + 0x08], %g2
+	add		%o4, %o5, %o4
+	lduw		[%o0 + 0x0c], %g3
+	add		%o4, %g1, %o4
+	lduw		[%o0 + 0x10], %o5
+	add		%o4, %g2, %o4
+	lduw		[%o0 + 0x14], %g1
+	add		%o4, %g3, %o4
+	lduw		[%o0 + 0x18], %g2
+	add		%o4, %o5, %o4
+	lduw		[%o0 + 0x1c], %g3
+	add		%o4, %g1, %o4
+	lduw		[%o0 + 0x20], %o5
+	add		%o4, %g2, %o4
+	lduw		[%o0 + 0x24], %g1
+	add		%o4, %g3, %o4
+	lduw		[%o0 + 0x28], %g2
+	add		%o4, %o5, %o4
+	lduw		[%o0 + 0x2c], %g3
+	add		%o4, %g1, %o4
+	lduw		[%o0 + 0x30], %o5
+	add		%o4, %g2, %o4
+	lduw		[%o0 + 0x34], %g1
+	add		%o4, %g3, %o4
+	lduw		[%o0 + 0x38], %g2
+	add		%o4, %o5, %o4
+	lduw		[%o0 + 0x3c], %g3
+	add		%o4, %g1, %o4
+	prefetch	[%o0 + 0x180], #n_reads
+	add		%o4, %g2, %o4
+	subcc		%o3, 0x40, %o3
+	add		%o0, 0x40, %o0
+	bne,pt		%icc, 1b
+	 add		%o4, %g3, %o4
+
+2:	and		%o1, 0x3c, %o3
+	brz,pn		%o3, 2f
+	 sub		%o1, %o3, %o1
+1:	lduw		[%o0 + 0x00], %o5
+	subcc		%o3, 0x4, %o3
+	add		%o0, 0x4, %o0
+	bne,pt		%icc, 1b
+	 add		%o4, %o5, %o4
+
+2:
+	/* fold 64-->32 */
+	srlx		%o4, 32, %o5
+	srl		%o4, 0, %o4
+	add		%o4, %o5, %o4
+	srlx		%o4, 32, %o5
+	srl		%o4, 0, %o4
+	add		%o4, %o5, %o4
+
+	/* fold 32-->16 */
+	sethi		%hi(0xffff0000), %g1
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+
+csum_partial_end_cruft:
+	/* %o4 has the 16-bit sum we have calculated so far.  */
+	cmp		%o1, 2
+	blu,pt		%icc, 1f
+	 nop
+	lduh		[%o0 + 0x00], %o5
+	sub		%o1, 2, %o1
+	add		%o0, 2, %o0
+	add		%o4, %o5, %o4
+1:	brz,pt		%o1, 1f
+	 nop
+	ldub		[%o0 + 0x00], %o5
+	sub		%o1, 1, %o1
+	add		%o0, 1, %o0
+	sllx		%o5, 8, %o5
+	add		%o4, %o5, %o4
+1:
+	/* fold 32-->16 */
+	sethi		%hi(0xffff0000), %g1
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+
+1:	brz,pt		%g7, 1f
+	 nop
+
+	/* We started with an odd byte, byte-swap the result.  */
+	srl		%o4, 8, %o5
+	and		%o4, 0xff, %g1
+	sll		%g1, 8, %g1
+	or		%o5, %g1, %o4
+
+1:	addcc		%o2, %o4, %o2
+	addc		%g0, %o2, %o2
+
+csum_partial_finish:
+	retl
+	 srl		%o2, 0, %o0
diff --git a/arch/sparc/lib/clear_page.S b/arch/sparc/lib/clear_page.S
new file mode 100644
index 0000000..8a6c783
--- /dev/null
+++ b/arch/sparc/lib/clear_page.S
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* clear_page.S: UltraSparc optimized clear page.
+ *
+ * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <asm/visasm.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+#include <asm/export.h>
+
+	/* What we used to do was lock a TLB entry into a specific
+	 * TLB slot, clear the page with interrupts disabled, then
+	 * restore the original TLB entry.  This was great for
+	 * disturbing the TLB as little as possible, but it meant
+	 * we had to keep interrupts disabled for a long time.
+	 *
+	 * Now, we simply use the normal TLB loading mechanism,
+	 * and this makes the cpu choose a slot all by itself.
+	 * Then we do a normal TLB flush on exit.  We need only
+	 * disable preemption during the clear.
+	 */
+
+	.text
+
+	.globl		_clear_page
+	EXPORT_SYMBOL(_clear_page)
+_clear_page:		/* %o0=dest */
+	ba,pt		%xcc, clear_page_common
+	 clr		%o4
+
+	/* This thing is pretty important; it shows up
+	 * on the profiles via do_anonymous_page().
+	 */
+	.align		32
+	.globl		clear_user_page
+	EXPORT_SYMBOL(clear_user_page)
+clear_user_page:	/* %o0=dest, %o1=vaddr */
+	lduw		[%g6 + TI_PRE_COUNT], %o2
+	sethi		%hi(PAGE_OFFSET), %g2
+	sethi		%hi(PAGE_SIZE), %o4
+
+	ldx		[%g2 + %lo(PAGE_OFFSET)], %g2
+	sethi		%hi(PAGE_KERNEL_LOCKED), %g3
+
+	ldx		[%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
+	sub		%o0, %g2, %g1		! paddr
+
+	and		%o1, %o4, %o0		! vaddr D-cache alias bit
+
+	or		%g1, %g3, %g1		! TTE data
+	sethi		%hi(TLBTEMP_BASE), %o3
+
+	add		%o2, 1, %o4
+	add		%o0, %o3, %o0		! TTE vaddr
+
+	/* Disable preemption.  */
+	mov		TLB_TAG_ACCESS, %g3
+	stw		%o4, [%g6 + TI_PRE_COUNT]
+
+	/* Load TLB entry.  */
+	rdpr		%pstate, %o4
+	wrpr		%o4, PSTATE_IE, %pstate
+	stxa		%o0, [%g3] ASI_DMMU
+	stxa		%g1, [%g0] ASI_DTLB_DATA_IN
+	sethi		%hi(KERNBASE), %g1
+	flush		%g1
+	wrpr		%o4, 0x0, %pstate
+
+	mov		1, %o4
+
+clear_page_common:
+	VISEntryHalf
+	membar		#StoreLoad | #StoreStore | #LoadStore
+	fzero		%f0
+	sethi		%hi(PAGE_SIZE/64), %o1
+	mov		%o0, %g1		! remember vaddr for tlbflush
+	fzero		%f2
+	or		%o1, %lo(PAGE_SIZE/64), %o1
+	faddd		%f0, %f2, %f4
+	fmuld		%f0, %f2, %f6
+	faddd		%f0, %f2, %f8
+	fmuld		%f0, %f2, %f10
+
+	faddd		%f0, %f2, %f12
+	fmuld		%f0, %f2, %f14
+1:	stda		%f0, [%o0 + %g0] ASI_BLK_P
+	subcc		%o1, 1, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 0x40, %o0
+	membar		#Sync
+	VISExitHalf
+
+	brz,pn		%o4, out
+	 nop
+
+	stxa		%g0, [%g1] ASI_DMMU_DEMAP
+	membar		#Sync
+	stw		%o2, [%g6 + TI_PRE_COUNT]
+
+out:	retl
+	 nop
+
diff --git a/arch/sparc/lib/cmpdi2.c b/arch/sparc/lib/cmpdi2.c
new file mode 100644
index 0000000..333367f
--- /dev/null
+++ b/arch/sparc/lib/cmpdi2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+
+#include "libgcc.h"
+
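+/* libgcc's comparison convention: return 0 for a < b, 1 for a == b,
+ * and 2 for a > b.  The whole routine is equivalent to the one-liner
+ * (illustrative only):
+ *
+ *	return (a > b) - (a < b) + 1;
+ */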
+word_type __cmpdi2(long long a, long long b)
+{
+	const DWunion au = {
+		.ll = a
+	};
+	const DWunion bu = {
+		.ll = b
+	};
+
+	if (au.s.high < bu.s.high)
+		return 0;
+	else if (au.s.high > bu.s.high)
+		return 2;
+
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+
+	return 1;
+}
+
+EXPORT_SYMBOL(__cmpdi2);
diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S
new file mode 100644
index 0000000..66e90bf
--- /dev/null
+++ b/arch/sparc/lib/copy_in_user.S
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* copy_in_user.S: Copy from userspace to userspace.
+ *
+ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/linkage.h>
+#include <asm/asi.h>
+#include <asm/export.h>
+
+#define XCC xcc
+
+#define EX(x,y,z)		\
+98:	x,y;			\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, z;		\
+	.text;			\
+	.align 4;
+
+#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
+#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
+#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
+
+	.register	%g2,#scratch
+	.register	%g3,#scratch
+
+	.text
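+	/* Fault handlers: each returns the number of bytes not yet
+	 * copied.  %o4 holds the remaining aligned length and %o2 the
+	 * tail; since the count is decremented before the access that
+	 * may fault, the access width (8, 4 or 1) is added back.
+	 */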
+__retl_o4_plus_8:
+	add	%o4, %o2, %o4
+	retl
+	 add	%o4, 8, %o0
+__retl_o2_plus_4:
+	retl
+	 add	%o2, 4, %o0
+__retl_o2_plus_1:
+	retl
+	 add	%o2, 1, %o0
+
+	.align	32
+
+	/* Don't try to get too fancy here, just nice and
+	 * simple.  This is predominantly used for well aligned
+	 * small copies in the compat layer.  It is also used
+	 * to copy register windows around during thread cloning.
+	 */
+
+ENTRY(raw_copy_in_user)	/* %o0=dst, %o1=src, %o2=len */
+	cmp		%o2, 0
+	be,pn		%XCC, 85f
+	 or		%o0, %o1, %o3
+	cmp		%o2, 16
+	bleu,a,pn	%XCC, 80f
+	 or		%o3, %o2, %o3
+
+	/* 16 < len <= 64 */
+	andcc		%o3, 0x7, %g0
+	bne,pn		%XCC, 90f
+	 nop
+
+	andn		%o2, 0x7, %o4
+	and		%o2, 0x7, %o2
+1:	subcc		%o4, 0x8, %o4
+	EX_O4(ldxa [%o1] %asi, %o5)
+	EX_O4(stxa %o5, [%o0] %asi)
+	add		%o1, 0x8, %o1
+	bgu,pt		%XCC, 1b
+	 add		%o0, 0x8, %o0
+	andcc		%o2, 0x4, %g0
+	be,pt		%XCC, 1f
+	 nop
+	sub		%o2, 0x4, %o2
+	EX_O2_4(lduwa [%o1] %asi, %o5)
+	EX_O2_4(stwa %o5, [%o0] %asi)
+	add		%o1, 0x4, %o1
+	add		%o0, 0x4, %o0
+1:	cmp		%o2, 0
+	be,pt		%XCC, 85f
+	 nop
+	ba,pt		%xcc, 90f
+	 nop
+
+80:	/* 0 < len <= 16 */
+	andcc		%o3, 0x3, %g0
+	bne,pn		%XCC, 90f
+	 nop
+
+82:
+	subcc		%o2, 4, %o2
+	EX_O2_4(lduwa [%o1] %asi, %g1)
+	EX_O2_4(stwa %g1, [%o0] %asi)
+	add		%o1, 4, %o1
+	bgu,pt		%XCC, 82b
+	 add		%o0, 4, %o0
+
+85:	retl
+	 clr		%o0
+
+	.align	32
+90:
+	subcc		%o2, 1, %o2
+	EX_O2_1(lduba [%o1] %asi, %g1)
+	EX_O2_1(stba %g1, [%o0] %asi)
+	add		%o1, 1, %o1
+	bgu,pt		%XCC, 90b
+	 add		%o0, 1, %o0
+	retl
+	 clr		%o0
+ENDPROC(raw_copy_in_user)
+EXPORT_SYMBOL(raw_copy_in_user)
diff --git a/arch/sparc/lib/copy_page.S b/arch/sparc/lib/copy_page.S
new file mode 100644
index 0000000..c088e87
--- /dev/null
+++ b/arch/sparc/lib/copy_page.S
@@ -0,0 +1,253 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* copy_page.S: UltraSparc optimized copy page.
+ *
+ * Copyright (C) 1996, 1998, 1999, 2000, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
+ */
+
+#include <asm/visasm.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/spitfire.h>
+#include <asm/head.h>
+#include <asm/export.h>
+
+	/* What we used to do was lock a TLB entry into a specific
+	 * TLB slot, copy the page with interrupts disabled, then
+	 * restore the original TLB entry.  This was great for
+	 * disturbing the TLB as little as possible, but it meant
+	 * we had to keep interrupts disabled for a long time.
+	 *
+	 * Now, we simply use the normal TLB loading mechanism,
+	 * and this makes the cpu choose a slot all by itself.
+	 * Then we do a normal TLB flush on exit.  We need only
+	 * disable preemption during the copy.
+	 */
+
+#define	DCACHE_SIZE	(PAGE_SIZE * 2)
+
+#if (PAGE_SHIFT == 13)
+#define PAGE_SIZE_REM	0x80
+#elif (PAGE_SHIFT == 16)
+#define PAGE_SIZE_REM	0x100
+#else
+#error Wrong PAGE_SHIFT specified
+#endif
+
+#define TOUCH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7)	\
+	fsrc2	%reg0, %f48; 	fsrc2	%reg1, %f50;		\
+	fsrc2	%reg2, %f52; 	fsrc2	%reg3, %f54;		\
+	fsrc2	%reg4, %f56; 	fsrc2	%reg5, %f58;		\
+	fsrc2	%reg6, %f60; 	fsrc2	%reg7, %f62;
+
+	.text
+
+	.align		32
+	.globl		copy_user_page
+	.type		copy_user_page,#function
+	EXPORT_SYMBOL(copy_user_page)
+copy_user_page:		/* %o0=dest, %o1=src, %o2=vaddr */
+	lduw		[%g6 + TI_PRE_COUNT], %o4
+	sethi		%hi(PAGE_OFFSET), %g2
+	sethi		%hi(PAGE_SIZE), %o3
+
+	ldx		[%g2 + %lo(PAGE_OFFSET)], %g2
+	sethi		%hi(PAGE_KERNEL_LOCKED), %g3
+
+	ldx		[%g3 + %lo(PAGE_KERNEL_LOCKED)], %g3
+	sub		%o0, %g2, %g1		! dest paddr
+
+	sub		%o1, %g2, %g2		! src paddr
+
+	and		%o2, %o3, %o0		! vaddr D-cache alias bit
+	or		%g1, %g3, %g1		! dest TTE data
+
+	or		%g2, %g3, %g2		! src TTE data
+	sethi		%hi(TLBTEMP_BASE), %o3
+
+	sethi		%hi(DCACHE_SIZE), %o1
+	add		%o0, %o3, %o0		! dest TTE vaddr
+
+	add		%o4, 1, %o2
+	add		%o0, %o1, %o1		! src TTE vaddr
+
+	/* Disable preemption.  */
+	mov		TLB_TAG_ACCESS, %g3
+	stw		%o2, [%g6 + TI_PRE_COUNT]
+
+	/* Load TLB entries.  */
+	rdpr		%pstate, %o2
+	wrpr		%o2, PSTATE_IE, %pstate
+	stxa		%o0, [%g3] ASI_DMMU
+	stxa		%g1, [%g0] ASI_DTLB_DATA_IN
+	membar		#Sync
+	stxa		%o1, [%g3] ASI_DMMU
+	stxa		%g2, [%g0] ASI_DTLB_DATA_IN
+	membar		#Sync
+	wrpr		%o2, 0x0, %pstate
+
+cheetah_copy_page_insn:
+	ba,pt		%xcc, 9f
+	 nop
+
+1:
+	VISEntryHalf
+	membar		#StoreLoad | #StoreStore | #LoadStore
+	sethi		%hi((PAGE_SIZE/64)-2), %o2
+	mov		%o0, %g1
+	prefetch	[%o1 + 0x000], #one_read
+	or		%o2, %lo((PAGE_SIZE/64)-2), %o2
+	prefetch	[%o1 + 0x040], #one_read
+	prefetch	[%o1 + 0x080], #one_read
+	prefetch	[%o1 + 0x0c0], #one_read
+	ldd		[%o1 + 0x000], %f0
+	prefetch	[%o1 + 0x100], #one_read
+	ldd		[%o1 + 0x008], %f2
+	prefetch	[%o1 + 0x140], #one_read
+	ldd		[%o1 + 0x010], %f4
+	prefetch	[%o1 + 0x180], #one_read
+	fsrc2		%f0, %f16
+	ldd		[%o1 + 0x018], %f6
+	fsrc2		%f2, %f18
+	ldd		[%o1 + 0x020], %f8
+	fsrc2		%f4, %f20
+	ldd		[%o1 + 0x028], %f10
+	fsrc2		%f6, %f22
+	ldd		[%o1 + 0x030], %f12
+	fsrc2		%f8, %f24
+	ldd		[%o1 + 0x038], %f14
+	fsrc2		%f10, %f26
+	ldd		[%o1 + 0x040], %f0
+1:	ldd		[%o1 + 0x048], %f2
+	fsrc2		%f12, %f28
+	ldd		[%o1 + 0x050], %f4
+	fsrc2		%f14, %f30
+	stda		%f16, [%o0] ASI_BLK_P
+	ldd		[%o1 + 0x058], %f6
+	fsrc2		%f0, %f16
+	ldd		[%o1 + 0x060], %f8
+	fsrc2		%f2, %f18
+	ldd		[%o1 + 0x068], %f10
+	fsrc2		%f4, %f20
+	ldd		[%o1 + 0x070], %f12
+	fsrc2		%f6, %f22
+	ldd		[%o1 + 0x078], %f14
+	fsrc2		%f8, %f24
+	ldd		[%o1 + 0x080], %f0
+	prefetch	[%o1 + 0x180], #one_read
+	fsrc2		%f10, %f26
+	subcc		%o2, 1, %o2
+	add		%o0, 0x40, %o0
+	bne,pt		%xcc, 1b
+	 add		%o1, 0x40, %o1
+
+	ldd		[%o1 + 0x048], %f2
+	fsrc2		%f12, %f28
+	ldd		[%o1 + 0x050], %f4
+	fsrc2		%f14, %f30
+	stda		%f16, [%o0] ASI_BLK_P
+	ldd		[%o1 + 0x058], %f6
+	fsrc2		%f0, %f16
+	ldd		[%o1 + 0x060], %f8
+	fsrc2		%f2, %f18
+	ldd		[%o1 + 0x068], %f10
+	fsrc2		%f4, %f20
+	ldd		[%o1 + 0x070], %f12
+	fsrc2		%f6, %f22
+	add		%o0, 0x40, %o0
+	ldd		[%o1 + 0x078], %f14
+	fsrc2		%f8, %f24
+	fsrc2		%f10, %f26
+	fsrc2		%f12, %f28
+	fsrc2		%f14, %f30
+	stda		%f16, [%o0] ASI_BLK_P
+	membar		#Sync
+	VISExitHalf
+	ba,pt		%xcc, 5f
+	 nop
+
+9:
+	VISEntry
+	ldub		[%g6 + TI_FAULT_CODE], %g3
+	mov		%o0, %g1
+	cmp		%g3, 0
+	rd		%asi, %g3
+	be,a,pt		%icc, 1f
+	 wr		%g0, ASI_BLK_P, %asi
+	wr		%g0, ASI_BLK_COMMIT_P, %asi
+1:	ldda		[%o1] ASI_BLK_P, %f0
+	add		%o1, 0x40, %o1
+	ldda		[%o1] ASI_BLK_P, %f16
+	add		%o1, 0x40, %o1
+	sethi		%hi(PAGE_SIZE), %o2
+1:	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
+	ldda		[%o1] ASI_BLK_P, %f32
+	stda		%f48, [%o0] %asi
+	add		%o1, 0x40, %o1
+	sub		%o2, 0x40, %o2
+	add		%o0, 0x40, %o0
+	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
+	ldda		[%o1] ASI_BLK_P, %f0
+	stda		%f48, [%o0] %asi
+	add		%o1, 0x40, %o1
+	sub		%o2, 0x40, %o2
+	add		%o0, 0x40, %o0
+	TOUCH(f32, f34, f36, f38, f40, f42, f44, f46)
+	ldda		[%o1] ASI_BLK_P, %f16
+	stda		%f48, [%o0] %asi
+	sub		%o2, 0x40, %o2
+	add		%o1, 0x40, %o1
+	cmp		%o2, PAGE_SIZE_REM
+	bne,pt		%xcc, 1b
+	 add		%o0, 0x40, %o0
+#if (PAGE_SHIFT == 16)
+	TOUCH(f0, f2, f4, f6, f8, f10, f12, f14)
+	ldda		[%o1] ASI_BLK_P, %f32
+	stda		%f48, [%o0] %asi
+	add		%o1, 0x40, %o1
+	sub		%o2, 0x40, %o2
+	add		%o0, 0x40, %o0
+	TOUCH(f16, f18, f20, f22, f24, f26, f28, f30)
+	ldda		[%o1] ASI_BLK_P, %f0
+	stda		%f48, [%o0] %asi
+	add		%o1, 0x40, %o1
+	sub		%o2, 0x40, %o2
+	add		%o0, 0x40, %o0
+	membar		#Sync
+	stda		%f32, [%o0] %asi
+	add		%o0, 0x40, %o0
+	stda		%f0, [%o0] %asi
+#else
+	membar		#Sync
+	stda		%f0, [%o0] %asi
+	add		%o0, 0x40, %o0
+	stda		%f16, [%o0] %asi
+#endif
+	membar		#Sync
+	wr		%g3, 0x0, %asi
+	VISExit
+
+5:
+	stxa		%g0, [%g1] ASI_DMMU_DEMAP
+	membar		#Sync
+
+	sethi		%hi(DCACHE_SIZE), %g2
+	stxa		%g0, [%g1 + %g2] ASI_DMMU_DEMAP
+	membar		#Sync
+
+	retl
+	 stw		%o4, [%g6 + TI_PRE_COUNT]
+
+	.size		copy_user_page, .-copy_user_page
+
+	.globl		cheetah_patch_copy_page
+cheetah_patch_copy_page:
+	sethi		%hi(0x01000000), %o1	! NOP
+	sethi		%hi(cheetah_copy_page_insn), %o0
+	or		%o0, %lo(cheetah_copy_page_insn), %o0
+	stw		%o1, [%o0]
+	membar		#StoreStore
+	flush		%o0
+	retl
+	 nop
diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
new file mode 100644
index 0000000..dc72f2b
--- /dev/null
+++ b/arch/sparc/lib/copy_user.S
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* copy_user.S: Sparc optimized copy_from_user and copy_to_user code.
+ *
+ *  Copyright(C) 1995 Linus Torvalds
+ *  Copyright(C) 1996 David S. Miller
+ *  Copyright(C) 1996 Eddie C. Dost
+ *  Copyright(C) 1996,1998 Jakub Jelinek
+ *
+ * derived from:
+ *	e-mail between David and Eddie.
+ *
+ * Returns 0 if successful, otherwise count of bytes not copied yet
+ */
+
+#include <asm/ptrace.h>
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+#include <asm/thread_info.h>
+#include <asm/export.h>
+
+/* Work around cpp -rob */
+#define ALLOC #alloc
+#define EXECINSTR #execinstr
+#define EX(x,y,a,b) 				\
+98: 	x,y;					\
+	.section .fixup,ALLOC,EXECINSTR;	\
+	.align	4;				\
+99:	ba fixupretl;				\
+	 a, b, %g3;				\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	98b, 99b;			\
+	.text;					\
+	.align	4
+
+#define EX2(x,y,c,d,e,a,b) 			\
+98: 	x,y;					\
+	.section .fixup,ALLOC,EXECINSTR;	\
+	.align	4;				\
+99:	c, d, e;				\
+	ba fixupretl;				\
+	 a, b, %g3;				\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	98b, 99b;			\
+	.text;					\
+	.align	4
+
+#define EXO2(x,y) 				\
+98: 	x, y;					\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	98b, 97f;			\
+	.text;					\
+	.align	4
+
+#define EXT(start,end,handler)			\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	start, 0, end, handler;		\
+	.text;					\
+	.align	4
+
+/* Please do not change the following macros unless you also change the
+ * logic used in .fixup at the end of this file.
+ */
+
+/* Both these macros have to start with exactly the same insn */
+#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+	ldd	[%src + (offset) + 0x00], %t0; \
+	ldd	[%src + (offset) + 0x08], %t2; \
+	ldd	[%src + (offset) + 0x10], %t4; \
+	ldd	[%src + (offset) + 0x18], %t6; \
+	st	%t0, [%dst + (offset) + 0x00]; \
+	st	%t1, [%dst + (offset) + 0x04]; \
+	st	%t2, [%dst + (offset) + 0x08]; \
+	st	%t3, [%dst + (offset) + 0x0c]; \
+	st	%t4, [%dst + (offset) + 0x10]; \
+	st	%t5, [%dst + (offset) + 0x14]; \
+	st	%t6, [%dst + (offset) + 0x18]; \
+	st	%t7, [%dst + (offset) + 0x1c];
+
+#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+	ldd	[%src + (offset) + 0x00], %t0; \
+	ldd	[%src + (offset) + 0x08], %t2; \
+	ldd	[%src + (offset) + 0x10], %t4; \
+	ldd	[%src + (offset) + 0x18], %t6; \
+	std	%t0, [%dst + (offset) + 0x00]; \
+	std	%t2, [%dst + (offset) + 0x08]; \
+	std	%t4, [%dst + (offset) + 0x10]; \
+	std	%t6, [%dst + (offset) + 0x18];
+
+#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
+	ldd	[%src - (offset) - 0x10], %t0; \
+	ldd	[%src - (offset) - 0x08], %t2; \
+	st	%t0, [%dst - (offset) - 0x10]; \
+	st	%t1, [%dst - (offset) - 0x0c]; \
+	st	%t2, [%dst - (offset) - 0x08]; \
+	st	%t3, [%dst - (offset) - 0x04];
+
+#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
+	lduh	[%src + (offset) + 0x00], %t0; \
+	lduh	[%src + (offset) + 0x02], %t1; \
+	lduh	[%src + (offset) + 0x04], %t2; \
+	lduh	[%src + (offset) + 0x06], %t3; \
+	sth	%t0, [%dst + (offset) + 0x00]; \
+	sth	%t1, [%dst + (offset) + 0x02]; \
+	sth	%t2, [%dst + (offset) + 0x04]; \
+	sth	%t3, [%dst + (offset) + 0x06];
+
+#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
+	ldub	[%src - (offset) - 0x02], %t0; \
+	ldub	[%src - (offset) - 0x01], %t1; \
+	stb	%t0, [%dst - (offset) - 0x02]; \
+	stb	%t1, [%dst - (offset) - 0x01];
+
+	.text
+	.align	4
+
+	.globl  __copy_user_begin
+__copy_user_begin:
+
+	.globl	__copy_user
+	EXPORT_SYMBOL(__copy_user)
+dword_align:
+	andcc	%o1, 1, %g0
+	be	4f
+	 andcc	%o1, 2, %g0
+
+	EXO2(ldub [%o1], %g2)
+	add	%o1, 1, %o1
+	EXO2(stb %g2, [%o0])
+	sub	%o2, 1, %o2
+	bne	3f
+	 add	%o0, 1, %o0
+
+	EXO2(lduh [%o1], %g2)
+	add	%o1, 2, %o1
+	EXO2(sth %g2, [%o0])
+	sub	%o2, 2, %o2
+	b	3f
+	 add	%o0, 2, %o0
+4:
+	EXO2(lduh [%o1], %g2)
+	add	%o1, 2, %o1
+	EXO2(sth %g2, [%o0])
+	sub	%o2, 2, %o2
+	b	3f
+	 add	%o0, 2, %o0
+
+__copy_user:	/* %o0=dst %o1=src %o2=len */
+	xor	%o0, %o1, %o4
+1:
+	andcc	%o4, 3, %o5
+2:
+	bne	cannot_optimize
+	 cmp	%o2, 15
+
+	bleu	short_aligned_end
+	 andcc	%o1, 3, %g0
+
+	bne	dword_align
+3:
+	 andcc	%o1, 4, %g0
+
+	be	2f
+	 mov	%o2, %g1
+
+	EXO2(ld [%o1], %o4)
+	sub	%g1, 4, %g1
+	EXO2(st %o4, [%o0])
+	add	%o1, 4, %o1
+	add	%o0, 4, %o0
+2:
+	andcc	%g1, 0xffffff80, %g7
+	be	3f
+	 andcc	%o0, 4, %g0
+
+	be	ldd_std + 4
+5:
+	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
+80:
+	EXT(5b, 80b, 50f)
+	subcc	%g7, 128, %g7
+	add	%o1, 128, %o1
+	bne	5b
+	 add	%o0, 128, %o0
+3:
+	andcc	%g1, 0x70, %g7
+	be	copy_user_table_end
+	 andcc	%g1, 8, %g0
+
+	sethi	%hi(copy_user_table_end), %o5
+	srl	%g7, 1, %o4
+	add	%g7, %o4, %o4
+	add	%o1, %g7, %o1
+	sub	%o5, %o4, %o5
+	jmpl	%o5 + %lo(copy_user_table_end), %g0
+	 add	%o0, %g7, %o0
+
+copy_user_table:
+	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
+copy_user_table_end:
+	EXT(copy_user_table, copy_user_table_end, 51f)
+	be	copy_user_last7
+	 andcc	%g1, 4, %g0
+
+	EX(ldd	[%o1], %g2, and %g1, 0xf)
+	add	%o0, 8, %o0
+	add	%o1, 8, %o1
+	EX(st	%g2, [%o0 - 0x08], and %g1, 0xf)
+	EX2(st	%g3, [%o0 - 0x04], and %g1, 0xf, %g1, sub %g1, 4)
+copy_user_last7:
+	be	1f
+	 andcc	%g1, 2, %g0
+
+	EX(ld	[%o1], %g2, and %g1, 7)
+	add	%o1, 4, %o1
+	EX(st	%g2, [%o0], and %g1, 7)
+	add	%o0, 4, %o0
+1:
+	be	1f
+	 andcc	%g1, 1, %g0
+
+	EX(lduh	[%o1], %g2, and %g1, 3)
+	add	%o1, 2, %o1
+	EX(sth	%g2, [%o0], and %g1, 3)
+	add	%o0, 2, %o0
+1:
+	be	1f
+	 nop
+
+	EX(ldub	[%o1], %g2, add %g0, 1)
+	EX(stb	%g2, [%o0], add %g0, 1)
+1:
+	retl
+ 	 clr	%o0
+
+ldd_std:
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
+81:
+	EXT(ldd_std, 81b, 52f)
+	subcc	%g7, 128, %g7
+	add	%o1, 128, %o1
+	bne	ldd_std
+	 add	%o0, 128, %o0
+
+	andcc	%g1, 0x70, %g7
+	be	copy_user_table_end
+	 andcc	%g1, 8, %g0
+
+	sethi	%hi(copy_user_table_end), %o5
+	srl	%g7, 1, %o4
+	add	%g7, %o4, %o4
+	add	%o1, %g7, %o1
+	sub	%o5, %o4, %o5
+	jmpl	%o5 + %lo(copy_user_table_end), %g0
+	 add	%o0, %g7, %o0
+
+cannot_optimize:
+	bleu	short_end
+	 cmp	%o5, 2
+
+	bne	byte_chunk
+	 and	%o2, 0xfffffff0, %o3
+	 
+	andcc	%o1, 1, %g0
+	be	10f
+	 nop
+
+	EXO2(ldub [%o1], %g2)
+	add	%o1, 1, %o1
+	EXO2(stb %g2, [%o0])
+	sub	%o2, 1, %o2
+	andcc	%o2, 0xfffffff0, %o3
+	be	short_end
+	 add	%o0, 1, %o0
+10:
+	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
+	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
+82:
+	EXT(10b, 82b, 53f)
+	subcc	%o3, 0x10, %o3
+	add	%o1, 0x10, %o1
+	bne	10b
+	 add	%o0, 0x10, %o0
+	b	2f
+	 and	%o2, 0xe, %o3
+	
+byte_chunk:
+	MOVE_SHORTCHUNK(o1, o0, -0x02, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x04, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x06, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x08, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x0a, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
+83:
+	EXT(byte_chunk, 83b, 54f)
+	subcc	%o3, 0x10, %o3
+	add	%o1, 0x10, %o1
+	bne	byte_chunk
+	 add	%o0, 0x10, %o0
+
+short_end:
+	and	%o2, 0xe, %o3
+2:
+	sethi	%hi(short_table_end), %o5
+	sll	%o3, 3, %o4
+	add	%o0, %o3, %o0
+	sub	%o5, %o4, %o5
+	add	%o1, %o3, %o1
+	jmpl	%o5 + %lo(short_table_end), %g0
+	 andcc	%o2, 1, %g0
+84:
+	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
+short_table_end:
+	EXT(84b, short_table_end, 55f)
+	be	1f
+	 nop
+	EX(ldub	[%o1], %g2, add %g0, 1)
+	EX(stb	%g2, [%o0], add %g0, 1)
+1:
+	retl
+ 	 clr	%o0
+
+short_aligned_end:
+	bne	short_end
+	 andcc	%o2, 8, %g0
+
+	be	1f
+	 andcc	%o2, 4, %g0
+
+	EXO2(ld	[%o1 + 0x00], %g2)
+	EXO2(ld	[%o1 + 0x04], %g3)
+	add	%o1, 8, %o1
+	EXO2(st	%g2, [%o0 + 0x00])
+	EX(st	%g3, [%o0 + 0x04], sub %o2, 4)
+	add	%o0, 8, %o0
+1:
+	b	copy_user_last7
+	 mov	%o2, %g1
+
+	.section .fixup,#alloc,#execinstr
+	.align	4
+97:
+	mov	%o2, %g3
+fixupretl:
+	retl
+	 mov	%g3, %o0
+
+/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
+50:
+/* This magic counts how many bytes are left when a crash in
+ * MOVE_BIGCHUNK happens.  It is derived from the amounts ldd reads
+ * and st stores, etc.
+ * x = g2 % 12;
+ * g3 = g1 + g7 - ((g2 / 12) * 32 + ((x < 4) ? 0 : (x - 4) * 4));
+ * o0 += (g2 / 12) * 32;
+ */
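+/* (For reference: MOVE_BIGCHUNK is 12 instructions per 32 bytes,
+ * 4 ldd loads followed by 8 st stores, which is where the %12 and
+ * the x < 4 load/store boundary in the formula come from.) */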
+	cmp	%g2, 12
+	add	%o0, %g7, %o0
+	bcs	1f
+	 cmp	%g2, 24
+	bcs	2f
+	 cmp	%g2, 36
+	bcs	3f
+	 nop
+	sub	%g2, 12, %g2
+	sub	%g7, 32, %g7
+3:	sub	%g2, 12, %g2
+	sub	%g7, 32, %g7
+2:	sub	%g2, 12, %g2
+	sub	%g7, 32, %g7
+1:	cmp	%g2, 4
+	bcs,a	60f
+	 clr	%g2
+	sub	%g2, 4, %g2
+	sll	%g2, 2, %g2
+60:	and	%g1, 0x7f, %g3
+	sub	%o0, %g7, %o0
+	add	%g3, %g7, %g3
+	ba	fixupretl
+	 sub	%g3, %g2, %g3
+51:
+/* i = 41 - g2; j = i % 6;
+ * g3 = (g1 & 15) + (i / 6) * 16 + ((j < 4) ? (j + 1) * 4 : 16);
+ * o0 -= (i / 6) * 16 + 16;
+ */
+	neg	%g2
+	and	%g1, 0xf, %g1
+	add	%g2, 41, %g2
+	add	%o0, %g1, %o0
+1:	cmp	%g2, 6
+	bcs,a	2f
+	 cmp	%g2, 4
+	add	%g1, 16, %g1
+	b	1b
+	 sub	%g2, 6, %g2
+2:	bcc,a	2f
+	 mov	16, %g2
+	inc	%g2
+	sll	%g2, 2, %g2
+2:	add	%g1, %g2, %g3
+	ba	fixupretl
+	 sub	%o0, %g3, %o0
+52:
+/* g3 = g1 + g7 - (g2 / 8) * 32 - ((g2 & 4) ? (g2 & 3) * 8 : 0);
+   o0 += (g2 / 8) * 32 */
+	andn	%g2, 7, %g4
+	add	%o0, %g7, %o0
+	andcc	%g2, 4, %g0
+	and	%g2, 3, %g2
+	sll	%g4, 2, %g4
+	sll	%g2, 3, %g2
+	bne	60b
+	 sub	%g7, %g4, %g7
+	ba	60b
+	 clr	%g2
+53:
+/* g3 = o3 + (o2 & 15) - (g2 & 8) - ((g2 & 4) ? (g2 & 3) * 2 : 0);
+   o0 += (g2 & 8) */
+	and	%g2, 3, %g4
+	andcc	%g2, 4, %g0
+	and	%g2, 8, %g2
+	sll	%g4, 1, %g4
+	be	1f
+	 add	%o0, %g2, %o0
+	add	%g2, %g4, %g2
+1:	and	%o2, 0xf, %g3
+	add	%g3, %o3, %g3
+	ba	fixupretl
+	 sub	%g3, %g2, %g3
+54:
+/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - ((g2 & 2) ? (g2 & 1) : 0);
+   o0 += (g2 / 4) * 2 */
+	srl	%g2, 2, %o4
+	and	%g2, 1, %o5
+	srl	%g2, 1, %g2
+	add	%o4, %o4, %o4
+	and	%o5, %g2, %o5
+	and	%o2, 0xf, %o2
+	add	%o0, %o4, %o0
+	sub	%o3, %o5, %o3
+	sub	%o2, %o4, %o2
+	ba	fixupretl
+	 add	%o2, %o3, %g3
+55:
+/* i = 27 - g2;
+   g3 = (o2 & 1) + i / 4 * 2 + ((i & 3) != 0);
+   o0 -= i / 4 * 2 + ((i & 3) != 0) */
+	neg	%g2
+	and	%o2, 1, %o2
+	add	%g2, 27, %g2
+	srl	%g2, 2, %o5
+	andcc	%g2, 3, %g0
+	mov	1, %g2
+	add	%o5, %o5, %o5
+	be,a	1f
+	 clr	%g2
+1:	add	%g2, %o5, %g3
+	sub	%o0, %g3, %o0
+	ba	fixupretl
+	 add	%g3, %o2, %g3
+
+	.globl  __copy_user_end
+__copy_user_end:
diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
new file mode 100644
index 0000000..26c644b
--- /dev/null
+++ b/arch/sparc/lib/csum_copy.S
@@ -0,0 +1,314 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* csum_copy.S: Checksum+copy code for sparc64
+ *
+ * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/export.h>
+
+#ifdef __KERNEL__
+#define GLOBAL_SPARE	%g7
+#else
+#define GLOBAL_SPARE	%g5
+#endif
+
+#ifndef EX_LD
+#define EX_LD(x)	x
+#endif
+
+#ifndef EX_ST
+#define EX_ST(x)	x
+#endif
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)	x
+#endif
+
+#ifndef LOAD
+#define LOAD(type,addr,dest)	type [addr], dest
+#endif
+
+#ifndef STORE
+#define STORE(type,src,addr)	type src, [addr]
+#endif
+
+#ifndef FUNC_NAME
+#define FUNC_NAME	csum_partial_copy_nocheck
+#endif
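+
+/* This file is used as a template: csum_copy_{from,to}_user.S
+ * redefine FUNC_NAME, LOAD/STORE and the EX_* fault wrappers and
+ * then #include it, so a single body yields all three variants. */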
+
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+
+	.text
+
+90:
+	/* We checked for zero length already, so there must be
+	 * at least one byte.
+	 */
+	be,pt		%icc, 1f
+	 nop
+	EX_LD(LOAD(ldub, %o0 + 0x00, %o4))
+	add		%o0, 1, %o0
+	sub		%o2, 1, %o2
+	EX_ST(STORE(stb, %o4, %o1 + 0x00))
+	add		%o1, 1, %o1
+1:	andcc		%o0, 0x2, %g0
+	be,pn		%icc, 80f
+	 cmp		%o2, 2
+	blu,pn		%icc, 60f
+	 nop
+	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
+	add		%o0, 2, %o0
+	sub		%o2, 2, %o2
+	EX_ST(STORE(sth, %o5, %o1 + 0x00))
+	add		%o1, 2, %o1
+	ba,pt		%xcc, 80f
+	 add		%o5, %o4, %o4
+
+	.globl		FUNC_NAME
+	.type		FUNC_NAME,#function
+	EXPORT_SYMBOL(FUNC_NAME)
+FUNC_NAME:		/* %o0=src, %o1=dst, %o2=len, %o3=sum */
+	LOAD(prefetch, %o0 + 0x000, #n_reads)
+	xor		%o0, %o1, %g1
+	clr		%o4
+	andcc		%g1, 0x3, %g0
+	bne,pn		%icc, 95f
+	 LOAD(prefetch, %o0 + 0x040, #n_reads)
+	
+	brz,pn		%o2, 70f
+	 andcc		%o0, 0x3, %g0
+
+	/* We "remember" whether the lowest bit in the address
+	 * was set in GLOBAL_SPARE.  Because if it is, we have to swap
+	 * upper and lower 8 bit fields of the sum we calculate.
+	 */
+	bne,pn		%icc, 90b
+	 andcc		%o0, 0x1, GLOBAL_SPARE
+
+80:
+	LOAD(prefetch, %o0 + 0x080, #n_reads)
+	andncc		%o2, 0x3f, %g3
+
+	LOAD(prefetch, %o0 + 0x0c0, #n_reads)
+	sub		%o2, %g3, %o2
+	brz,pn		%g3, 2f
+	 LOAD(prefetch, %o0 + 0x100, #n_reads)
+
+	/* So that we don't need to use the non-pairing
+	 * add-with-carry instructions we accumulate 32-bit
+	 * values into a 64-bit register.  At the end of the
+	 * loop we fold it back down to 32 bits, and then to 16.
+	 */
+	ba,pt		%xcc, 1f
+	LOAD(prefetch, %o0 + 0x140, #n_reads)
+
+	.align		32
+1:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
+	EX_LD(LOAD(lduw, %o0 + 0x04, %g1))
+	EX_LD(LOAD(lduw, %o0 + 0x08, %g2))
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x00))
+	EX_LD(LOAD(lduw, %o0 + 0x0c, %o5))
+	add		%o4, %g1, %o4
+	EX_ST(STORE(stw, %g1, %o1 + 0x04))
+	EX_LD(LOAD(lduw, %o0 + 0x10, %g1))
+	add		%o4, %g2, %o4
+	EX_ST(STORE(stw, %g2, %o1 + 0x08))
+	EX_LD(LOAD(lduw, %o0 + 0x14, %g2))
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x0c))
+	EX_LD(LOAD(lduw, %o0 + 0x18, %o5))
+	add		%o4, %g1, %o4
+	EX_ST(STORE(stw, %g1, %o1 + 0x10))
+	EX_LD(LOAD(lduw, %o0 + 0x1c, %g1))
+	add		%o4, %g2, %o4
+	EX_ST(STORE(stw, %g2, %o1 + 0x14))
+	EX_LD(LOAD(lduw, %o0 + 0x20, %g2))
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x18))
+	EX_LD(LOAD(lduw, %o0 + 0x24, %o5))
+	add		%o4, %g1, %o4
+	EX_ST(STORE(stw, %g1, %o1 + 0x1c))
+	EX_LD(LOAD(lduw, %o0 + 0x28, %g1))
+	add		%o4, %g2, %o4
+	EX_ST(STORE(stw, %g2, %o1 + 0x20))
+	EX_LD(LOAD(lduw, %o0 + 0x2c, %g2))
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x24))
+	EX_LD(LOAD(lduw, %o0 + 0x30, %o5))
+	add		%o4, %g1, %o4
+	EX_ST(STORE(stw, %g1, %o1 + 0x28))
+	EX_LD(LOAD(lduw, %o0 + 0x34, %g1))
+	add		%o4, %g2, %o4
+	EX_ST(STORE(stw, %g2, %o1 + 0x2c))
+	EX_LD(LOAD(lduw, %o0 + 0x38, %g2))
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x30))
+	EX_LD(LOAD(lduw, %o0 + 0x3c, %o5))
+	add		%o4, %g1, %o4
+	EX_ST(STORE(stw, %g1, %o1 + 0x34))
+	LOAD(prefetch, %o0 + 0x180, #n_reads)
+	add		%o4, %g2, %o4
+	EX_ST(STORE(stw, %g2, %o1 + 0x38))
+	subcc		%g3, 0x40, %g3
+	add		%o0, 0x40, %o0
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x3c))
+	bne,pt		%icc, 1b
+	 add		%o1, 0x40, %o1
+
+2:	and		%o2, 0x3c, %g3
+	brz,pn		%g3, 2f
+	 sub		%o2, %g3, %o2
+1:	EX_LD(LOAD(lduw, %o0 + 0x00, %o5))
+	subcc		%g3, 0x4, %g3
+	add		%o0, 0x4, %o0
+	add		%o4, %o5, %o4
+	EX_ST(STORE(stw, %o5, %o1 + 0x00))
+	bne,pt		%icc, 1b
+	 add		%o1, 0x4, %o1
+
+2:
+	/* fold 64-->32 */
+	srlx		%o4, 32, %o5
+	srl		%o4, 0, %o4
+	add		%o4, %o5, %o4
+	srlx		%o4, 32, %o5
+	srl		%o4, 0, %o4
+	add		%o4, %o5, %o4
+
+	/* fold 32-->16 */
+	sethi		%hi(0xffff0000), %g1
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+
+60:
+	/* %o4 has the 16-bit sum we have calculated so far.  */
+	cmp		%o2, 2
+	blu,pt		%icc, 1f
+	 nop
+	EX_LD(LOAD(lduh, %o0 + 0x00, %o5))
+	sub		%o2, 2, %o2
+	add		%o0, 2, %o0
+	add		%o4, %o5, %o4
+	EX_ST(STORE(sth, %o5, %o1 + 0x00))
+	add		%o1, 0x2, %o1
+1:	brz,pt		%o2, 1f
+	 nop
+	EX_LD(LOAD(ldub, %o0 + 0x00, %o5))
+	sub		%o2, 1, %o2
+	add		%o0, 1, %o0
+	EX_ST(STORE(stb, %o5, %o1 + 0x00))
+	sllx		%o5, 8, %o5
+	add		%o1, 1, %o1
+	add		%o4, %o5, %o4
+1:
+	/* fold 32-->16 */
+	sethi		%hi(0xffff0000), %g1
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+	srl		%o4, 16, %o5
+	andn		%o4, %g1, %g2
+	add		%o5, %g2, %o4
+
+1:	brz,pt		GLOBAL_SPARE, 1f
+	 nop
+
+	/* We started with an odd byte, byte-swap the result.  */
+	srl		%o4, 8, %o5
+	and		%o4, 0xff, %g1
+	sll		%g1, 8, %g1
+	or		%o5, %g1, %o4
+
+1:	addcc		%o3, %o4, %o3
+	addc		%g0, %o3, %o3
+
+70:
+	retl
+	 srl		%o3, 0, %o0
+
+95:	mov		0, GLOBAL_SPARE
+	brlez,pn	%o2, 4f
+	 andcc		%o0, 1, %o5		
+	be,a,pt		%icc, 1f
+	 srl		%o2, 1, %g1		
+	sub		%o2, 1, %o2	
+	EX_LD(LOAD(ldub, %o0, GLOBAL_SPARE))
+	add		%o0, 1, %o0	
+	EX_ST(STORE(stb, GLOBAL_SPARE, %o1))
+	srl		%o2, 1, %g1
+	add		%o1, 1, %o1
+1:	brz,a,pn	%g1, 3f
+	 andcc		%o2, 1, %g0
+	andcc		%o0, 2, %g0	
+	be,a,pt		%icc, 1f
+	 srl		%g1, 1, %g1
+	EX_LD(LOAD(lduh, %o0, %o4))
+	sub		%o2, 2, %o2	
+	srl		%o4, 8, %g2
+	sub		%g1, 1, %g1	
+	EX_ST(STORE(stb, %g2, %o1))
+	add		%o4, GLOBAL_SPARE, GLOBAL_SPARE
+	EX_ST(STORE(stb, %o4, %o1 + 1))
+	add		%o0, 2, %o0	
+	srl		%g1, 1, %g1
+	add		%o1, 2, %o1
+1:	brz,a,pn	%g1, 2f		
+	 andcc		%o2, 2, %g0
+	EX_LD(LOAD(lduw, %o0, %o4))
+5:	srl		%o4, 24, %g2
+	srl		%o4, 16, %g3
+	EX_ST(STORE(stb, %g2, %o1))
+	srl		%o4, 8, %g2
+	EX_ST(STORE(stb, %g3, %o1 + 1))
+	add		%o0, 4, %o0
+	EX_ST(STORE(stb, %g2, %o1 + 2))
+	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
+	EX_ST(STORE(stb, %o4, %o1 + 3))
+	addc		GLOBAL_SPARE, %g0, GLOBAL_SPARE
+	add		%o1, 4, %o1
+	subcc		%g1, 1, %g1
+	bne,a,pt	%icc, 5b
+	 EX_LD(LOAD(lduw, %o0, %o4))
+	sll		GLOBAL_SPARE, 16, %g2
+	srl		GLOBAL_SPARE, 16, GLOBAL_SPARE
+	srl		%g2, 16, %g2
+	andcc		%o2, 2, %g0
+	add		%g2, GLOBAL_SPARE, GLOBAL_SPARE 
+2:	be,a,pt		%icc, 3f		
+	 andcc		%o2, 1, %g0
+	EX_LD(LOAD(lduh, %o0, %o4))
+	andcc		%o2, 1, %g0
+	srl		%o4, 8, %g2
+	add		%o0, 2, %o0	
+	EX_ST(STORE(stb, %g2, %o1))
+	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
+	EX_ST(STORE(stb, %o4, %o1 + 1))
+	add		%o1, 2, %o1
+3:	be,a,pt		%icc, 1f		
+	 sll		GLOBAL_SPARE, 16, %o4
+	EX_LD(LOAD(ldub, %o0, %g2))
+	sll		%g2, 8, %o4	
+	EX_ST(STORE(stb, %g2, %o1))
+	add		GLOBAL_SPARE, %o4, GLOBAL_SPARE
+	sll		GLOBAL_SPARE, 16, %o4
+1:	addcc		%o4, GLOBAL_SPARE, GLOBAL_SPARE
+	srl		GLOBAL_SPARE, 16, %o4
+	addc		%g0, %o4, GLOBAL_SPARE
+	brz,pt		%o5, 4f
+	 srl		GLOBAL_SPARE, 8, %o4
+	and		GLOBAL_SPARE, 0xff, %g2
+	and		%o4, 0xff, %o4
+	sll		%g2, 8, %g2
+	or		%g2, %o4, GLOBAL_SPARE
+4:	addcc		%o3, GLOBAL_SPARE, %o3
+	addc		%g0, %o3, %o0
+	retl
+	 srl		%o0, 0, %o0
+	.size		FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/csum_copy_from_user.S b/arch/sparc/lib/csum_copy_from_user.S
new file mode 100644
index 0000000..d20b959
--- /dev/null
+++ b/arch/sparc/lib/csum_copy_from_user.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* csum_copy_from_user.S: Checksum+copy from userspace.
+ *
+ * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_LD(x)		\
+98:	x;			\
+	.section .fixup, "ax";	\
+	.align 4;		\
+99:	retl;			\
+	 mov	-1, %o0;	\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, 99b;		\
+	.text;			\
+	.align 4;
+
+#define FUNC_NAME		__csum_partial_copy_from_user
+#define LOAD(type,addr,dest)	type##a [addr] %asi, dest
+
+#include "csum_copy.S"
diff --git a/arch/sparc/lib/csum_copy_to_user.S b/arch/sparc/lib/csum_copy_to_user.S
new file mode 100644
index 0000000..d71c0c8
--- /dev/null
+++ b/arch/sparc/lib/csum_copy_to_user.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* csum_copy_to_user.S: Checksum+copy to userspace.
+ *
+ * Copyright (C) 2005 David S. Miller (davem@davemloft.net)
+ */
+
+#define EX_ST(x)		\
+98:	x;			\
+	.section .fixup,"ax";	\
+	.align 4;		\
+99:	retl;			\
+	 mov	-1, %o0;	\
+	.section __ex_table,"a";\
+	.align 4;		\
+	.word 98b, 99b;		\
+	.text;			\
+	.align 4;
+
+#define FUNC_NAME		__csum_partial_copy_to_user
+#define STORE(type,src,addr)	type##a src, [addr] %asi
+
+#include "csum_copy.S"
diff --git a/arch/sparc/lib/divdi3.S b/arch/sparc/lib/divdi3.S
new file mode 100644
index 0000000..a2b5a97
--- /dev/null
+++ b/arch/sparc/lib/divdi3.S
@@ -0,0 +1,283 @@
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#include <asm/export.h>
+	.text
+	.align 4
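+/* Signed 64-bit division on 32-bit SPARC: make both operands
+ * positive, divide unsigned by shift-and-subtract on the 32-bit
+ * halves, then fix the sign.  In outline (an illustrative C sketch;
+ * udiv64() here is shorthand for the inlined sequences below, not a
+ * real helper):
+ *
+ *	neg = (a < 0) != (b < 0);
+ *	q = udiv64(llabs(a), llabs(b));
+ *	return neg ? -(long long)q : q;
+ */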
+	.globl __divdi3
+__divdi3:
+	save %sp,-104,%sp
+	cmp %i0,0
+	bge .LL40
+	mov 0,%l4
+	mov -1,%l4
+	sub %g0,%i1,%o0
+	mov %o0,%o5
+	subcc %g0,%o0,%g0
+	sub %g0,%i0,%o0
+	subx %o0,0,%o4
+	mov %o4,%i0
+	mov %o5,%i1
+.LL40:
+	cmp %i2,0
+	bge .LL84
+	mov %i3,%o4
+	xnor %g0,%l4,%l4
+	sub %g0,%i3,%o0
+	mov %o0,%o3
+	subcc %g0,%o0,%g0
+	sub %g0,%i2,%o0
+	subx %o0,0,%o2
+	mov %o2,%i2
+	mov %o3,%i3
+	mov %i3,%o4
+.LL84:
+	cmp %i2,0
+	bne .LL45
+	mov %i1,%i3
+	cmp %o4,%i0
+	bleu .LL46
+	mov %i3,%o1
+	mov	32,%g1
+	subcc	%i0,%o4,%g0
+1:	bcs	5f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	sub	%i0,%o4,%i0	! this kills msb of n
+	addx	%i0,%i0,%i0	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%i0,%o4,%g0
+	bcs	3f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%i0,%o4,%i0	! this kills msb of n
+4:	sub	%i0,%o4,%i0
+5:	addxcc	%i0,%i0,%i0
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
+	sub	%i0,%o4,%i0
+3:	xnor	%o1,0,%o1
+	b .LL50
+	mov 0,%o2
+.LL46:
+	cmp %o4,0
+	bne .LL85
+	mov %i0,%o2
+	mov 1,%o0
+	mov 0,%o1
+	wr %g0, 0, %y
+	udiv %o0, %o1, %o0
+	mov %o0,%o4
+	mov %i0,%o2
+.LL85:
+	mov 0,%g3
+	mov	32,%g1
+	subcc	%g3,%o4,%g0
+1:	bcs	5f
+	 addxcc %o2,%o2,%o2	! shift n1n0 and a q-bit in lsb
+	sub	%g3,%o4,%g3	! this kills msb of n
+	addx	%g3,%g3,%g3	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%g3,%o4,%g0
+	bcs	3f
+	 addxcc %o2,%o2,%o2	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%g3,%o4,%g3	! this kills msb of n
+4:	sub	%g3,%o4,%g3
+5:	addxcc	%g3,%g3,%g3
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o2,%o2,%o2	! shift n1n0 and a 0-bit in lsb
+	sub	%g3,%o4,%g3
+3:	xnor	%o2,0,%o2
+	mov %g3,%i0
+	mov %i3,%o1
+	mov	32,%g1
+	subcc	%i0,%o4,%g0
+1:	bcs	5f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	sub	%i0,%o4,%i0	! this kills msb of n
+	addx	%i0,%i0,%i0	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%i0,%o4,%g0
+	bcs	3f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%i0,%o4,%i0	! this kills msb of n
+4:	sub	%i0,%o4,%i0
+5:	addxcc	%i0,%i0,%i0
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
+	sub	%i0,%o4,%i0
+3:	xnor	%o1,0,%o1
+	b .LL86
+	mov %o1,%l1
+.LL45:
+	cmp %i2,%i0
+	bleu .LL51
+	sethi %hi(65535),%o0
+	b .LL78
+	mov 0,%o1
+.LL51:
+	or %o0,%lo(65535),%o0
+	cmp %i2,%o0
+	bgu .LL58
+	mov %i2,%o1
+	cmp %i2,256
+	addx %g0,-1,%o0
+	b .LL64
+	and %o0,8,%o2
+.LL58:
+	sethi %hi(16777215),%o0
+	or %o0,%lo(16777215),%o0
+	cmp %i2,%o0
+	bgu .LL64
+	mov 24,%o2
+	mov 16,%o2
+.LL64:
+	srl %o1,%o2,%o0
+	sethi %hi(__clz_tab),%o1
+	or %o1,%lo(__clz_tab),%o1
+	ldub [%o0+%o1],%o0
+	add %o0,%o2,%o0
+	mov 32,%o1
+	subcc %o1,%o0,%o3
+	bne,a .LL72
+	sub %o1,%o3,%o1
+	cmp %i0,%i2
+	bgu .LL74
+	cmp %i3,%o4
+	blu .LL78
+	mov 0,%o1
+.LL74:
+	b .LL78
+	mov 1,%o1
+.LL72:
+	sll %i2,%o3,%o2
+	srl %o4,%o1,%o0
+	or %o2,%o0,%i2
+	sll %o4,%o3,%o4
+	srl %i0,%o1,%o2
+	sll %i0,%o3,%o0
+	srl %i3,%o1,%o1
+	or %o0,%o1,%i0
+	sll %i3,%o3,%i3
+	mov %i0,%o1
+	mov	32,%g1
+	subcc	%o2,%i2,%g0
+1:	bcs	5f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	sub	%o2,%i2,%o2	! this kills msb of n
+	addx	%o2,%o2,%o2	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%o2,%i2,%g0
+	bcs	3f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%o2,%i2,%o2	! this kills msb of n
+4:	sub	%o2,%i2,%o2
+5:	addxcc	%o2,%o2,%o2
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
+	sub	%o2,%i2,%o2
+3:	xnor	%o1,0,%o1
+	mov %o2,%i0
+	wr	%g0,%o1,%y	! SPARC has 0-3 delay insn after a wr
+	sra	%o4,31,%g2	! Do not move this insn
+	and	%o1,%g2,%g2	! Do not move this insn
+	andcc	%g0,0,%g1	! Do not move this insn
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,%o4,%g1
+	mulscc	%g1,0,%g1
+	add	%g1,%g2,%o0
+	rd	%y,%o2
+	cmp %o0,%i0
+	bgu,a .LL78
+	add %o1,-1,%o1
+	bne,a .LL50
+	mov 0,%o2
+	cmp %o2,%i3
+	bleu .LL50
+	mov 0,%o2
+	add %o1,-1,%o1
+.LL78:
+	mov 0,%o2
+.LL50:
+	mov %o1,%l1
+.LL86:
+	mov %o2,%l0
+	mov %l0,%i0
+	mov %l1,%i1
+	cmp %l4,0
+	be .LL81
+	sub %g0,%i1,%o0
+	mov %o0,%l3
+	subcc %g0,%o0,%g0
+	sub %g0,%i0,%o0
+	subx %o0,0,%l2
+	mov %l2,%i0
+	mov %l3,%i1
+.LL81:
+	ret
+	restore
+EXPORT_SYMBOL(__divdi3)
diff --git a/arch/sparc/lib/ffs.S b/arch/sparc/lib/ffs.S
new file mode 100644
index 0000000..5a11d86
--- /dev/null
+++ b/arch/sparc/lib/ffs.S
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.register	%g2,#scratch
+
+	.text
+	.align	32
+
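+/* __ffs is a branchless binary search over halves: whenever the low
+ * part of the remaining word is zero, the high part is shifted down
+ * and the step size is added to the result.  Roughly, in C (an
+ * illustrative sketch):
+ *
+ *	n = 0;
+ *	if (!(x & 0xffffffffUL)) { x >>= 32; n += 32; }
+ *	if (!(x & 0xffffUL))     { x >>= 16; n += 16; }
+ *	...and so on down to the final bit.
+ *
+ * ffs() is the 1-based variant of the same search, with ffs(0) == 0.
+ */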
+ENTRY(ffs)
+	brnz,pt	%o0, 1f
+	 mov	1, %o1
+	retl
+	 clr	%o0
+	nop
+	nop
+ENTRY(__ffs)
+	sllx	%o0, 32, %g1		/* 1  */
+	srlx	%o0, 32, %g2
+
+	clr	%o1			/* 2  */
+	movrz	%g1, %g2, %o0
+
+	movrz	%g1, 32, %o1		/* 3  */
+1:	clr	%o2
+
+	sllx	%o0, (64 - 16), %g1	/* 4  */
+	srlx	%o0, 16, %g2
+
+	movrz	%g1, %g2, %o0		/* 5  */
+	clr	%o3
+
+	movrz	%g1, 16, %o2		/* 6  */
+	clr	%o4
+
+	and	%o0, 0xff, %g1		/* 7  */
+	srlx	%o0, 8, %g2
+
+	movrz	%g1, %g2, %o0		/* 8  */
+	clr	%o5
+
+	movrz	%g1, 8, %o3		/* 9  */
+	add	%o2, %o1, %o2
+
+	and	%o0, 0xf, %g1		/* 10 */
+	srlx	%o0, 4, %g2
+
+	movrz	%g1, %g2, %o0		/* 11 */
+	add	%o2, %o3, %o2
+
+	movrz	%g1, 4, %o4		/* 12 */
+
+	and	%o0, 0x3, %g1		/* 13 */
+	srlx	%o0, 2, %g2
+
+	movrz	%g1, %g2, %o0		/* 14 */
+	add	%o2, %o4, %o2
+
+	movrz	%g1, 2, %o5		/* 15 */
+
+	and	%o0, 0x1, %g1		/* 16 */
+
+	add	%o2, %o5, %o2		/* 17 */
+	xor	%g1, 0x1, %g1
+
+	retl				/* 18 */
+	 add	%o2, %g1, %o0
+ENDPROC(ffs)
+ENDPROC(__ffs)
+EXPORT_SYMBOL(__ffs)
+EXPORT_SYMBOL(ffs)
+
+	.section	.popc_6insn_patch, "ax"
+	.word		ffs
+	brz,pn	%o0, 98f
+	 neg	%o0, %g1
+	xnor	%o0, %g1, %o1
+	popc	%o1, %o0
+98:	retl
+	 nop
+	.word		__ffs
+	neg	%o0, %g1
+	xnor	%o0, %g1, %o1
+	popc	%o1, %o0
+	retl
+	 sub	%o0, 1, %o0
+	nop
+	.previous
diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S
new file mode 100644
index 0000000..06b8d30
--- /dev/null
+++ b/arch/sparc/lib/fls.S
@@ -0,0 +1,67 @@
+/* fls.S: SPARC default fls definition.
+ *
+ * SPARC default fls definition, which follows the same algorithm as
+ * in generic fls(). This function will be boot-time patched on T4
+ * and onward.
+ */
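+
+/* For reference, the generic algorithm the code follows, as a C
+ * sketch:
+ *
+ *	int fls(u32 x) {
+ *		int r = 32;
+ *		if (!x) return 0;
+ *		if (!(x & 0xffff0000)) { x <<= 16; r -= 16; }
+ *		if (!(x & 0xff000000)) { x <<= 8;  r -= 8; }
+ *		if (!(x & 0xf0000000)) { x <<= 4;  r -= 4; }
+ *		if (!(x & 0xc0000000)) { x <<= 2;  r -= 2; }
+ *		if (!(x & 0x80000000)) r -= 1;
+ *		return r;
+ *	}
+ */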
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+ENTRY(fls)
+	brz,pn	%o0, 6f
+	 mov	0, %o1
+	sethi	%hi(0xffff0000), %g3
+	mov	%o0, %g2
+	andcc	%o0, %g3, %g0
+	be,pt	%icc, 8f
+	 mov	32, %o1
+	sethi	%hi(0xff000000), %g3
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 3f
+	 sethi	%hi(0xf0000000), %g3
+	sll	%o0, 8, %o0
+1:
+	add	%o1, -8, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+2:
+	sethi	%hi(0xf0000000), %g3
+3:
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 4f
+	 sethi	%hi(0xc0000000), %g3
+	sll	%o0, 4, %o0
+	add	%o1, -4, %o1
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+4:
+	andcc	%g2, %g3, %g0
+	be,a,pt	%icc, 7f
+	 sll	%o0, 2, %o0
+5:
+	xnor	%g0, %o0, %o0
+	srl	%o0, 31, %o0
+	sub	%o1, %o0, %o1
+6:
+	jmp	%o7 + 8
+	 sra	%o1, 0, %o0
+7:
+	add	%o1, -2, %o1
+	ba,pt	%xcc, 5b
+	 sra	%o0, 0, %o0
+8:
+	sll	%o0, 16, %o0
+	sethi	%hi(0xff000000), %g3
+	sra	%o0, 0, %o0
+	mov	%o0, %g2
+	andcc	%g2, %g3, %g0
+	bne,pt	%icc, 2b
+	 mov	16, %o1
+	ba,pt	%xcc, 1b
+	 sll	%o0, 8, %o0
+ENDPROC(fls)
+EXPORT_SYMBOL(fls)
diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S
new file mode 100644
index 0000000..c83e22a
--- /dev/null
+++ b/arch/sparc/lib/fls64.S
@@ -0,0 +1,61 @@
+/* fls64.S: SPARC default __fls definition.
+ *
+ * SPARC default __fls definition, which follows the same algorithm as
+ * in generic __fls(). This function will be boot-time patched on T4
+ * and onward.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.register	%g2, #scratch
+	.register	%g3, #scratch
+ENTRY(__fls)
+	mov	-1, %g2
+	sllx	%g2, 32, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 1f
+	 mov	63, %g1
+	sllx	%o0, 32, %o0
+	mov	31, %g1
+1:
+	mov	-1, %g2
+	sllx	%g2, 48, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 2f
+	 mov	-1, %g2
+	sllx	%o0, 16, %o0
+	add	%g1, -16, %g1
+2:
+	mov	-1, %g2
+	sllx	%g2, 56, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 3f
+	 mov	-1, %g2
+	sllx	%o0, 8, %o0
+	add	%g1, -8, %g1
+3:
+	sllx	%g2, 60, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 4f
+	 mov	-1, %g2
+	sllx	%o0, 4, %o0
+	add	%g1, -4, %g1
+4:
+	sllx	%g2, 62, %g2
+	and	%o0, %g2, %g2
+	brnz,pt	%g2, 5f
+	 mov	-1, %g3
+	sllx	%o0, 2, %o0
+	add	%g1, -2, %g1
+5:
+	mov	0, %g2
+	sllx	%g3, 63, %g3
+	and	%o0, %g3, %o0
+	movre	%o0, 1, %g2
+	sub	%g1, %g2, %g1
+	jmp	%o7+8
+	 sra	%g1, 0, %o0
+ENDPROC(__fls)
+EXPORT_SYMBOL(__fls)
diff --git a/arch/sparc/lib/hweight.S b/arch/sparc/lib/hweight.S
new file mode 100644
index 0000000..0ddbbb0
--- /dev/null
+++ b/arch/sparc/lib/hweight.S
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/export.h>
+
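+/* Each routine below defaults to tail-calling the generic
+ * __sw_hweight* helper; on CPUs that have the popc instruction, the
+ * three-instruction sequences in the .popc_3insn_patch sections are
+ * patched over these stubs at boot. */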
+	.text
+	.align	32
+ENTRY(__arch_hweight8)
+	sethi	%hi(__sw_hweight8), %g1
+	jmpl	%g1 + %lo(__sw_hweight8), %g0
+	 nop
+ENDPROC(__arch_hweight8)
+EXPORT_SYMBOL(__arch_hweight8)
+	.section	.popc_3insn_patch, "ax"
+	.word		__arch_hweight8
+	sllx		%o0, 64-8, %g1
+	retl
+	 popc		%g1, %o0
+	.previous
+
+ENTRY(__arch_hweight16)
+	sethi	%hi(__sw_hweight16), %g1
+	jmpl	%g1 + %lo(__sw_hweight16), %g0
+	 nop
+ENDPROC(__arch_hweight16)
+EXPORT_SYMBOL(__arch_hweight16)
+	.section	.popc_3insn_patch, "ax"
+	.word		__arch_hweight16
+	sllx		%o0, 64-16, %g1
+	retl
+	 popc		%g1, %o0
+	.previous
+
+ENTRY(__arch_hweight32)
+	sethi	%hi(__sw_hweight32), %g1
+	jmpl	%g1 + %lo(__sw_hweight32), %g0
+	 nop
+ENDPROC(__arch_hweight32)
+EXPORT_SYMBOL(__arch_hweight32)
+	.section	.popc_3insn_patch, "ax"
+	.word		__arch_hweight32
+	sllx		%o0, 64-32, %g1
+	retl
+	 popc		%g1, %o0
+	.previous
+
+ENTRY(__arch_hweight64)
+	sethi	%hi(__sw_hweight64), %g1
+	jmpl	%g1 + %lo(__sw_hweight64), %g0
+	 nop
+ENDPROC(__arch_hweight64)
+EXPORT_SYMBOL(__arch_hweight64)
+	.section	.popc_3insn_patch, "ax"
+	.word		__arch_hweight64
+	retl
+	 popc		%o0, %o0
+	nop
+	.previous
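
Each __arch_hweightN above starts life as a three-instruction trampoline into
the generic __sw_hweightN software fallback; on CPUs that have popc, boot code
overwrites those three instructions with the sequence recorded in the
.popc_3insn_patch section. What the patched sequences compute, as a hedged C
sketch (assuming __builtin_popcountll stands in for popc; the left shift
discards any bits above the argument width, just as the sllx does):

	#include <stdint.h>

	static unsigned int hweight8_patched(uint64_t x)  { return __builtin_popcountll(x << 56); }
	static unsigned int hweight16_patched(uint64_t x) { return __builtin_popcountll(x << 48); }
	static unsigned int hweight32_patched(uint64_t x) { return __builtin_popcountll(x << 32); }
	static unsigned int hweight64_patched(uint64_t x) { return __builtin_popcountll(x); }
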
diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c
new file mode 100644
index 0000000..c9da9f1
--- /dev/null
+++ b/arch/sparc/lib/iomap.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement the sparc iomap interfaces
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <asm/io.h>
+
+/* Create a virtual mapping cookie for an IO port range */
+void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return (void __iomem *) (unsigned long) port;
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+	/* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	/* Nothing to do */
+}
+EXPORT_SYMBOL(pci_iounmap);
diff --git a/arch/sparc/lib/ipcsum.S b/arch/sparc/lib/ipcsum.S
new file mode 100644
index 0000000..531d89c
--- /dev/null
+++ b/arch/sparc/lib/ipcsum.S
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
+	sub	%o1, 4, %g7
+	lduw	[%o0 + 0x00], %o2
+	lduw	[%o0 + 0x04], %g2
+	lduw	[%o0 + 0x08], %g3
+	addcc	%g2, %o2, %o2
+	lduw	[%o0 + 0x0c], %g2
+	addccc	%g3, %o2, %o2
+	lduw	[%o0 + 0x10], %g3
+
+	addccc	%g2, %o2, %o2
+	addc	%o2, %g0, %o2
+1:	addcc	%g3, %o2, %o2
+	add	%o0, 4, %o0
+	addccc	%o2, %g0, %o2
+	subcc	%g7, 1, %g7
+	be,a,pt	%icc, 2f
+	 sll	%o2, 16, %g2
+
+	lduw	[%o0 + 0x10], %g3
+	ba,pt	%xcc, 1b
+	 nop
+2:	addcc	%o2, %g2, %g2
+	srl	%g2, 16, %o2
+	addc	%o2, %g0, %o2
+	xnor	%g0, %o2, %o2
+	set	0xffff, %o1
+	retl
+	 and	%o2, %o1, %o0
+ENDPROC(ip_fast_csum)
+EXPORT_SYMBOL(ip_fast_csum)
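
ip_fast_csum computes the IPv4 header checksum: a one's-complement sum of the
header words, folded to 16 bits and complemented. A hedged C reference
(illustrative name; like the assembly, it assumes a 4-byte-aligned header and
ihl >= 5):

	static unsigned short ip_fast_csum_ref(const void *iph, unsigned int ihl)
	{
		const unsigned int *p = iph;
		unsigned long long sum = 0;
		unsigned int i;

		for (i = 0; i < ihl; i++)		/* ihl is in 32-bit words */
			sum += p[i];
		sum = (sum & 0xffffffff) + (sum >> 32);	/* fold 64 -> 32 */
		sum = (sum & 0xffff) + (sum >> 16);	/* fold 32 -> 16 */
		sum = (sum & 0xffff) + (sum >> 16);	/* absorb final carry */
		return (unsigned short)~sum;
	}
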
diff --git a/arch/sparc/lib/libgcc.h b/arch/sparc/lib/libgcc.h
new file mode 100644
index 0000000..79845c9
--- /dev/null
+++ b/arch/sparc/lib/libgcc.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_LIBGCC_H
+#define __ASM_LIBGCC_H
+
+#include <asm/byteorder.h>
+
+typedef int word_type __attribute__ ((mode (__word__)));
+
+struct DWstruct {
+	int high, low;
+};
+
+typedef union
+{
+	struct DWstruct s;
+	long long ll;
+} DWunion;
+
+#endif /* __ASM_LIBGCC_H */
diff --git a/arch/sparc/lib/locks.S b/arch/sparc/lib/locks.S
new file mode 100644
index 0000000..9a1289a
--- /dev/null
+++ b/arch/sparc/lib/locks.S
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * locks.S: SMP low-level lock primitives on Sparc.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Anton Blanchard (anton@progsoc.uts.edu.au)
+ * Copyright (C) 1998 Jakub Jelinek   (jj@ultra.linux.cz)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/smp.h>
+#include <asm/spinlock.h>
+#include <asm/export.h>
+
+	.text
+	.align	4
+
+	/* Reader/writer locks; as usual this is overly clever, in order
+	 * to make it as fast as possible.
+	 */
+
+	/* caches... */
+___rw_read_enter_spin_on_wlock:
+	orcc	%g2, 0x0, %g0
+	be,a	___rw_read_enter
+	 ldstub	[%g1 + 3], %g2
+	b	___rw_read_enter_spin_on_wlock
+	 ldub	[%g1 + 3], %g2
+___rw_read_try_spin_on_wlock:
+	andcc	%g2, 0xff, %g0
+	be,a	___rw_read_try
+	 ldstub	[%g1 + 3], %g2
+	xnorcc	%g2, 0x0, %o0	/* if g2 is ~0, set o0 to 0 and bugger off */
+	bne,a	___rw_read_enter_spin_on_wlock
+	 ld	[%g1], %g2
+	retl
+	 mov	%g4, %o7
+___rw_read_exit_spin_on_wlock:
+	orcc	%g2, 0x0, %g0
+	be,a	___rw_read_exit
+	 ldstub	[%g1 + 3], %g2
+	b	___rw_read_exit_spin_on_wlock
+	 ldub	[%g1 + 3], %g2
+___rw_write_enter_spin_on_wlock:
+	orcc	%g2, 0x0, %g0
+	be,a	___rw_write_enter
+	 ldstub	[%g1 + 3], %g2
+	b	___rw_write_enter_spin_on_wlock
+	 ld	[%g1], %g2
+
+	.globl	___rw_read_enter
+EXPORT_SYMBOL(___rw_read_enter)
+___rw_read_enter:
+	orcc	%g2, 0x0, %g0
+	bne,a	___rw_read_enter_spin_on_wlock
+	 ldub	[%g1 + 3], %g2
+	ld	[%g1], %g2
+	add	%g2, 1, %g2
+	st	%g2, [%g1]
+	retl
+	 mov	%g4, %o7
+
+	.globl	___rw_read_exit
+EXPORT_SYMBOL(___rw_read_exit)
+___rw_read_exit:
+	orcc	%g2, 0x0, %g0
+	bne,a	___rw_read_exit_spin_on_wlock
+	 ldub	[%g1 + 3], %g2
+	ld	[%g1], %g2
+	sub	%g2, 0x1ff, %g2
+	st	%g2, [%g1]
+	retl
+	 mov	%g4, %o7
+
+	.globl	___rw_read_try
+EXPORT_SYMBOL(___rw_read_try)
+___rw_read_try:
+	orcc	%g2, 0x0, %g0
+	bne	___rw_read_try_spin_on_wlock
+	 ld	[%g1], %g2
+	add	%g2, 1, %g2
+	st	%g2, [%g1]
+	set	1, %o1
+	retl
+	 mov	%g4, %o7
+
+	.globl	___rw_write_enter
+EXPORT_SYMBOL(___rw_write_enter)
+___rw_write_enter:
+	orcc	%g2, 0x0, %g0
+	bne	___rw_write_enter_spin_on_wlock
+	 ld	[%g1], %g2
+	andncc	%g2, 0xff, %g0
+	bne,a	___rw_write_enter_spin_on_wlock
+	 stb	%g0, [%g1 + 3]
+	retl
+	 mov	%g4, %o7
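
The cleverness: the 32-bit lock word keeps the reader count in its upper 24
bits and uses its least-significant byte (offset 3 on big-endian SPARC, hence
the ldstub [%g1 + 3]) as an interlock. A reader grabs the interlock byte
(ldstub writes 0xff into it), and then a single add of 1 both carries into
the reader count and clears the byte again; exit subtracts 0x1ff to drop one
reader and release the interlock in one store. A hedged C model of just that
arithmetic (no atomicity implied; the ldstub provides it in the real code):

	#include <stdint.h>

	/* w is the lock word after ldstub has left 0xff in the low byte */
	static uint32_t reader_enter(uint32_t w) { return w + 1; }	/* 0xff + 1 carries into the count */
	static uint32_t reader_exit(uint32_t w)  { return w - 0x1ff; }	/* -0x100 reader, -0xff interlock */
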
diff --git a/arch/sparc/lib/lshrdi3.S b/arch/sparc/lib/lshrdi3.S
new file mode 100644
index 0000000..509ca66
--- /dev/null
+++ b/arch/sparc/lib/lshrdi3.S
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+ENTRY(__lshrdi3)
+	cmp	%o2, 0
+	be	3f
+	 mov	0x20, %g2
+
+	sub	%g2, %o2, %g2
+	cmp	%g2, 0
+	bg	1f
+	 srl	%o0, %o2, %o4
+
+	clr	%o4
+	neg	%g2
+	b	2f
+	 srl	%o0, %g2, %o5
+1:
+	sll	%o0, %g2, %g3
+	srl	%o1, %o2, %g2
+	or	%g2, %g3, %o5
+2:
+	mov	%o4, %o0
+	mov	%o5, %o1
+3:
+	retl
+	 nop
+ENDPROC(__lshrdi3)
+EXPORT_SYMBOL(__lshrdi3)
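
__lshrdi3 is the libgcc 64-bit logical right shift built from 32-bit
operations; %o0/%o1 hold the high/low words (big-endian pair) and %o2 the
shift count. A hedged C rendering (illustrative name):

	static unsigned long long lshrdi3_ref(unsigned int hi, unsigned int lo,
					      unsigned int count)
	{
		unsigned int rhi, rlo;

		if (count == 0) {
			rhi = hi; rlo = lo;
		} else if (count < 32) {
			rhi = hi >> count;
			rlo = (lo >> count) | (hi << (32 - count));
		} else {
			rhi = 0;
			rlo = hi >> (count - 32);
		}
		return ((unsigned long long)rhi << 32) | rlo;
	}
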
diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S
new file mode 100644
index 0000000..deba6fa
--- /dev/null
+++ b/arch/sparc/lib/mcount.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com)
+ *
+ * This file implements mcount(), which is used to collect profiling data.
+ * This can also be tweaked for kernel stack overflow detection.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+/*
+ * This is the main variant and is called by C code.  GCC's -pg option
+ * automatically instruments every C function with a call to this.
+ */
+
+	.text
+	.align		32
+	.globl		_mcount
+	.type		_mcount,#function
+	EXPORT_SYMBOL(_mcount)
+	.globl		mcount
+	.type		mcount,#function
+_mcount:
+mcount:
+#ifdef CONFIG_FUNCTION_TRACER
+#ifdef CONFIG_DYNAMIC_FTRACE
+	/* Do nothing, the retl/nop below is all we need.  */
+#else
+	sethi		%hi(ftrace_trace_function), %g1
+	sethi		%hi(ftrace_stub), %g2
+	ldx		[%g1 + %lo(ftrace_trace_function)], %g1
+	or		%g2, %lo(ftrace_stub), %g2
+	cmp		%g1, %g2
+	be,pn		%icc, 1f
+	 mov		%i7, %g3
+	save		%sp, -176, %sp
+	mov		%g3, %o1
+	jmpl		%g1, %o7
+	 mov		%i7, %o0
+	ret
+	 restore
+	/* not reached */
+1:
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	sethi		%hi(ftrace_graph_return), %g1
+	ldx		[%g1 + %lo(ftrace_graph_return)], %g3
+	cmp		%g2, %g3
+	bne,pn		%xcc, 5f
+	 sethi		%hi(ftrace_graph_entry_stub), %g2
+	sethi		%hi(ftrace_graph_entry), %g1
+	or		%g2, %lo(ftrace_graph_entry_stub), %g2
+	ldx		[%g1 + %lo(ftrace_graph_entry)], %g1
+	cmp		%g1, %g2
+	be,pt		%xcc, 2f
+	 nop
+5:	mov		%i7, %g2
+	mov		%fp, %g3
+	save		%sp, -176, %sp
+	mov		%g2, %l0
+	ba,pt		%xcc, ftrace_graph_caller
+	 mov		%g3, %l1
+#endif
+2:
+#endif
+#endif
+	retl
+	 nop
+	.size		_mcount,.-_mcount
+	.size		mcount,.-mcount
+
+#ifdef CONFIG_FUNCTION_TRACER
+	.globl		ftrace_stub
+	.type		ftrace_stub,#function
+ftrace_stub:
+	retl
+	 nop
+	.size		ftrace_stub,.-ftrace_stub
+#ifdef CONFIG_DYNAMIC_FTRACE
+	.globl		ftrace_caller
+	.type		ftrace_caller,#function
+ftrace_caller:
+	mov		%i7, %g2
+	mov		%fp, %g3
+	save		%sp, -176, %sp
+	mov		%g2, %o1
+	mov		%g2, %l0
+	mov		%g3, %l1
+	.globl		ftrace_call
+ftrace_call:
+	call		ftrace_stub
+	 mov		%i7, %o0
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl		ftrace_graph_call
+ftrace_graph_call:
+	call		ftrace_stub
+	 nop
+#endif
+	ret
+	 restore
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.size		ftrace_graph_call,.-ftrace_graph_call
+#endif
+	.size		ftrace_call,.-ftrace_call
+	.size		ftrace_caller,.-ftrace_caller
+#endif
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+	mov		%l0, %o0
+	mov		%i7, %o1
+	call		prepare_ftrace_return
+	 mov		%l1, %o2
+	ret
+	 restore	%o0, -8, %i7
+END(ftrace_graph_caller)
+
+ENTRY(return_to_handler)
+	save		%sp, -176, %sp
+	call		ftrace_return_to_handler
+	 mov		%fp, %o0
+	jmpl		%o0 + 8, %g0
+	 restore
+END(return_to_handler)
+#endif
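
As the header comment of this file says, GCC's -pg option inserts the
profiling hook into every C function. Conceptually (a hedged sketch, not
literal compiler output):

	/* what -pg instrumentation amounts to */
	void foo(void)
	{
		mcount();	/* emitted in the prologue; %o7 points back
				 * into foo, %i7 holds foo's caller */
		/* ... original body of foo ... */
	}
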
diff --git a/arch/sparc/lib/memcmp.S b/arch/sparc/lib/memcmp.S
new file mode 100644
index 0000000..a18076e
--- /dev/null
+++ b/arch/sparc/lib/memcmp.S
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Sparc optimized memcmp code.
+ *
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/export.h>
+
+	.text
+ENTRY(memcmp)
+	cmp	%o2, 0
+1:	BRANCH32(be, pn, 2f)
+	 nop
+	ldub	[%o0], %g7
+	ldub	[%o1], %g3
+	sub	%o2, 1, %o2
+	add	%o0, 1, %o0
+	add	%o1, 1, %o1
+	subcc	%g7, %g3, %g3
+	BRANCH32(be, pt, 1b)
+	 cmp	%o2, 0
+	retl
+	 mov	%g3, %o0
+2:	retl
+	 mov	0, %o0
+ENDPROC(memcmp)
+EXPORT_SYMBOL(memcmp)
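
The loop above is a straightforward byte comparator. As a hedged C reference
(illustrative name; same return convention: the signed difference of the first
mismatching bytes, or 0 if all n bytes match):

	static int memcmp_ref(const unsigned char *s1, const unsigned char *s2,
			      unsigned long n)
	{
		while (n--) {
			int d = *s1++ - *s2++;

			if (d)
				return d;
		}
		return 0;
	}
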
diff --git a/arch/sparc/lib/memcpy.S b/arch/sparc/lib/memcpy.S
new file mode 100644
index 0000000..ee823d8
--- /dev/null
+++ b/arch/sparc/lib/memcpy.S
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* memcpy.S: Sparc optimized memcpy and memmove code
+ * Hand optimized from GNU libc's memcpy and memmove
+ * Copyright (C) 1991,1996 Free Software Foundation
+ * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi)
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <asm/export.h>
+#define FUNC(x) 		\
+	.globl	x;		\
+	.type	x,@function;	\
+	.align	4;		\
+x:
+
+/* Both these macros have to start with exactly the same insn */
+#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+	ldd	[%src + (offset) + 0x00], %t0; \
+	ldd	[%src + (offset) + 0x08], %t2; \
+	ldd	[%src + (offset) + 0x10], %t4; \
+	ldd	[%src + (offset) + 0x18], %t6; \
+	st	%t0, [%dst + (offset) + 0x00]; \
+	st	%t1, [%dst + (offset) + 0x04]; \
+	st	%t2, [%dst + (offset) + 0x08]; \
+	st	%t3, [%dst + (offset) + 0x0c]; \
+	st	%t4, [%dst + (offset) + 0x10]; \
+	st	%t5, [%dst + (offset) + 0x14]; \
+	st	%t6, [%dst + (offset) + 0x18]; \
+	st	%t7, [%dst + (offset) + 0x1c];
+
+#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
+	ldd	[%src + (offset) + 0x00], %t0; \
+	ldd	[%src + (offset) + 0x08], %t2; \
+	ldd	[%src + (offset) + 0x10], %t4; \
+	ldd	[%src + (offset) + 0x18], %t6; \
+	std	%t0, [%dst + (offset) + 0x00]; \
+	std	%t2, [%dst + (offset) + 0x08]; \
+	std	%t4, [%dst + (offset) + 0x10]; \
+	std	%t6, [%dst + (offset) + 0x18];
+
+#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
+	ldd	[%src - (offset) - 0x10], %t0; \
+	ldd	[%src - (offset) - 0x08], %t2; \
+	st	%t0, [%dst - (offset) - 0x10]; \
+	st	%t1, [%dst - (offset) - 0x0c]; \
+	st	%t2, [%dst - (offset) - 0x08]; \
+	st	%t3, [%dst - (offset) - 0x04];
+
+#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
+	ldd	[%src - (offset) - 0x10], %t0; \
+	ldd	[%src - (offset) - 0x08], %t2; \
+	std	%t0, [%dst - (offset) - 0x10]; \
+	std	%t2, [%dst - (offset) - 0x08];
+
+#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
+	ldub	[%src - (offset) - 0x02], %t0; \
+	ldub	[%src - (offset) - 0x01], %t1; \
+	stb	%t0, [%dst - (offset) - 0x02]; \
+	stb	%t1, [%dst - (offset) - 0x01];
+
+	.text
+	.align	4
+
+FUNC(memmove)
+EXPORT_SYMBOL(memmove)
+	cmp		%o0, %o1
+	mov		%o0, %g7
+	bleu		9f
+	 sub		%o0, %o1, %o4
+
+	add		%o1, %o2, %o3
+	cmp		%o3, %o0
+	bleu		0f
+	 andcc		%o4, 3, %o5
+
+	add		%o1, %o2, %o1
+	add		%o0, %o2, %o0
+	sub		%o1, 1, %o1
+	sub		%o0, 1, %o0
+
+1:	/* reverse_bytes */
+
+	ldub		[%o1], %o4
+	subcc		%o2, 1, %o2
+	stb		%o4, [%o0]
+	sub		%o1, 1, %o1
+	bne		1b
+	 sub		%o0, 1, %o0
+
+	retl
+	 mov		%g7, %o0
+
+/* NOTE: This code is executed only in the cases where
+ *	 %src (= %o1) & 3 != 0, i.e. the source still needs
+ *	 aligning to 4. So, for (%src & 3) ==
+ *	 1 we need to do ldub, lduh
+ *	 2 lduh
+ *	 3 just ldub
+ *	 so even if it looks weird, the branches
+ *	 are correct here. -jj
+ */
+78:	/* dword_align */
+
+	andcc		%o1, 1, %g0
+	be		4f
+	 andcc		%o1, 2, %g0
+
+	ldub		[%o1], %g2
+	add		%o1, 1, %o1
+	stb		%g2, [%o0]
+	sub		%o2, 1, %o2
+	bne		3f
+	 add		%o0, 1, %o0
+4:
+	lduh		[%o1], %g2
+	add		%o1, 2, %o1
+	sth		%g2, [%o0]
+	sub		%o2, 2, %o2
+	b		3f
+	 add		%o0, 2, %o0
+
+FUNC(memcpy)	/* %o0=dst %o1=src %o2=len */
+EXPORT_SYMBOL(memcpy)
+
+	sub		%o0, %o1, %o4
+	mov		%o0, %g7
+9:
+	andcc		%o4, 3, %o5
+0:
+	bne		86f
+	 cmp		%o2, 15
+
+	bleu		90f
+	 andcc		%o1, 3, %g0
+
+	bne		78b
+3:
+	 andcc		%o1, 4, %g0
+
+	be		2f
+	 mov		%o2, %g1
+
+	ld		[%o1], %o4
+	sub		%g1, 4, %g1
+	st		%o4, [%o0]
+	add		%o1, 4, %o1
+	add		%o0, 4, %o0
+2:
+	andcc		%g1, 0xffffff80, %g0
+	be		3f
+	 andcc		%o0, 4, %g0
+
+	be		82f + 4
+5:
+	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
+	sub		%g1, 128, %g1
+	add		%o1, 128, %o1
+	cmp		%g1, 128
+	bge		5b
+	 add		%o0, 128, %o0
+3:
+	andcc		%g1, 0x70, %g4
+	be		80f
+	 andcc		%g1, 8, %g0
+
+	sethi		%hi(80f), %o5
+	srl		%g4, 1, %o4
+	add		%g4, %o4, %o4
+	add		%o1, %g4, %o1
+	sub		%o5, %o4, %o5
+	jmpl		%o5 + %lo(80f), %g0
+	 add		%o0, %g4, %o0
+
+79:	/* memcpy_table */
+
+	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
+	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
+
+80:	/* memcpy_table_end */
+	be		81f
+	 andcc		%g1, 4, %g0
+
+	ldd		[%o1], %g2
+	add		%o0, 8, %o0
+	st		%g2, [%o0 - 0x08]
+	add		%o1, 8, %o1
+	st		%g3, [%o0 - 0x04]
+
+81:	/* memcpy_last7 */
+
+	be		1f
+	 andcc		%g1, 2, %g0
+
+	ld		[%o1], %g2
+	add		%o1, 4, %o1
+	st		%g2, [%o0]
+	add		%o0, 4, %o0
+1:
+	be		1f
+	 andcc		%g1, 1, %g0
+
+	lduh		[%o1], %g2
+	add		%o1, 2, %o1
+	sth		%g2, [%o0]
+	add		%o0, 2, %o0
+1:
+	be		1f
+	 nop
+
+	ldub		[%o1], %g2
+	stb		%g2, [%o0]
+1:
+	retl
+	 mov		%g7, %o0
+
+82:	/* ldd_std */
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
+	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
+	subcc		%g1, 128, %g1
+	add		%o1, 128, %o1
+	cmp		%g1, 128
+	bge		82b
+	 add		%o0, 128, %o0
+
+	andcc		%g1, 0x70, %g4
+	be		84f
+	 andcc		%g1, 8, %g0
+
+	sethi		%hi(84f), %o5
+	add		%o1, %g4, %o1
+	sub		%o5, %g4, %o5
+	jmpl		%o5 + %lo(84f), %g0
+	 add		%o0, %g4, %o0
+
+83:	/* amemcpy_table */
+
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
+	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
+
+84:	/* amemcpy_table_end */
+	be		85f
+	 andcc		%g1, 4, %g0
+
+	ldd		[%o1], %g2
+	add		%o0, 8, %o0
+	std		%g2, [%o0 - 0x08]
+	add		%o1, 8, %o1
+85:	/* amemcpy_last7 */
+	be		1f
+	 andcc		%g1, 2, %g0
+
+	ld		[%o1], %g2
+	add		%o1, 4, %o1
+	st		%g2, [%o0]
+	add		%o0, 4, %o0
+1:
+	be		1f
+	 andcc		%g1, 1, %g0
+
+	lduh		[%o1], %g2
+	add		%o1, 2, %o1
+	sth		%g2, [%o0]
+	add		%o0, 2, %o0
+1:
+	be		1f
+	 nop
+
+	ldub		[%o1], %g2
+	stb		%g2, [%o0]
+1:
+	retl
+	 mov		%g7, %o0
+
+86:	/* non_aligned */
+	cmp		%o2, 6
+	bleu		88f
+	 nop
+
+	save		%sp, -96, %sp
+	andcc		%i0, 3, %g0
+	be		61f
+	 andcc		%i0, 1, %g0
+	be		60f
+	 andcc		%i0, 2, %g0
+
+	ldub		[%i1], %g5
+	add		%i1, 1, %i1
+	stb		%g5, [%i0]
+	sub		%i2, 1, %i2
+	bne		61f
+	 add		%i0, 1, %i0
+60:
+	ldub		[%i1], %g3
+	add		%i1, 2, %i1
+	stb		%g3, [%i0]
+	sub		%i2, 2, %i2
+	ldub		[%i1 - 1], %g3
+	add		%i0, 2, %i0
+	stb		%g3, [%i0 - 1]
+61:
+	and		%i1, 3, %g2
+	and		%i2, 0xc, %g3
+	and		%i1, -4, %i1
+	cmp		%g3, 4
+	sll		%g2, 3, %g4
+	mov		32, %g2
+	be		4f
+	 sub		%g2, %g4, %l0
+
+	blu		3f
+	 cmp		%g3, 0x8
+
+	be		2f
+	 srl		%i2, 2, %g3
+
+	ld		[%i1], %i3
+	add		%i0, -8, %i0
+	ld		[%i1 + 4], %i4
+	b		8f
+	 add		%g3, 1, %g3
+2:
+	ld		[%i1], %i4
+	add		%i0, -12, %i0
+	ld		[%i1 + 4], %i5
+	add		%g3, 2, %g3
+	b		9f
+	 add		%i1, -4, %i1
+3:
+	ld		[%i1], %g1
+	add		%i0, -4, %i0
+	ld		[%i1 + 4], %i3
+	srl		%i2, 2, %g3
+	b		7f
+	 add		%i1, 4, %i1
+4:
+	ld		[%i1], %i5
+	cmp		%i2, 7
+	ld		[%i1 + 4], %g1
+	srl		%i2, 2, %g3
+	bleu		10f
+	 add		%i1, 8, %i1
+
+	ld		[%i1], %i3
+	add		%g3, -1, %g3
+5:
+	sll		%i5, %g4, %g2
+	srl		%g1, %l0, %g5
+	or		%g2, %g5, %g2
+	st		%g2, [%i0]
+7:
+	ld		[%i1 + 4], %i4
+	sll		%g1, %g4, %g2
+	srl		%i3, %l0, %g5
+	or		%g2, %g5, %g2
+	st		%g2, [%i0 + 4]
+8:
+	ld		[%i1 + 8], %i5
+	sll		%i3, %g4, %g2
+	srl		%i4, %l0, %g5
+	or		%g2, %g5, %g2
+	st		%g2, [%i0 + 8]
+9:
+	ld		[%i1 + 12], %g1
+	sll		%i4, %g4, %g2
+	srl		%i5, %l0, %g5
+	addcc		%g3, -4, %g3
+	or		%g2, %g5, %g2
+	add		%i1, 16, %i1
+	st		%g2, [%i0 + 12]
+	add		%i0, 16, %i0
+	bne,a		5b
+	 ld		[%i1], %i3
+10:
+	sll		%i5, %g4, %g2
+	srl		%g1, %l0, %g5
+	srl		%l0, 3, %g3
+	or		%g2, %g5, %g2
+	sub		%i1, %g3, %i1
+	andcc		%i2, 2, %g0
+	st		%g2, [%i0]
+	be		1f
+	 andcc		%i2, 1, %g0
+
+	ldub		[%i1], %g2
+	add		%i1, 2, %i1
+	stb		%g2, [%i0 + 4]
+	add		%i0, 2, %i0
+	ldub		[%i1 - 1], %g2
+	stb		%g2, [%i0 + 3]
+1:
+	be		1f
+	 nop
+	ldub		[%i1], %g2
+	stb		%g2, [%i0 + 4]
+1:
+	ret
+	 restore	%g7, %g0, %o0
+
+88:	/* short_end */
+
+	and		%o2, 0xe, %o3
+20:
+	sethi		%hi(89f), %o5
+	sll		%o3, 3, %o4
+	add		%o0, %o3, %o0
+	sub		%o5, %o4, %o5
+	add		%o1, %o3, %o1
+	jmpl		%o5 + %lo(89f), %g0
+	 andcc		%o2, 1, %g0
+
+	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
+	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
+
+89:	/* short_table_end */
+
+	be		1f
+	 nop
+
+	ldub		[%o1], %g2
+	stb		%g2, [%o0]
+1:
+	retl
+	 mov		%g7, %o0
+
+90:	/* short_aligned_end */
+	bne		88b
+	 andcc		%o2, 8, %g0
+
+	be		1f
+	 andcc		%o2, 4, %g0
+
+	ld		[%o1 + 0x00], %g2
+	ld		[%o1 + 0x04], %g3
+	add		%o1, 8, %o1
+	st		%g2, [%o0 + 0x00]
+	st		%g3, [%o0 + 0x04]
+	add		%o0, 8, %o0
+1:
+	b		81b
+	 mov		%o2, %g1
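
A note on the jmpl ... + %lo(80f) and %lo(84f) dispatches above: they are
Duff's-device-style computed jumps. Each MOVE_LASTCHUNK/MOVE_LASTALIGNCHUNK
expansion is a fixed-size run of instructions, so jumping the right multiple
of that size backwards from the table's end executes exactly the entries
needed for the remaining (len & 0x70) bytes. A hedged C analogue of what the
tail dispatch achieves (a plain loop; the assembly unrolls it as a jump table
instead):

	static void copy_tail16(char *dst, const char *src, unsigned long tail)
	{
		/* tail = len & 0x70: 0..112 bytes in 16-byte steps */
		while (tail) {
			__builtin_memcpy(dst, src, 16);
			dst += 16;
			src += 16;
			tail -= 16;
		}
	}
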
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
new file mode 100644
index 0000000..3132b63
--- /dev/null
+++ b/arch/sparc/lib/memmove.S
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* memmove.S: Simple memmove implementation.
+ *
+ * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
+ * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+ENTRY(memmove) /* o0=dst o1=src o2=len */
+	brz,pn		%o2, 99f
+	 mov		%o0, %g1
+
+	cmp		%o0, %o1
+	bleu,pt		%xcc, 2f
+	 add		%o1, %o2, %g7
+	cmp		%g7, %o0
+	bleu,pt		%xcc, memcpy
+	 add		%o0, %o2, %o5
+	sub		%g7, 1, %o1
+
+	sub		%o5, 1, %o0
+1:	ldub		[%o1], %g7
+	subcc		%o2, 1, %o2
+	sub		%o1, 1, %o1
+	stb		%g7, [%o0]
+	bne,pt		%icc, 1b
+	 sub		%o0, 1, %o0
+99:
+	retl
+	 mov		%g1, %o0
+
+	/* We can't just call memcpy for these memmove cases.  On some
+	 * chips the memcpy uses cache initializing stores and when dst
+	 * and src are close enough, those can clobber the source data
+	 * before we've loaded it in.
+	 */
+2:	or		%o0, %o1, %g7
+	or		%o2, %g7, %g7
+	andcc		%g7, 0x7, %g0
+	bne,pn		%xcc, 4f
+	 nop
+
+3:	ldx		[%o1], %g7
+	add		%o1, 8, %o1
+	subcc		%o2, 8, %o2
+	add		%o0, 8, %o0
+	bne,pt		%icc, 3b
+	 stx		%g7, [%o0 - 0x8]
+	ba,a,pt		%xcc, 99b
+
+4:	ldub		[%o1], %g7
+	add		%o1, 1, %o1
+	subcc		%o2, 1, %o2
+	add		%o0, 1, %o0
+	bne,pt		%icc, 4b
+	 stb		%g7, [%o0 - 0x1]
+	ba,a,pt		%xcc, 99b
+ENDPROC(memmove)
+EXPORT_SYMBOL(memmove)
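
Control flow of the memmove above, as a hedged C sketch (illustrative name;
pointer comparisons simplified). Note the third arm: even when a plain forward
copy would be safe, the routine copies locally rather than tail-calling
memcpy, for the cache-initializing-store reason given in the comment.

	void *memmove_sketch(void *dst, const void *src, unsigned long n)
	{
		char *d = dst;
		const char *s = src;

		if (d > s && s + n > d) {	/* overlap, dst above src */
			d += n;
			s += n;
			while (n--)		/* copy backwards, bytewise */
				*--d = *--s;
		} else if (d > s) {		/* disjoint: memcpy is safe */
			__builtin_memcpy(d, s, n);
		} else {			/* d <= s: forward, done locally */
			while (n--)
				*d++ = *s++;
		}
		return dst;
	}
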
diff --git a/arch/sparc/lib/memscan_32.S b/arch/sparc/lib/memscan_32.S
new file mode 100644
index 0000000..c4c2d5b
--- /dev/null
+++ b/arch/sparc/lib/memscan_32.S
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * memscan.S: Optimized memscan for the Sparc.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/export.h>
+
+/* In essence, this is just a fancy strlen. */
+
+#define LO_MAGIC 0x01010101
+#define HI_MAGIC 0x80808080
+
+	.text
+	.align	4
+	.globl	__memscan_zero, __memscan_generic
+	.globl	memscan
+EXPORT_SYMBOL(__memscan_zero)
+EXPORT_SYMBOL(__memscan_generic)
+__memscan_zero:
+	/* %o0 = addr, %o1 = size */
+	cmp	%o1, 0
+	bne,a	1f
+	 andcc	%o0, 3, %g0
+
+	retl
+	 nop
+
+1:
+	be	mzero_scan_word
+	 sethi	%hi(HI_MAGIC), %g2
+
+	ldsb	[%o0], %g3
+mzero_still_not_word_aligned:
+	cmp	%g3, 0
+	bne	1f
+	 add	%o0, 1, %o0
+
+	retl
+	 sub	%o0, 1, %o0
+
+1:
+	subcc	%o1, 1, %o1
+	bne,a	1f
+	 andcc	%o0, 3, %g0
+
+	retl
+	 nop
+
+1:
+	bne,a	mzero_still_not_word_aligned
+	 ldsb	[%o0], %g3
+
+	sethi	%hi(HI_MAGIC), %g2
+mzero_scan_word:
+	or	%g2, %lo(HI_MAGIC), %o3
+	sethi	%hi(LO_MAGIC), %g3
+	or	%g3, %lo(LO_MAGIC), %o2
+mzero_next_word:
+	ld	[%o0], %g2
+mzero_next_word_preloaded:
+	sub	%g2, %o2, %g2
+mzero_next_word_preloaded_next:
+	andcc	%g2, %o3, %g0
+	bne	mzero_byte_zero
+	 add	%o0, 4, %o0
+
+mzero_check_out_of_fuel:
+	subcc	%o1, 4, %o1
+	bg,a	1f
+	 ld	[%o0], %g2
+
+	retl
+	 nop
+
+1:
+	b	mzero_next_word_preloaded_next
+	 sub	%g2, %o2, %g2
+
+	/* Check every byte. */
+mzero_byte_zero:
+	ldsb	[%o0 - 4], %g2
+	cmp	%g2, 0
+	bne	mzero_byte_one
+	 sub	%o0, 4, %g3
+
+	retl
+	 mov	%g3, %o0
+
+mzero_byte_one:
+	ldsb	[%o0 - 3], %g2
+	cmp	%g2, 0
+	bne,a	mzero_byte_two_and_three
+	 ldsb	[%o0 - 2], %g2
+
+	retl
+	 sub	%o0, 3, %o0
+
+mzero_byte_two_and_three:
+	cmp	%g2, 0
+	bne,a	1f
+	 ldsb	[%o0 - 1], %g2
+
+	retl
+	 sub	%o0, 2, %o0
+
+1:
+	cmp	%g2, 0
+	bne,a	mzero_next_word_preloaded
+	 ld	[%o0], %g2
+
+	retl
+	 sub	%o0, 1, %o0
+
+mzero_found_it:
+	retl
+	 sub	%o0, 2, %o0
+
+memscan:
+__memscan_generic:
+	/* %o0 = addr, %o1 = c, %o2 = size */
+	cmp	%o2, 0
+	bne,a	0f
+	 ldub	[%o0], %g2
+
+	b,a	2f
+1:
+	ldub	[%o0], %g2
+0:
+	cmp	%g2, %o1
+	be	2f
+	 addcc	%o2, -1, %o2
+	bne	1b
+	 add	%o0, 1, %o0
+2:
+	retl
+	 nop
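
The LO_MAGIC/HI_MAGIC pair implements the classic find-a-zero-byte-in-a-word
trick; the same constants reappear in strlen.S further down. A hedged C
sketch of the test: it can false-positive (e.g. for bytes 0x81..0xff) but
never misses a zero byte, which is why a hit falls through to the per-byte
checks rather than being trusted outright.

	static int word_may_have_zero_byte(unsigned int w)
	{
		return ((w - 0x01010101u) & 0x80808080u) != 0;
	}
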
diff --git a/arch/sparc/lib/memscan_64.S b/arch/sparc/lib/memscan_64.S
new file mode 100644
index 0000000..36dd638
--- /dev/null
+++ b/arch/sparc/lib/memscan_64.S
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * memscan.S: Optimized memscan for Sparc64.
+ *
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1998 David S. Miller (davem@redhat.com)
+ */
+
+#include <asm/export.h>
+
+#define HI_MAGIC	0x8080808080808080
+#define LO_MAGIC	0x0101010101010101
+#define ASI_PL		0x88
+
+	.text
+	.align	32
+	.globl		__memscan_zero, __memscan_generic
+	.type		__memscan_zero,#function
+	.type		__memscan_generic,#function
+	.globl		memscan
+	EXPORT_SYMBOL(__memscan_zero)
+	EXPORT_SYMBOL(__memscan_generic)
+
+__memscan_zero:
+	/* %o0 = bufp, %o1 = size */
+	brlez,pn	%o1, szzero
+	 andcc		%o0, 7, %g0
+	be,pt		%icc, we_are_aligned
+	 sethi		%hi(HI_MAGIC), %o4
+	ldub		[%o0], %o5
+1:	subcc		%o1, 1, %o1
+	brz,pn		%o5, 10f
+	 add		%o0, 1, %o0
+
+	be,pn		%xcc, szzero
+	 andcc		%o0, 7, %g0
+	bne,a,pn	%icc, 1b
+	 ldub		[%o0], %o5
+we_are_aligned:
+	ldxa		[%o0] ASI_PL, %o5
+	or		%o4, %lo(HI_MAGIC), %o3
+	sllx		%o3, 32, %o4
+	or		%o4, %o3, %o3
+
+	srlx		%o3, 7, %o2
+msloop:
+	sub		%o1, 8, %o1
+	add		%o0, 8, %o0
+	sub		%o5, %o2, %o4
+	xor		%o4, %o5, %o4
+	andcc		%o4, %o3, %g3
+	bne,pn		%xcc, check_bytes
+	 srlx		%o4, 32, %g3
+
+	brgz,a,pt	%o1, msloop
+	 ldxa		[%o0] ASI_PL, %o5
+check_bytes:
+	bne,a,pn	%icc, 2f
+	 andcc		%o5, 0xff, %g0
+	add		%o0, -5, %g2
+	ba,pt		%xcc, 3f
+	 srlx		%o5, 32, %g7
+
+2:	srlx		%o5, 8, %g7
+	be,pn		%icc, 1f
+	 add		%o0, -8, %g2
+	andcc		%g7, 0xff, %g0
+	srlx		%g7, 8, %g7
+	be,pn		%icc, 1f
+	 inc		%g2
+	andcc		%g7, 0xff, %g0
+
+	srlx		%g7, 8, %g7
+	be,pn		%icc, 1f
+	 inc		%g2
+	andcc		%g7, 0xff, %g0
+	srlx		%g7, 8, %g7
+	be,pn		%icc, 1f
+	 inc		%g2
+	andcc		%g3, %o3, %g0
+
+	be,a,pn		%icc, 2f
+	 mov		%o0, %g2
+3:	andcc		%g7, 0xff, %g0
+	srlx		%g7, 8, %g7
+	be,pn		%icc, 1f
+	 inc		%g2
+	andcc		%g7, 0xff, %g0
+	srlx		%g7, 8, %g7
+
+	be,pn		%icc, 1f
+	 inc		%g2
+	andcc		%g7, 0xff, %g0
+	srlx		%g7, 8, %g7
+	be,pn		%icc, 1f
+	 inc		%g2
+	andcc		%g7, 0xff, %g0
+	srlx		%g7, 8, %g7
+
+	be,pn		%icc, 1f
+	 inc		%g2
+2:	brgz,a,pt	%o1, msloop
+	 ldxa		[%o0] ASI_PL, %o5
+	inc		%g2
+1:	add		%o0, %o1, %o0
+	cmp		%g2, %o0
+	retl
+	 movle		%xcc, %g2, %o0
+10:	retl
+	 sub		%o0, 1, %o0
+szzero:	retl
+	 nop
+
+memscan:
+__memscan_generic:
+	/* %o0 = addr, %o1 = c, %o2 = size */
+	brz,pn		%o2, 3f
+	 add		%o0, %o2, %o3
+	ldub		[%o0], %o5
+	sub		%g0, %o2, %o4
+1:
+	cmp		%o5, %o1
+	be,pn		%icc, 2f
+	 addcc		%o4, 1, %o4
+	bne,a,pt 	%xcc, 1b
+	 ldub		[%o3 + %o4], %o5
+	retl
+	/* The retl's delay slot is the insn at 2: below; both paths share it, which is why this looks so awful */
+2:
+	 add		%o3, %o4, %o0
+	retl
+	 sub		%o0, 1, %o0
+3:
+	retl
+	 nop
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
new file mode 100644
index 0000000..b89d42b
--- /dev/null
+++ b/arch/sparc/lib/memset.S
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* linux/arch/sparc/lib/memset.S: Sparc optimized memset, bzero and clear_user code
+ * Copyright (C) 1991,1996 Free Software Foundation
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ *
+ * Calls to memset return the initial %o0. Calls to bzero return 0 if ok;
+ * if an exception occurs and we were called as clear_user, they return
+ * the number of bytes not yet set.
+ */
+
+#include <asm/ptrace.h>
+#include <asm/export.h>
+
+/* Work around cpp -rob */
+#define ALLOC #alloc
+#define EXECINSTR #execinstr
+#define EX(x,y,a,b) 				\
+98: 	x,y;					\
+	.section .fixup,ALLOC,EXECINSTR;	\
+	.align	4;				\
+99:	ba 30f;					\
+	 a, b, %o0;				\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	98b, 99b;			\
+	.text;					\
+	.align	4
+
+#define EXT(start,end,handler) 			\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	start, 0, end, handler;		\
+	.text;					\
+	.align	4
+
+/* Please don't change these macros, unless you change the logic
+ * in the .fixup section below as well.
+ *
+ * Store 64 bytes at (BASE + OFFSET) using value SOURCE.
+ */
+#define ZERO_BIG_BLOCK(base, offset, source)    \
+	std	source, [base + offset + 0x00]; \
+	std	source, [base + offset + 0x08]; \
+	std	source, [base + offset + 0x10]; \
+	std	source, [base + offset + 0x18]; \
+	std	source, [base + offset + 0x20]; \
+	std	source, [base + offset + 0x28]; \
+	std	source, [base + offset + 0x30]; \
+	std	source, [base + offset + 0x38];
+
+#define ZERO_LAST_BLOCKS(base, offset, source)	\
+	std	source, [base - offset - 0x38]; \
+	std	source, [base - offset - 0x30]; \
+	std	source, [base - offset - 0x28]; \
+	std	source, [base - offset - 0x20]; \
+	std	source, [base - offset - 0x18]; \
+	std	source, [base - offset - 0x10]; \
+	std	source, [base - offset - 0x08]; \
+	std	source, [base - offset - 0x00];
+
+	.text
+	.align 4
+
+        .globl  __bzero_begin
+__bzero_begin:
+
+	.globl	__bzero
+	.type	__bzero,#function
+	.globl	memset
+	EXPORT_SYMBOL(__bzero)
+	EXPORT_SYMBOL(memset)
+	.globl	__memset_start, __memset_end
+__memset_start:
+memset:
+	mov	%o0, %g1
+	mov	1, %g4
+	and	%o1, 0xff, %g3
+	sll	%g3, 8, %g2
+	or	%g3, %g2, %g3
+	sll	%g3, 16, %g2
+	or	%g3, %g2, %g3
+	b	1f
+	 mov	%o2, %o1
+3:
+	cmp	%o2, 3
+	be	2f
+	 EX(stb	%g3, [%o0], sub %o1, 0)
+
+	cmp	%o2, 2
+	be	2f
+	 EX(stb	%g3, [%o0 + 0x01], sub %o1, 1)
+
+	EX(stb	%g3, [%o0 + 0x02], sub %o1, 2)
+2:
+	sub	%o2, 4, %o2
+	add	%o1, %o2, %o1
+	b	4f
+	 sub	%o0, %o2, %o0
+
+__bzero:
+	clr	%g4
+	mov	%g0, %g3
+1:
+	cmp	%o1, 7
+	bleu	7f
+	 andcc	%o0, 3, %o2
+
+	bne	3b
+4:
+	 andcc	%o0, 4, %g0
+
+	be	2f
+	 mov	%g3, %g2
+
+	EX(st	%g3, [%o0], sub %o1, 0)
+	sub	%o1, 4, %o1
+	add	%o0, 4, %o0
+2:
+	andcc	%o1, 0xffffff80, %o3	! Now everything is 8 aligned and o1 is len to run
+	be	9f
+	 andcc	%o1, 0x78, %o2
+10:
+	ZERO_BIG_BLOCK(%o0, 0x00, %g2)
+	subcc	%o3, 128, %o3
+	ZERO_BIG_BLOCK(%o0, 0x40, %g2)
+11:
+	EXT(10b, 11b, 20f)
+	bne	10b
+	 add	%o0, 128, %o0
+
+	orcc	%o2, %g0, %g0
+9:
+	be	13f
+	 andcc	%o1, 7, %o1
+
+	srl	%o2, 1, %o3
+	set	13f, %o4
+	sub	%o4, %o3, %o4
+	jmp	%o4
+	 add	%o0, %o2, %o0
+
+12:
+	ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
+	ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
+13:
+	be	8f
+	 andcc	%o1, 4, %g0
+
+	be	1f
+	 andcc	%o1, 2, %g0
+
+	EX(st	%g3, [%o0], and %o1, 7)
+	add	%o0, 4, %o0
+1:
+	be	1f
+	 andcc	%o1, 1, %g0
+
+	EX(sth	%g3, [%o0], and %o1, 3)
+	add	%o0, 2, %o0
+1:
+	bne,a	8f
+	 EX(stb	%g3, [%o0], and %o1, 1)
+8:
+	b	0f
+	 nop
+7:
+	be	13b
+	 orcc	%o1, 0, %g0
+
+	be	0f
+8:
+	 add	%o0, 1, %o0
+	subcc	%o1, 1, %o1
+	bne	8b
+	 EX(stb	%g3, [%o0 - 1], add %o1, 1)
+0:
+	andcc	%g4, 1, %g0
+	be	5f
+	 nop
+	retl
+	 mov	%g1, %o0
+5:
+	retl
+	 clr	%o0
+__memset_end:
+
+	.section .fixup,#alloc,#execinstr
+	.align	4
+20:
+	cmp	%g2, 8
+	bleu	1f
+	 and	%o1, 0x7f, %o1
+	sub	%g2, 9, %g2
+	add	%o3, 64, %o3
+1:
+	sll	%g2, 3, %g2
+	add	%o3, %o1, %o0
+	b 30f
+	 sub	%o0, %g2, %o0
+21:
+	mov	8, %o0
+	and	%o1, 7, %o1
+	sub	%o0, %g2, %o0
+	sll	%o0, 3, %o0
+	b 30f
+	 add	%o0, %o1, %o0
+30:
+/* %o4 is faulting address, %o5 is %pc where fault occurred */
+	save	%sp, -104, %sp
+	mov	%i5, %o0
+	mov	%i7, %o1
+	call	lookup_fault
+	 mov	%i4, %o2
+	ret
+	 restore
+
+	.globl __bzero_end
+__bzero_end:
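
The entry sequence of memset above replicates the fill byte across a 32-bit
word (and the %g2/%g3 pair then feeds the 8-byte std stores in
ZERO_BIG_BLOCK). Hedged C equivalent of that replication (illustrative name):

	static unsigned int replicate_byte(unsigned int c)
	{
		unsigned int p = c & 0xff;

		p |= p << 8;	/* 0x000000cc -> 0x0000cccc */
		p |= p << 16;	/* 0x0000cccc -> 0xcccccccc */
		return p;
	}
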
diff --git a/arch/sparc/lib/muldi3.S b/arch/sparc/lib/muldi3.S
new file mode 100644
index 0000000..17a0f49
--- /dev/null
+++ b/arch/sparc/lib/muldi3.S
@@ -0,0 +1,78 @@
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#include <asm/export.h>
+	.text
+	.align 4
+	.globl __muldi3
+__muldi3:
+	save  %sp, -104, %sp
+	wr  %g0, %i1, %y
+	sra  %i3, 0x1f, %g2
+	and  %i1, %g2, %g2
+	andcc  %g0, 0, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, %i3, %g1
+	mulscc  %g1, 0, %g1
+	add  %g1, %g2, %l2
+	rd  %y, %o1
+	mov  %o1, %l3
+	mov  %i1, %o0
+	mov  %i2, %o1
+	umul %o0, %o1, %o0
+	mov  %o0, %l0
+	mov  %i0, %o0
+	mov  %i3, %o1
+	umul %o0, %o1, %o0
+	add  %l0, %o0, %l0
+	mov  %l2, %i0
+	add  %l2, %l0, %i0
+	ret
+	restore  %g0, %l3, %o1
+EXPORT_SYMBOL(__muldi3)
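
What __muldi3 computes, as a hedged C sketch (illustrative name): the low 64
bits of u*v assembled from 32-bit halves. The long mulscc chain is SPARC's
one-bit-per-step multiply primitive, used here for the full 64-bit ul*vl
product; the two umul instructions supply the low halves of the cross terms,
which are all that can affect a result taken mod 2^64.

	static unsigned long long muldi3_ref(unsigned long long u,
					     unsigned long long v)
	{
		unsigned int ul = u, uh = u >> 32;
		unsigned int vl = v, vh = v >> 32;
		unsigned long long low = (unsigned long long)ul * vl;
		unsigned int high = (unsigned int)(low >> 32) + ul * vh + uh * vl;

		return ((unsigned long long)high << 32) | (unsigned int)low;
	}
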
diff --git a/arch/sparc/lib/multi3.S b/arch/sparc/lib/multi3.S
new file mode 100644
index 0000000..2f187b2
--- /dev/null
+++ b/arch/sparc/lib/multi3.S
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+	.align	4
+ENTRY(__multi3) /* %o0 = u, %o1 = v */
+	mov	%o1, %g1
+	srl	%o3, 0, %o4
+	mulx	%o4, %g1, %o1
+	srlx	%g1, 0x20, %g3
+	mulx	%g3, %o4, %g7
+	sllx	%g7, 0x20, %o5
+	srl	%g1, 0, %o4
+	sub	%o1, %o5, %o5
+	srlx	%o5, 0x20, %o5
+	addcc	%g7, %o5, %g7
+	srlx	%o3, 0x20, %o5
+	mulx	%o4, %o5, %o4
+	mulx	%g3, %o5, %o5
+	sethi	%hi(0x80000000), %g3
+	addcc	%g7, %o4, %g7
+	srlx	%g7, 0x20, %g7
+	add	%g3, %g3, %g3
+	movcc	%xcc, %g0, %g3
+	addcc	%o5, %g7, %o5
+	sllx	%o4, 0x20, %o4
+	add	%o1, %o4, %o1
+	add	%o5, %g3, %g2
+	mulx	%g1, %o2, %g1
+	add	%g1, %g2, %g1
+	mulx	%o0, %o3, %o0
+	retl
+	 add	%g1, %o0, %o0
+ENDPROC(__multi3)
+EXPORT_SYMBOL(__multi3)
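
__multi3 assembles a 128-bit product from 64x64 mulx pieces; the
shift-and-carry sequence in the middle computes the high half of a 64x64
multiply without a umulxhi instruction. A standard portable formulation of
that high-half computation, as a hedged sketch (the textbook decomposition,
not a transcription of the exact register choreography above):

	#include <stdint.h>

	static uint64_t umul64hi(uint64_t u, uint64_t v)
	{
		uint64_t ul = (uint32_t)u, uh = u >> 32;
		uint64_t vl = (uint32_t)v, vh = v >> 32;
		uint64_t lo = ul * vl;
		uint64_t t  = uh * vl + (lo >> 32);
		uint64_t w  = ul * vh + (uint32_t)t;

		return uh * vh + (t >> 32) + (w >> 32);
	}
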
diff --git a/arch/sparc/lib/strlen.S b/arch/sparc/lib/strlen.S
new file mode 100644
index 0000000..dd111bb
--- /dev/null
+++ b/arch/sparc/lib/strlen.S
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* strlen.S: Sparc optimized strlen code
+ * Hand optimized from GNU libc's strlen
+ * Copyright (C) 1991,1996 Free Software Foundation
+ * Copyright (C) 1996,2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/export.h>
+
+#define LO_MAGIC 0x01010101
+#define HI_MAGIC 0x80808080
+
+	.text
+ENTRY(strlen)
+	mov	%o0, %o1
+	andcc	%o0, 3, %g0
+	BRANCH32(be, pt, 9f)
+	 sethi	%hi(HI_MAGIC), %o4
+	ldub	[%o0], %o5
+	BRANCH_REG_ZERO(pn, %o5, 11f)
+	 add	%o0, 1, %o0
+	andcc	%o0, 3, %g0
+	BRANCH32(be, pn, 4f)
+	 or	%o4, %lo(HI_MAGIC), %o3
+	ldub	[%o0], %o5
+	BRANCH_REG_ZERO(pn, %o5, 12f)
+	 add	%o0, 1, %o0
+	andcc	%o0, 3, %g0
+	BRANCH32(be, pt, 5f)
+	 sethi	%hi(LO_MAGIC), %o4
+	ldub	[%o0], %o5
+	BRANCH_REG_ZERO(pn, %o5, 13f)
+	 add	%o0, 1, %o0
+	BRANCH32(ba, pt, 8f)
+	 or	%o4, %lo(LO_MAGIC), %o2
+9:
+	or	%o4, %lo(HI_MAGIC), %o3
+4:
+	sethi	%hi(LO_MAGIC), %o4
+5:
+	or	%o4, %lo(LO_MAGIC), %o2
+8:
+	ld	[%o0], %o5
+2:
+	sub	%o5, %o2, %o4
+	andcc	%o4, %o3, %g0
+	BRANCH32(be, pt, 8b)
+	 add	%o0, 4, %o0
+
+	/* Check every byte. */
+	srl	%o5, 24, %g7
+	andcc	%g7, 0xff, %g0
+	BRANCH32(be, pn, 1f)
+	 add	%o0, -4, %o4
+	srl	%o5, 16, %g7
+	andcc	%g7, 0xff, %g0
+	BRANCH32(be, pn, 1f)
+	 add	%o4, 1, %o4
+	srl	%o5, 8, %g7
+	andcc	%g7, 0xff, %g0
+	BRANCH32(be, pn, 1f)
+	 add	%o4, 1, %o4
+	andcc	%o5, 0xff, %g0
+	BRANCH32_ANNUL(bne, pt, 2b)
+	 ld	[%o0], %o5
+	add	%o4, 1, %o4
+1:
+	retl
+	 sub	%o4, %o1, %o0
+11:
+	retl
+	 mov	0, %o0
+12:
+	retl
+	 mov	1, %o0
+13:
+	retl
+	 mov	2, %o0
+ENDPROC(strlen)
+EXPORT_SYMBOL(strlen)
diff --git a/arch/sparc/lib/strncmp_32.S b/arch/sparc/lib/strncmp_32.S
new file mode 100644
index 0000000..794733f
--- /dev/null
+++ b/arch/sparc/lib/strncmp_32.S
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * strncmp.S: Hand optimized Sparc assembly of GCC output from GNU libc
+ *            generic strncmp routine.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+
+	.text
+ENTRY(strncmp)
+	mov	%o0, %g3
+	mov	0, %o3
+
+	cmp	%o2, 3
+	ble	7f
+	 mov	0, %g2
+
+	sra	%o2, 2, %o4
+	ldub	[%g3], %o3
+
+0:
+	ldub	[%o1], %g2
+	add	%g3, 1, %g3
+	and	%o3, 0xff, %o0
+
+	cmp	%o0, 0
+	be	8f
+	 add	%o1, 1, %o1
+
+	cmp	%o0, %g2
+	be,a	1f
+	 ldub	[%g3], %o3
+
+	retl
+	 sub	%o0, %g2, %o0
+
+1:
+	ldub	[%o1], %g2
+	add	%g3, 1, %g3
+	and	%o3, 0xff, %o0
+
+	cmp	%o0, 0
+	be	8f
+	 add	%o1, 1, %o1
+
+	cmp	%o0, %g2
+	be,a	1f
+	 ldub	[%g3], %o3
+
+	retl
+	 sub	%o0, %g2, %o0
+
+1:
+	ldub	[%o1], %g2
+	add	%g3, 1, %g3
+	and	%o3, 0xff, %o0
+
+	cmp	%o0, 0
+	be	8f
+	 add	%o1, 1, %o1
+
+	cmp	%o0, %g2
+	be,a	1f
+	 ldub	[%g3], %o3
+
+	retl
+	 sub	%o0, %g2, %o0
+
+1:
+	ldub	[%o1], %g2
+	add	%g3, 1, %g3
+	and	%o3, 0xff, %o0
+
+	cmp	%o0, 0
+	be	8f
+	 add	%o1, 1, %o1
+
+	cmp	%o0, %g2
+	be	1f
+	 add	%o4, -1, %o4
+
+	retl
+	 sub	%o0, %g2, %o0
+
+1:
+
+	cmp	%o4, 0
+	bg,a	0b
+	 ldub	[%g3], %o3
+
+	b	7f
+	 and	%o2, 3, %o2
+
+9:
+	ldub	[%o1], %g2
+	add	%g3, 1, %g3
+	and	%o3, 0xff, %o0
+
+	cmp	%o0, 0
+	be	8f
+	 add	%o1, 1, %o1
+
+	cmp	%o0, %g2
+	be	7f
+	 add	%o2, -1, %o2
+
+8:
+	retl
+	 sub	%o0, %g2, %o0
+
+7:
+	cmp	%o2, 0
+	bg,a	9b
+	 ldub	[%g3], %o3
+
+	and	%g2, 0xff, %o0
+	retl
+	 sub	%o3, %o0, %o0
+ENDPROC(strncmp)
+EXPORT_SYMBOL(strncmp)
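
The unrolled loop above implements standard strncmp semantics. As a hedged C
reference (illustrative name): compare up to n bytes, stopping at the first
difference or at a NUL.

	static int strncmp_ref(const char *s1, const char *s2, unsigned long n)
	{
		while (n--) {
			unsigned char c1 = *s1++, c2 = *s2++;

			if (c1 != c2)
				return c1 - c2;
			if (!c1)
				return 0;
		}
		return 0;
	}
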
diff --git a/arch/sparc/lib/strncmp_64.S b/arch/sparc/lib/strncmp_64.S
new file mode 100644
index 0000000..3d37d65
--- /dev/null
+++ b/arch/sparc/lib/strncmp_64.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Sparc64 optimized strncmp code.
+ *
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/linkage.h>
+#include <asm/asi.h>
+#include <asm/export.h>
+
+	.text
+ENTRY(strncmp)
+	brlez,pn %o2, 3f
+	 lduba	[%o0] (ASI_PNF), %o3
+1:
+	add	%o0, 1, %o0
+	ldub	[%o1], %o4
+	brz,pn	%o3, 2f
+	 add	%o1, 1, %o1
+	cmp	%o3, %o4
+	bne,pn	%icc, 2f
+	 subcc	%o2, 1, %o2
+	bne,a,pt %xcc, 1b
+	 ldub	[%o0], %o3
+2:
+	retl
+	 sub	%o3, %o4, %o0
+3:
+	retl
+	 clr	%o0
+ENDPROC(strncmp)
+EXPORT_SYMBOL(strncmp)
diff --git a/arch/sparc/lib/ucmpdi2.c b/arch/sparc/lib/ucmpdi2.c
new file mode 100644
index 0000000..82c1ccc
--- /dev/null
+++ b/arch/sparc/lib/ucmpdi2.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include "libgcc.h"
+
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
+{
+	const DWunion au = {.ll = a};
+	const DWunion bu = {.ll = b};
+
+	if ((unsigned int) au.s.high < (unsigned int) bu.s.high)
+		return 0;
+	else if ((unsigned int) au.s.high > (unsigned int) bu.s.high)
+		return 2;
+	if ((unsigned int) au.s.low < (unsigned int) bu.s.low)
+		return 0;
+	else if ((unsigned int) au.s.low > (unsigned int) bu.s.low)
+		return 2;
+	return 1;
+}
+EXPORT_SYMBOL(__ucmpdi2);
diff --git a/arch/sparc/lib/udivdi3.S b/arch/sparc/lib/udivdi3.S
new file mode 100644
index 0000000..24e0a35
--- /dev/null
+++ b/arch/sparc/lib/udivdi3.S
@@ -0,0 +1,259 @@
+/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+	.text
+	.align 4
+	.globl __udivdi3
+__udivdi3:
+	save %sp,-104,%sp
+	mov %i3,%o3
+	cmp %i2,0
+	bne .LL40
+	mov %i1,%i3
+	cmp %o3,%i0
+	bleu .LL41
+	mov %i3,%o1
+	! Inlined udiv_qrnnd
+	mov	32,%g1
+	subcc	%i0,%o3,%g0
+1:	bcs	5f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	sub	%i0,%o3,%i0	! this kills msb of n
+	addx	%i0,%i0,%i0	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%i0,%o3,%g0
+	bcs	3f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%i0,%o3,%i0	! this kills msb of n
+4:	sub	%i0,%o3,%i0
+5:	addxcc	%i0,%i0,%i0
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
+	sub	%i0,%o3,%i0
+3:	xnor	%o1,0,%o1
+	! End of inline udiv_qrnnd
+	b .LL45
+	mov 0,%o2
+.LL41:
+	cmp %o3,0
+	bne .LL77
+	mov %i0,%o2
+	mov 1,%o0
+	mov 0,%o1
+	wr %g0, 0, %y
+	udiv %o0, %o1, %o0
+	mov %o0,%o3
+	mov %i0,%o2
+.LL77:
+	mov 0,%o4
+	! Inlined udiv_qrnnd
+	mov	32,%g1
+	subcc	%o4,%o3,%g0
+1:	bcs	5f
+	 addxcc %o2,%o2,%o2	! shift n1n0 and a q-bit in lsb
+	sub	%o4,%o3,%o4	! this kills msb of n
+	addx	%o4,%o4,%o4	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%o4,%o3,%g0
+	bcs	3f
+	 addxcc %o2,%o2,%o2	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%o4,%o3,%o4	! this kills msb of n
+4:	sub	%o4,%o3,%o4
+5:	addxcc	%o4,%o4,%o4
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o2,%o2,%o2	! shift n1n0 and a 0-bit in lsb
+	sub	%o4,%o3,%o4
+3:	xnor	%o2,0,%o2
+	! End of inline udiv_qrnnd
+	mov %o4,%i0
+	mov %i3,%o1
+	! Inlined udiv_qrnnd
+	mov	32,%g1
+	subcc	%i0,%o3,%g0
+1:	bcs	5f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	sub	%i0,%o3,%i0	! this kills msb of n
+	addx	%i0,%i0,%i0	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%i0,%o3,%g0
+	bcs	3f
+	 addxcc %o1,%o1,%o1	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%i0,%o3,%i0	! this kills msb of n
+4:	sub	%i0,%o3,%i0
+5:	addxcc	%i0,%i0,%i0
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o1,%o1,%o1	! shift n1n0 and a 0-bit in lsb
+	sub	%i0,%o3,%i0
+3:	xnor	%o1,0,%o1
+	! End of inline udiv_qrnnd
+	b .LL78
+	mov %o1,%l1
+.LL40:
+	cmp %i2,%i0
+	bleu .LL46
+	sethi %hi(65535),%o0
+	b .LL73
+	mov 0,%o1
+.LL46:
+	or %o0,%lo(65535),%o0
+	cmp %i2,%o0
+	bgu .LL53
+	mov %i2,%o1
+	cmp %i2,256
+	addx %g0,-1,%o0
+	b .LL59
+	and %o0,8,%o2
+.LL53:
+	sethi %hi(16777215),%o0
+	or %o0,%lo(16777215),%o0
+	cmp %o1,%o0
+	bgu .LL59
+	mov 24,%o2
+	mov 16,%o2
+.LL59:
+	srl %o1,%o2,%o1
+	sethi %hi(__clz_tab),%o0
+	or %o0,%lo(__clz_tab),%o0
+	ldub [%o1+%o0],%o0
+	add %o0,%o2,%o0
+	mov 32,%o1
+	subcc %o1,%o0,%o2
+	bne,a .LL67
+	mov 32,%o0
+	cmp %i0,%i2
+	bgu .LL69
+	cmp %i3,%o3
+	blu .LL73
+	mov 0,%o1
+.LL69:
+	b .LL73
+	mov 1,%o1
+.LL67:
+	sub %o0,%o2,%o0
+	sll %i2,%o2,%i2
+	srl %o3,%o0,%o1
+	or %i2,%o1,%i2
+	sll %o3,%o2,%o3
+	srl %i0,%o0,%o1
+	sll %i0,%o2,%i0
+	srl %i3,%o0,%o0
+	or %i0,%o0,%i0
+	sll %i3,%o2,%i3
+	mov %i0,%o5
+	mov %o1,%o4
+	! Inlined udiv_qrnnd
+	mov	32,%g1
+	subcc	%o4,%i2,%g0
+1:	bcs	5f
+	 addxcc %o5,%o5,%o5	! shift n1n0 and a q-bit in lsb
+	sub	%o4,%i2,%o4	! this kills msb of n
+	addx	%o4,%o4,%o4	! so this cannot give carry
+	subcc	%g1,1,%g1
+2:	bne	1b
+	 subcc	%o4,%i2,%g0
+	bcs	3f
+	 addxcc %o5,%o5,%o5	! shift n1n0 and a q-bit in lsb
+	b	3f
+	 sub	%o4,%i2,%o4	! this kills msb of n
+4:	sub	%o4,%i2,%o4
+5:	addxcc	%o4,%o4,%o4
+	bcc	2b
+	 subcc	%g1,1,%g1
+! Got carry from n.  Subtract next step to cancel this carry.
+	bne	4b
+	 addcc	%o5,%o5,%o5	! shift n1n0 and a 0-bit in lsb
+	sub	%o4,%i2,%o4
+3:	xnor	%o5,0,%o5
+	! End of inline udiv_qrnnd
+	mov %o4,%i0
+	mov %o5,%o1
+	! Inlined umul_ppmm
+	wr	%g0,%o1,%y	! SPARC has 0-3 delay insn after a wr
+	sra	%o3,31,%g2	! Do not move this insn
+	and	%o1,%g2,%g2	! Do not move this insn
+	andcc	%g0,0,%g1	! Do not move this insn
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,%o3,%g1
+	mulscc	%g1,0,%g1
+	add	%g1,%g2,%o0
+	rd	%y,%o2
+	cmp %o0,%i0
+	bgu,a .LL73
+	add %o1,-1,%o1
+	bne,a .LL45
+	mov 0,%o2
+	cmp %o2,%i3
+	bleu .LL45
+	mov 0,%o2
+	add %o1,-1,%o1
+.LL73:
+	mov 0,%o2
+.LL45:
+	mov %o1,%l1
+.LL78:
+	mov %o2,%l0
+	mov %l0,%i0
+	mov %l1,%i1
+	ret
+	restore
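
The thrice-inlined udiv_qrnnd above divides a two-word numerator n1:n0 by a
one-word divisor, producing one quotient bit per shift step; the assembly uses
a non-restoring variant that accumulates the quotient bits complemented, which
is what the final xnor undoes. A hedged C sketch of the simpler restoring form
(illustrative name; assumes n1 < d so the quotient fits in one word):

	#include <stdint.h>

	static uint32_t udiv_qrnnd_ref(uint32_t *rem, uint32_t n1, uint32_t n0,
				       uint32_t d)
	{
		uint32_t q = 0;
		int i;

		for (i = 0; i < 32; i++) {
			uint32_t carry = n1 >> 31;

			n1 = (n1 << 1) | (n0 >> 31);	/* shift n1:n0 left */
			n0 <<= 1;
			q <<= 1;
			if (carry || n1 >= d) {		/* 33-bit compare */
				n1 -= d;		/* wraps correctly when carry is set */
				q |= 1;
			}
		}
		*rem = n1;
		return q;
	}
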
diff --git a/arch/sparc/lib/xor.S b/arch/sparc/lib/xor.S
new file mode 100644
index 0000000..f6af7c7
--- /dev/null
+++ b/arch/sparc/lib/xor.S
@@ -0,0 +1,646 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/sparc/lib/xor.S
+ *
+ * High speed xor_block operation for RAID4/5 utilizing the
+ * UltraSparc Visual Instruction Set and Niagara store-init/twin-load.
+ *
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+#include <asm/asi.h>
+#include <asm/dcu.h>
+#include <asm/spitfire.h>
+#include <asm/export.h>
+
+/*
+ *	Requirements:
+ *	!(((long)dest | (long)sourceN) & (64 - 1)) &&
+ *	!(len & 127) && len >= 256
+ */
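
Hedged C rendering of the requirements comment above (illustrative name; in
the kernel these preconditions are guaranteed by the xor_block callers rather
than checked here): every buffer 64-byte aligned, and the length a multiple of
128 that is at least 256.

	static int xor_args_ok(const void *dest, const void *src, unsigned long len)
	{
		return !(((unsigned long)dest | (unsigned long)src) & 63) &&
		       !(len & 127) && len >= 256;
	}
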
+	.text
+
+	/* VIS versions. */
+ENTRY(xor_vis_2)
+	rd	%fprs, %o5
+	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
+	be,pt	%icc, 0f
+	 sethi	%hi(VISenter), %g1
+	jmpl	%g1 + %lo(VISenter), %g7
+	 add	%g7, 8, %g7
+0:	wr	%g0, FPRS_FEF, %fprs
+	rd	%asi, %g1
+	wr	%g0, ASI_BLK_P, %asi
+	membar	#LoadStore|#StoreLoad|#StoreStore
+	sub	%o0, 128, %o0
+	ldda	[%o1] %asi, %f0
+	ldda	[%o2] %asi, %f16
+
+2:	ldda	[%o1 + 64] %asi, %f32
+	fxor	%f0, %f16, %f16
+	fxor	%f2, %f18, %f18
+	fxor	%f4, %f20, %f20
+	fxor	%f6, %f22, %f22
+	fxor	%f8, %f24, %f24
+	fxor	%f10, %f26, %f26
+	fxor	%f12, %f28, %f28
+	fxor	%f14, %f30, %f30
+	stda	%f16, [%o1] %asi
+	ldda	[%o2 + 64] %asi, %f48
+	ldda	[%o1 + 128] %asi, %f0
+	fxor	%f32, %f48, %f48
+	fxor	%f34, %f50, %f50
+	add	%o1, 128, %o1
+	fxor	%f36, %f52, %f52
+	add	%o2, 128, %o2
+	fxor	%f38, %f54, %f54
+	subcc	%o0, 128, %o0
+	fxor	%f40, %f56, %f56
+	fxor	%f42, %f58, %f58
+	fxor	%f44, %f60, %f60
+	fxor	%f46, %f62, %f62
+	stda	%f48, [%o1 - 64] %asi
+	bne,pt	%xcc, 2b
+	 ldda	[%o2] %asi, %f16
+
+	ldda	[%o1 + 64] %asi, %f32
+	fxor	%f0, %f16, %f16
+	fxor	%f2, %f18, %f18
+	fxor	%f4, %f20, %f20
+	fxor	%f6, %f22, %f22
+	fxor	%f8, %f24, %f24
+	fxor	%f10, %f26, %f26
+	fxor	%f12, %f28, %f28
+	fxor	%f14, %f30, %f30
+	stda	%f16, [%o1] %asi
+	ldda	[%o2 + 64] %asi, %f48
+	membar	#Sync
+	fxor	%f32, %f48, %f48
+	fxor	%f34, %f50, %f50
+	fxor	%f36, %f52, %f52
+	fxor	%f38, %f54, %f54
+	fxor	%f40, %f56, %f56
+	fxor	%f42, %f58, %f58
+	fxor	%f44, %f60, %f60
+	fxor	%f46, %f62, %f62
+	stda	%f48, [%o1 + 64] %asi
+	membar	#Sync|#StoreStore|#StoreLoad
+	wr	%g1, %g0, %asi
+	retl
+	 wr	%g0, 0, %fprs
+ENDPROC(xor_vis_2)
+EXPORT_SYMBOL(xor_vis_2)
+
+ENTRY(xor_vis_3)
+	rd	%fprs, %o5
+	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
+	be,pt	%icc, 0f
+	 sethi	%hi(VISenter), %g1
+	jmpl	%g1 + %lo(VISenter), %g7
+	 add	%g7, 8, %g7
+0:	wr	%g0, FPRS_FEF, %fprs
+	rd	%asi, %g1
+	wr	%g0, ASI_BLK_P, %asi
+	membar	#LoadStore|#StoreLoad|#StoreStore
+	sub	%o0, 64, %o0
+	ldda	[%o1] %asi, %f0
+	ldda	[%o2] %asi, %f16
+
+3:	ldda	[%o3] %asi, %f32
+	fxor	%f0, %f16, %f48
+	fxor	%f2, %f18, %f50
+	add	%o1, 64, %o1
+	fxor	%f4, %f20, %f52
+	fxor	%f6, %f22, %f54
+	add	%o2, 64, %o2
+	fxor	%f8, %f24, %f56
+	fxor	%f10, %f26, %f58
+	fxor	%f12, %f28, %f60
+	fxor	%f14, %f30, %f62
+	ldda	[%o1] %asi, %f0
+	fxor	%f48, %f32, %f48
+	fxor	%f50, %f34, %f50
+	fxor	%f52, %f36, %f52
+	fxor	%f54, %f38, %f54
+	add	%o3, 64, %o3
+	fxor	%f56, %f40, %f56
+	fxor	%f58, %f42, %f58
+	subcc	%o0, 64, %o0
+	fxor	%f60, %f44, %f60
+	fxor	%f62, %f46, %f62
+	stda	%f48, [%o1 - 64] %asi
+	bne,pt	%xcc, 3b
+	 ldda	[%o2] %asi, %f16
+
+	ldda	[%o3] %asi, %f32
+	fxor	%f0, %f16, %f48
+	fxor	%f2, %f18, %f50
+	fxor	%f4, %f20, %f52
+	fxor	%f6, %f22, %f54
+	fxor	%f8, %f24, %f56
+	fxor	%f10, %f26, %f58
+	fxor	%f12, %f28, %f60
+	fxor	%f14, %f30, %f62
+	membar	#Sync
+	fxor	%f48, %f32, %f48
+	fxor	%f50, %f34, %f50
+	fxor	%f52, %f36, %f52
+	fxor	%f54, %f38, %f54
+	fxor	%f56, %f40, %f56
+	fxor	%f58, %f42, %f58
+	fxor	%f60, %f44, %f60
+	fxor	%f62, %f46, %f62
+	stda	%f48, [%o1] %asi
+	membar	#Sync|#StoreStore|#StoreLoad
+	wr	%g1, %g0, %asi
+	retl
+	 wr	%g0, 0, %fprs
+ENDPROC(xor_vis_3)
+EXPORT_SYMBOL(xor_vis_3)
+
+ENTRY(xor_vis_4)
+	rd	%fprs, %o5
+	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
+	be,pt	%icc, 0f
+	 sethi	%hi(VISenter), %g1
+	jmpl	%g1 + %lo(VISenter), %g7
+	 add	%g7, 8, %g7
+0:	wr	%g0, FPRS_FEF, %fprs
+	rd	%asi, %g1
+	wr	%g0, ASI_BLK_P, %asi
+	membar	#LoadStore|#StoreLoad|#StoreStore
+	sub	%o0, 64, %o0
+	ldda	[%o1] %asi, %f0
+	ldda	[%o2] %asi, %f16
+
+4:	ldda	[%o3] %asi, %f32
+	fxor	%f0, %f16, %f16
+	fxor	%f2, %f18, %f18
+	add	%o1, 64, %o1
+	fxor	%f4, %f20, %f20
+	fxor	%f6, %f22, %f22
+	add	%o2, 64, %o2
+	fxor	%f8, %f24, %f24
+	fxor	%f10, %f26, %f26
+	fxor	%f12, %f28, %f28
+	fxor	%f14, %f30, %f30
+	ldda	[%o4] %asi, %f48
+	fxor	%f16, %f32, %f32
+	fxor	%f18, %f34, %f34
+	fxor	%f20, %f36, %f36
+	fxor	%f22, %f38, %f38
+	add	%o3, 64, %o3
+	fxor	%f24, %f40, %f40
+	fxor	%f26, %f42, %f42
+	fxor	%f28, %f44, %f44
+	fxor	%f30, %f46, %f46
+	ldda	[%o1] %asi, %f0
+	fxor	%f32, %f48, %f48
+	fxor	%f34, %f50, %f50
+	fxor	%f36, %f52, %f52
+	add	%o4, 64, %o4
+	fxor	%f38, %f54, %f54
+	fxor	%f40, %f56, %f56
+	fxor	%f42, %f58, %f58
+	subcc	%o0, 64, %o0
+	fxor	%f44, %f60, %f60
+	fxor	%f46, %f62, %f62
+	stda	%f48, [%o1 - 64] %asi
+	bne,pt	%xcc, 4b
+	 ldda	[%o2] %asi, %f16
+
+	ldda	[%o3] %asi, %f32
+	fxor	%f0, %f16, %f16
+	fxor	%f2, %f18, %f18
+	fxor	%f4, %f20, %f20
+	fxor	%f6, %f22, %f22
+	fxor	%f8, %f24, %f24
+	fxor	%f10, %f26, %f26
+	fxor	%f12, %f28, %f28
+	fxor	%f14, %f30, %f30
+	ldda	[%o4] %asi, %f48
+	fxor	%f16, %f32, %f32
+	fxor	%f18, %f34, %f34
+	fxor	%f20, %f36, %f36
+	fxor	%f22, %f38, %f38
+	fxor	%f24, %f40, %f40
+	fxor	%f26, %f42, %f42
+	fxor	%f28, %f44, %f44
+	fxor	%f30, %f46, %f46
+	membar	#Sync
+	fxor	%f32, %f48, %f48
+	fxor	%f34, %f50, %f50
+	fxor	%f36, %f52, %f52
+	fxor	%f38, %f54, %f54
+	fxor	%f40, %f56, %f56
+	fxor	%f42, %f58, %f58
+	fxor	%f44, %f60, %f60
+	fxor	%f46, %f62, %f62
+	stda	%f48, [%o1] %asi
+	membar	#Sync|#StoreStore|#StoreLoad
+	wr	%g1, %g0, %asi
+	retl
+	 wr	%g0, 0, %fprs
+ENDPROC(xor_vis_4)
+EXPORT_SYMBOL(xor_vis_4)
+
+ENTRY(xor_vis_5)
+	save	%sp, -192, %sp
+	rd	%fprs, %o5
+	andcc	%o5, FPRS_FEF|FPRS_DU, %g0
+	be,pt	%icc, 0f
+	 sethi	%hi(VISenter), %g1
+	jmpl	%g1 + %lo(VISenter), %g7
+	 add	%g7, 8, %g7
+0:	wr	%g0, FPRS_FEF, %fprs
+	rd	%asi, %g1
+	wr	%g0, ASI_BLK_P, %asi
+	membar	#LoadStore|#StoreLoad|#StoreStore
+	sub	%i0, 64, %i0
+	ldda	[%i1] %asi, %f0
+	ldda	[%i2] %asi, %f16
+
+5:	ldda	[%i3] %asi, %f32
+	fxor	%f0, %f16, %f48
+	fxor	%f2, %f18, %f50
+	add	%i1, 64, %i1
+	fxor	%f4, %f20, %f52
+	fxor	%f6, %f22, %f54
+	add	%i2, 64, %i2
+	fxor	%f8, %f24, %f56
+	fxor	%f10, %f26, %f58
+	fxor	%f12, %f28, %f60
+	fxor	%f14, %f30, %f62
+	ldda	[%i4] %asi, %f16
+	fxor	%f48, %f32, %f48
+	fxor	%f50, %f34, %f50
+	fxor	%f52, %f36, %f52
+	fxor	%f54, %f38, %f54
+	add	%i3, 64, %i3
+	fxor	%f56, %f40, %f56
+	fxor	%f58, %f42, %f58
+	fxor	%f60, %f44, %f60
+	fxor	%f62, %f46, %f62
+	ldda	[%i5] %asi, %f32
+	fxor	%f48, %f16, %f48
+	fxor	%f50, %f18, %f50
+	add	%i4, 64, %i4
+	fxor	%f52, %f20, %f52
+	fxor	%f54, %f22, %f54
+	add	%i5, 64, %i5
+	fxor	%f56, %f24, %f56
+	fxor	%f58, %f26, %f58
+	fxor	%f60, %f28, %f60
+	fxor	%f62, %f30, %f62
+	ldda	[%i1] %asi, %f0
+	fxor	%f48, %f32, %f48
+	fxor	%f50, %f34, %f50
+	fxor	%f52, %f36, %f52
+	fxor	%f54, %f38, %f54
+	fxor	%f56, %f40, %f56
+	fxor	%f58, %f42, %f58
+	subcc	%i0, 64, %i0
+	fxor	%f60, %f44, %f60
+	fxor	%f62, %f46, %f62
+	stda	%f48, [%i1 - 64] %asi
+	bne,pt	%xcc, 5b
+	 ldda	[%i2] %asi, %f16
+
+	ldda	[%i3] %asi, %f32
+	fxor	%f0, %f16, %f48
+	fxor	%f2, %f18, %f50
+	fxor	%f4, %f20, %f52
+	fxor	%f6, %f22, %f54
+	fxor	%f8, %f24, %f56
+	fxor	%f10, %f26, %f58
+	fxor	%f12, %f28, %f60
+	fxor	%f14, %f30, %f62
+	ldda	[%i4] %asi, %f16
+	fxor	%f48, %f32, %f48
+	fxor	%f50, %f34, %f50
+	fxor	%f52, %f36, %f52
+	fxor	%f54, %f38, %f54
+	fxor	%f56, %f40, %f56
+	fxor	%f58, %f42, %f58
+	fxor	%f60, %f44, %f60
+	fxor	%f62, %f46, %f62
+	ldda	[%i5] %asi, %f32
+	fxor	%f48, %f16, %f48
+	fxor	%f50, %f18, %f50
+	fxor	%f52, %f20, %f52
+	fxor	%f54, %f22, %f54
+	fxor	%f56, %f24, %f56
+	fxor	%f58, %f26, %f58
+	fxor	%f60, %f28, %f60
+	fxor	%f62, %f30, %f62
+	membar	#Sync
+	fxor	%f48, %f32, %f48
+	fxor	%f50, %f34, %f50
+	fxor	%f52, %f36, %f52
+	fxor	%f54, %f38, %f54
+	fxor	%f56, %f40, %f56
+	fxor	%f58, %f42, %f58
+	fxor	%f60, %f44, %f60
+	fxor	%f62, %f46, %f62
+	stda	%f48, [%i1] %asi
+	membar	#Sync|#StoreStore|#StoreLoad
+	wr	%g1, %g0, %asi
+	wr	%g0, 0, %fprs
+	ret
+	 restore
+ENDPROC(xor_vis_5)
+EXPORT_SYMBOL(xor_vis_5)
+
+	/* Niagara versions. */
+ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */
+	save		%sp, -192, %sp
+	prefetch	[%i1], #n_writes
+	prefetch	[%i2], #one_read
+	rd		%asi, %g7
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	srlx		%i0, 6, %g1
+	mov		%i1, %i0
+	mov		%i2, %i1
+1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src  + 0x00 */
+	ldda		[%i1 + 0x10] %asi, %i4	/* %i4/%i5 = src  + 0x10 */
+	ldda		[%i1 + 0x20] %asi, %g2	/* %g2/%g3 = src  + 0x20 */
+	ldda		[%i1 + 0x30] %asi, %l0	/* %l0/%l1 = src  + 0x30 */
+	prefetch	[%i1 + 0x40], #one_read
+	ldda		[%i0 + 0x00] %asi, %o0  /* %o0/%o1 = dest + 0x00 */
+	ldda		[%i0 + 0x10] %asi, %o2  /* %o2/%o3 = dest + 0x10 */
+	ldda		[%i0 + 0x20] %asi, %o4  /* %o4/%o5 = dest + 0x20 */
+	ldda		[%i0 + 0x30] %asi, %l2  /* %l2/%l3 = dest + 0x30 */
+	prefetch	[%i0 + 0x40], #n_writes
+	xor		%o0, %i2, %o0
+	xor		%o1, %i3, %o1
+	stxa		%o0, [%i0 + 0x00] %asi
+	stxa		%o1, [%i0 + 0x08] %asi
+	xor		%o2, %i4, %o2
+	xor		%o3, %i5, %o3
+	stxa		%o2, [%i0 + 0x10] %asi
+	stxa		%o3, [%i0 + 0x18] %asi
+	xor		%o4, %g2, %o4
+	xor		%o5, %g3, %o5
+	stxa		%o4, [%i0 + 0x20] %asi
+	stxa		%o5, [%i0 + 0x28] %asi
+	xor		%l2, %l0, %l2
+	xor		%l3, %l1, %l3
+	stxa		%l2, [%i0 + 0x30] %asi
+	stxa		%l3, [%i0 + 0x38] %asi
+	add		%i0, 0x40, %i0
+	subcc		%g1, 1, %g1
+	bne,pt		%xcc, 1b
+	 add		%i1, 0x40, %i1
+	membar		#Sync
+	wr		%g7, 0x0, %asi
+	ret
+	 restore
+ENDPROC(xor_niagara_2)
+EXPORT_SYMBOL(xor_niagara_2)
+
+ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
+	save		%sp, -192, %sp
+	prefetch	[%i1], #n_writes
+	prefetch	[%i2], #one_read
+	prefetch	[%i3], #one_read
+	rd		%asi, %g7
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	srlx		%i0, 6, %g1
+	mov		%i1, %i0
+	mov		%i2, %i1
+	mov		%i3, %l7
+1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
+	ldda		[%i1 + 0x10] %asi, %i4	/* %i4/%i5 = src1 + 0x10 */
+	ldda		[%l7 + 0x00] %asi, %g2	/* %g2/%g3 = src2 + 0x00 */
+	ldda		[%l7 + 0x10] %asi, %l0	/* %l0/%l1 = src2 + 0x10 */
+	ldda		[%i0 + 0x00] %asi, %o0  /* %o0/%o1 = dest + 0x00 */
+	ldda		[%i0 + 0x10] %asi, %o2  /* %o2/%o3 = dest + 0x10 */
+	xor		%g2, %i2, %g2
+	xor		%g3, %i3, %g3
+	xor		%o0, %g2, %o0
+	xor		%o1, %g3, %o1
+	stxa		%o0, [%i0 + 0x00] %asi
+	stxa		%o1, [%i0 + 0x08] %asi
+	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
+	ldda		[%l7 + 0x20] %asi, %g2	/* %g2/%g3 = src2 + 0x20 */
+	ldda		[%i0 + 0x20] %asi, %o0	/* %o0/%o1 = dest + 0x20 */
+	xor		%l0, %i4, %l0
+	xor		%l1, %i5, %l1
+	xor		%o2, %l0, %o2
+	xor		%o3, %l1, %o3
+	stxa		%o2, [%i0 + 0x10] %asi
+	stxa		%o3, [%i0 + 0x18] %asi
+	ldda		[%i1 + 0x30] %asi, %i4	/* %i4/%i5 = src1 + 0x30 */
+	ldda		[%l7 + 0x30] %asi, %l0	/* %l0/%l1 = src2 + 0x30 */
+	ldda		[%i0 + 0x30] %asi, %o2	/* %o2/%o3 = dest + 0x30 */
+	prefetch	[%i1 + 0x40], #one_read
+	prefetch	[%l7 + 0x40], #one_read
+	prefetch	[%i0 + 0x40], #n_writes
+	xor		%g2, %i2, %g2
+	xor		%g3, %i3, %g3
+	xor		%o0, %g2, %o0
+	xor		%o1, %g3, %o1
+	stxa		%o0, [%i0 + 0x20] %asi
+	stxa		%o1, [%i0 + 0x28] %asi
+	xor		%l0, %i4, %l0
+	xor		%l1, %i5, %l1
+	xor		%o2, %l0, %o2
+	xor		%o3, %l1, %o3
+	stxa		%o2, [%i0 + 0x30] %asi
+	stxa		%o3, [%i0 + 0x38] %asi
+	add		%i0, 0x40, %i0
+	add		%i1, 0x40, %i1
+	subcc		%g1, 1, %g1
+	bne,pt		%xcc, 1b
+	 add		%l7, 0x40, %l7
+	membar		#Sync
+	wr		%g7, 0x0, %asi
+	ret
+	 restore
+ENDPROC(xor_niagara_3)
+EXPORT_SYMBOL(xor_niagara_3)
+
+ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
+	save		%sp, -192, %sp
+	prefetch	[%i1], #n_writes
+	prefetch	[%i2], #one_read
+	prefetch	[%i3], #one_read
+	prefetch	[%i4], #one_read
+	rd		%asi, %g7
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	srlx		%i0, 6, %g1
+	mov		%i1, %i0
+	mov		%i2, %i1
+	mov		%i3, %l7
+	mov		%i4, %l6
+1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
+	ldda		[%l7 + 0x00] %asi, %i4	/* %i4/%i5 = src2 + 0x00 */
+	ldda		[%l6 + 0x00] %asi, %g2	/* %g2/%g3 = src3 + 0x00 */
+	ldda		[%i0 + 0x00] %asi, %l0	/* %l0/%l1 = dest + 0x00 */
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	ldda		[%i1 + 0x10] %asi, %i2	/* %i2/%i3 = src1 + 0x10 */
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	ldda		[%l7 + 0x10] %asi, %i4	/* %i4/%i5 = src2 + 0x10 */
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	stxa		%l0, [%i0 + 0x00] %asi
+	stxa		%l1, [%i0 + 0x08] %asi
+	ldda		[%l6 + 0x10] %asi, %g2	/* %g2/%g3 = src3 + 0x10 */
+	ldda		[%i0 + 0x10] %asi, %l0	/* %l0/%l1 = dest + 0x10 */
+
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	ldda		[%l7 + 0x20] %asi, %i4	/* %i4/%i5 = src2 + 0x20 */
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	stxa		%l0, [%i0 + 0x10] %asi
+	stxa		%l1, [%i0 + 0x18] %asi
+	ldda		[%l6 + 0x20] %asi, %g2	/* %g2/%g3 = src3 + 0x20 */
+	ldda		[%i0 + 0x20] %asi, %l0	/* %l0/%l1 = dest + 0x20 */
+
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	ldda		[%i1 + 0x30] %asi, %i2	/* %i2/%i3 = src1 + 0x30 */
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	ldda		[%l7 + 0x30] %asi, %i4	/* %i4/%i5 = src2 + 0x30 */
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	stxa		%l0, [%i0 + 0x20] %asi
+	stxa		%l1, [%i0 + 0x28] %asi
+	ldda		[%l6 + 0x30] %asi, %g2	/* %g2/%g3 = src3 + 0x30 */
+	ldda		[%i0 + 0x30] %asi, %l0	/* %l0/%l1 = dest + 0x30 */
+
+	prefetch	[%i1 + 0x40], #one_read
+	prefetch	[%l7 + 0x40], #one_read
+	prefetch	[%l6 + 0x40], #one_read
+	prefetch	[%i0 + 0x40], #n_writes
+
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	stxa		%l0, [%i0 + 0x30] %asi
+	stxa		%l1, [%i0 + 0x38] %asi
+
+	add		%i0, 0x40, %i0
+	add		%i1, 0x40, %i1
+	add		%l7, 0x40, %l7
+	subcc		%g1, 1, %g1
+	bne,pt		%xcc, 1b
+	 add		%l6, 0x40, %l6
+	membar		#Sync
+	wr		%g7, 0x0, %asi
+	ret
+	 restore
+ENDPROC(xor_niagara_4)
+EXPORT_SYMBOL(xor_niagara_4)
+
+ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
+	save		%sp, -192, %sp
+	prefetch	[%i1], #n_writes
+	prefetch	[%i2], #one_read
+	prefetch	[%i3], #one_read
+	prefetch	[%i4], #one_read
+	prefetch	[%i5], #one_read
+	rd		%asi, %g7
+	wr		%g0, ASI_BLK_INIT_QUAD_LDD_P, %asi
+	srlx		%i0, 6, %g1
+	mov		%i1, %i0
+	mov		%i2, %i1
+	mov		%i3, %l7
+	mov		%i4, %l6
+	mov		%i5, %l5
+1:	ldda		[%i1 + 0x00] %asi, %i2	/* %i2/%i3 = src1 + 0x00 */
+	ldda		[%l7 + 0x00] %asi, %i4	/* %i4/%i5 = src2 + 0x00 */
+	ldda		[%l6 + 0x00] %asi, %g2	/* %g2/%g3 = src3 + 0x00 */
+	ldda		[%l5 + 0x00] %asi, %l0	/* %l0/%l1 = src4 + 0x00 */
+	ldda		[%i0 + 0x00] %asi, %l2	/* %l2/%l3 = dest + 0x00 */
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	ldda		[%i1 + 0x10] %asi, %i2	/* %i2/%i3 = src1 + 0x10 */
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	ldda		[%l7 + 0x10] %asi, %i4	/* %i4/%i5 = src2 + 0x10 */
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	ldda		[%l6 + 0x10] %asi, %g2	/* %g2/%g3 = src3 + 0x10 */
+	xor		%l2, %l0, %l2
+	xor		%l3, %l1, %l3
+	stxa		%l2, [%i0 + 0x00] %asi
+	stxa		%l3, [%i0 + 0x08] %asi
+	ldda		[%l5 + 0x10] %asi, %l0	/* %l0/%l1 = src4 + 0x10 */
+	ldda		[%i0 + 0x10] %asi, %l2	/* %l2/%l3 = dest + 0x10 */
+
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	ldda		[%i1 + 0x20] %asi, %i2	/* %i2/%i3 = src1 + 0x20 */
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	ldda		[%l7 + 0x20] %asi, %i4	/* %i4/%i5 = src2 + 0x20 */
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	ldda		[%l6 + 0x20] %asi, %g2	/* %g2/%g3 = src3 + 0x20 */
+	xor		%l2, %l0, %l2
+	xor		%l3, %l1, %l3
+	stxa		%l2, [%i0 + 0x10] %asi
+	stxa		%l3, [%i0 + 0x18] %asi
+	ldda		[%l5 + 0x20] %asi, %l0	/* %l0/%l1 = src4 + 0x20 */
+	ldda		[%i0 + 0x20] %asi, %l2	/* %l2/%l3 = dest + 0x20 */
+
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	ldda		[%i1 + 0x30] %asi, %i2	/* %i2/%i3 = src1 + 0x30 */
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	ldda		[%l7 + 0x30] %asi, %i4	/* %i4/%i5 = src2 + 0x30 */
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	ldda		[%l6 + 0x30] %asi, %g2	/* %g2/%g3 = src3 + 0x30 */
+	xor		%l2, %l0, %l2
+	xor		%l3, %l1, %l3
+	stxa		%l2, [%i0 + 0x20] %asi
+	stxa		%l3, [%i0 + 0x28] %asi
+	ldda		[%l5 + 0x30] %asi, %l0	/* %l0/%l1 = src4 + 0x30 */
+	ldda		[%i0 + 0x30] %asi, %l2	/* %l2/%l3 = dest + 0x30 */
+
+	prefetch	[%i1 + 0x40], #one_read
+	prefetch	[%l7 + 0x40], #one_read
+	prefetch	[%l6 + 0x40], #one_read
+	prefetch	[%l5 + 0x40], #one_read
+	prefetch	[%i0 + 0x40], #n_writes
+
+	xor		%i4, %i2, %i4
+	xor		%i5, %i3, %i5
+	xor		%g2, %i4, %g2
+	xor		%g3, %i5, %g3
+	xor		%l0, %g2, %l0
+	xor		%l1, %g3, %l1
+	xor		%l2, %l0, %l2
+	xor		%l3, %l1, %l3
+	stxa		%l2, [%i0 + 0x30] %asi
+	stxa		%l3, [%i0 + 0x38] %asi
+
+	add		%i0, 0x40, %i0
+	add		%i1, 0x40, %i1
+	add		%l7, 0x40, %l7
+	add		%l6, 0x40, %l6
+	subcc		%g1, 1, %g1
+	bne,pt		%xcc, 1b
+	 add		%l5, 0x40, %l5
+	membar		#Sync
+	wr		%g7, 0x0, %asi
+	ret
+	 restore
+ENDPROC(xor_niagara_5)
+EXPORT_SYMBOL(xor_niagara_5)
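
For reference, the operation the xor_niagara_* routines implement is plain
multi-source XOR over 64-byte blocks: each loop iteration processes eight
8-byte words with block-initializing quad loads/stores
(ASI_BLK_INIT_QUAD_LDD_P) and prefetches to keep the memory pipe busy. A
minimal C model of the three-source variant (names here are illustrative,
not kernel code):

#include <stddef.h>
#include <stdint.h>

/* Model of xor_niagara_3: dest ^= src1 ^ src2, bytes a multiple of 64. */
static void xor_3_model(size_t bytes, uint64_t *dest,
			const uint64_t *src1, const uint64_t *src2)
{
	size_t i;

	for (i = 0; i < bytes / sizeof(uint64_t); i++)
		dest[i] ^= src1[i] ^ src2[i];
}

The four- and five-source variants are the same idea with one and two more
source streams folded into the XOR chain per block.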
diff --git a/arch/sparc/math-emu/Makefile b/arch/sparc/math-emu/Makefile
new file mode 100644
index 0000000..825dbee
--- /dev/null
+++ b/arch/sparc/math-emu/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the FPU instruction emulation.
+#
+
+# suppress all warnings - as math.c produces a lot!
+ccflags-y := -w
+
+obj-y    := math_$(BITS).o
diff --git a/arch/sparc/math-emu/math_32.c b/arch/sparc/math-emu/math_32.c
new file mode 100644
index 0000000..72e560e
--- /dev/null
+++ b/arch/sparc/math-emu/math_32.c
@@ -0,0 +1,515 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/sparc/math-emu/math.c
+ *
+ * Copyright (C) 1998 Peter Maydell (pmaydell@chiark.greenend.org.uk)
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ *
+ * This is a good place to start if you're trying to understand the
+ * emulation code, because it's pretty simple. What we do is
+ * essentially analyse the instruction to work out what the operation
+ * is and which registers are involved. We then execute the appropriate
+ * FXXXX function. [The floating point queue introduces a minor wrinkle;
+ * see below...]
+ * The fxxxxx.c files each emulate a single insn. They look relatively
+ * simple because the complexity is hidden away in an unholy tangle
+ * of preprocessor macros.
+ *
+ * The first layer of macros is single.h, double.h, quad.h. Generally
+ * these files define macros for working with floating point numbers
+ * of the three IEEE formats. FP_ADD_D(R,A,B) is for adding doubles,
+ * for instance. These macros are usually defined as calls to more
+ * generic macros (in this case _FP_ADD(D,2,R,X,Y) where the number
+ * of machine words required to store the given IEEE format is passed
+ * as a parameter. [double.h and co check the number of bits in a word
+ * and define FP_ADD_D & co appropriately].
+ * The generic macros are defined in op-common.h. This is where all
+ * the grotty stuff like handling NaNs is coded. To handle the possible
+ * word sizes macros in op-common.h use macros like _FP_FRAC_SLL_##wc()
+ * where wc is the 'number of machine words' parameter (here 2).
+ * These are defined in the third layer of macros: op-1.h, op-2.h
+ * and op-4.h. These handle operations on floating point numbers composed
+ * of 1,2 and 4 machine words respectively. [For example, on sparc64
+ * doubles are one machine word so macros in double.h eventually use
+ * constructs in op-1.h, but on sparc32 they use op-2.h definitions.]
+ * soft-fp.h is on the same level as op-common.h, and defines some
+ * macros which are independent of both word size and FP format.
+ * Finally, sfp-machine.h is the machine dependent part of the
+ * code: it defines the word size and what type a word is. It also
+ * defines how _FP_MUL_MEAT_t() maps to _FP_MUL_MEAT_n_* : op-n.h
+ * provide several possible flavours of multiply algorithm, most
+ * of which require that you supply some form of asm or C primitive to
+ * do the actual multiply. (such asm primitives should be defined
+ * in sfp-machine.h too). udivmodti4.c is the same sort of thing.
+ *
+ * There may be some errors here because I'm working from a
+ * SPARC architecture manual V9, and what I really want is V8...
+ * Also, the insns which can generate exceptions seem to be a
+ * larger subset of the FPops than on V9 (for example, FCMPED
+ * has to be emulated on V8). So I think I'm going to have
+ * to emulate them all just to be on the safe side...
+ *
+ * Emulation routines originate from soft-fp package, which is
+ * part of glibc and has appropriate copyrights in it (allegedly).
+ *
+ * NB: on sparc int == long == 4 bytes, long long == 8 bytes.
+ * Most bits of the kernel seem to go for long rather than int,
+ * so we follow that practice...
+ */
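+
+/* Illustrative expansion chain on sparc32 (a sketch of the layering
+ * described above, not literal preprocessor output):
+ *
+ *	FP_ADD_D(R, A, B)			double.h
+ *	  -> _FP_ADD(D, 2, R, A, B)		op-common.h, wc == 2 words
+ *	    -> _FP_FRAC_ADD_2(...) etc.		op-2.h, two-word fractions
+ */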
+
+/* TODO:
+ * fpsave() saves the FP queue but fpload() doesn't reload it.
+ * Therefore when we context switch or change FPU ownership
+ * we have to check to see if the queue had anything in it and
+ * emulate it if it did. This is going to be a pain.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include "sfp-util_32.h"
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+#include <math-emu/double.h>
+#include <math-emu/quad.h>
+
+#define FLOATFUNC(x) extern int x(void *,void *,void *)
+
+/* The Vn labels indicate what version of the SPARC architecture gas thinks
+ * each insn is. This is from the binutils source :->
+ */
+/* quadword instructions */
+#define FSQRTQ	0x02b		/* v8 */
+#define FADDQ	0x043		/* v8 */
+#define FSUBQ	0x047		/* v8 */
+#define FMULQ	0x04b		/* v8 */
+#define FDIVQ	0x04f		/* v8 */
+#define FDMULQ	0x06e		/* v8 */
+#define FQTOS	0x0c7		/* v8 */
+#define FQTOD	0x0cb		/* v8 */
+#define FITOQ	0x0cc		/* v8 */
+#define FSTOQ	0x0cd		/* v8 */
+#define FDTOQ	0x0ce		/* v8 */
+#define FQTOI	0x0d3		/* v8 */
+#define FCMPQ	0x053		/* v8 */
+#define FCMPEQ	0x057		/* v8 */
+/* single/double instructions (subnormal): should all work */
+#define FSQRTS	0x029		/* v7 */
+#define FSQRTD	0x02a		/* v7 */
+#define FADDS	0x041		/* v6 */
+#define FADDD	0x042		/* v6 */
+#define FSUBS	0x045		/* v6 */
+#define FSUBD	0x046		/* v6 */
+#define FMULS	0x049		/* v6 */
+#define FMULD	0x04a		/* v6 */
+#define FDIVS	0x04d		/* v6 */
+#define FDIVD	0x04e		/* v6 */
+#define FSMULD	0x069		/* v6 */
+#define FDTOS	0x0c6		/* v6 */
+#define FSTOD	0x0c9		/* v6 */
+#define FSTOI	0x0d1		/* v6 */
+#define FDTOI	0x0d2		/* v6 */
+#define FABSS	0x009		/* v6 */
+#define FCMPS	0x051		/* v6 */
+#define FCMPES	0x055		/* v6 */
+#define FCMPD	0x052		/* v6 */
+#define FCMPED	0x056		/* v6 */
+#define FMOVS	0x001		/* v6 */
+#define FNEGS	0x005		/* v6 */
+#define FITOS	0x0c4		/* v6 */
+#define FITOD	0x0c8		/* v6 */
+
+#define FSR_TEM_SHIFT	23UL
+#define FSR_TEM_MASK	(0x1fUL << FSR_TEM_SHIFT)
+#define FSR_AEXC_SHIFT	5UL
+#define FSR_AEXC_MASK	(0x1fUL << FSR_AEXC_SHIFT)
+#define FSR_CEXC_SHIFT	0UL
+#define FSR_CEXC_MASK	(0x1fUL << FSR_CEXC_SHIFT)
+
+static int do_one_mathemu(u32 insn, unsigned long *fsr, unsigned long *fregs);
+
+/* Unlike the Sparc64 version (which has a struct fpustate), we
+ * pass the taskstruct corresponding to the task which currently owns the
+ * FPU. This is partly because we don't have the fpustate struct and
+ * partly because the task owning the FPU isn't always current (as is
+ * the case for the Sparc64 port). This is probably SMP-related...
+ * This function returns 1 if all queued insns were emulated successfully.
+ * The test for unimplemented FPop in kernel mode has been moved into
+ * kernel/traps.c for simplicity.
+ */
+int do_mathemu(struct pt_regs *regs, struct task_struct *fpt)
+{
+	/* regs->pc isn't necessarily the PC at which the offending insn is sitting.
+	 * The FPU maintains a queue of FPops which cause traps.
+	 * When it hits an instruction that requires that the trapped op succeeded
+	 * (usually because it reads a reg. that the trapped op wrote) then it
+	 * causes this exception. We need to emulate all the insns on the queue
+	 * and then allow the op to proceed.
+	 * This code should also handle the case where the trap was precise,
+	 * in which case the queue length is zero and regs->pc points at the
+	 * single FPop to be emulated. (this case is untested, though :->)
+	 * You'll need this case if you want to be able to emulate all FPops
+	 * because the FPU either doesn't exist or has been software-disabled.
+	 * [The UltraSPARC makes FP a precise trap; this isn't as stupid as it
+	 * might sound because the Ultra does funky things with a superscalar
+	 * architecture.]
+	 */
+
+	/* You wouldn't believe how often I typed 'ftp' when I meant 'fpt' :-> */
+
+	int i;
+	int retcode = 0;                               /* assume all succeed */
+	unsigned long insn;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+#ifdef DEBUG_MATHEMU
+	printk("In do_mathemu()... pc is %08lx\n", regs->pc);
+	printk("fpqdepth is %ld\n", fpt->thread.fpqdepth);
+	for (i = 0; i < fpt->thread.fpqdepth; i++)
+		printk("%d: %08lx at %08lx\n", i, fpt->thread.fpqueue[i].insn,
+		       (unsigned long)fpt->thread.fpqueue[i].insn_addr);
+#endif
+
+	if (fpt->thread.fpqdepth == 0) {                   /* no queue, guilty insn is at regs->pc */
+#ifdef DEBUG_MATHEMU
+		printk("precise trap at %08lx\n", regs->pc);
+#endif
+		if (!get_user(insn, (u32 __user *) regs->pc)) {
+			retcode = do_one_mathemu(insn, &fpt->thread.fsr, fpt->thread.float_regs);
+			if (retcode) {
+				/* in this case we need to fix up PC & nPC */
+				regs->pc = regs->npc;
+				regs->npc += 4;
+			}
+		}
+		return retcode;
+	}
+
+	/* Normal case: need to empty the queue... */
+	for (i = 0; i < fpt->thread.fpqdepth; i++) {
+		retcode = do_one_mathemu(fpt->thread.fpqueue[i].insn, &(fpt->thread.fsr), fpt->thread.float_regs);
+		if (!retcode)                               /* insn failed, no point doing any more */
+			break;
+	}
+	/* Now empty the queue and clear the queue_not_empty flag */
+	if (retcode)
+		fpt->thread.fsr &= ~(0x3000 | FSR_CEXC_MASK);
+	else
+		fpt->thread.fsr &= ~0x3000;
+	fpt->thread.fpqdepth = 0;
+
+	return retcode;
+}
+
+/* All routines returning an exception to raise should detect
+ * such exceptions _before_ rounding to be consistent with
+ * the behavior of the hardware in the implemented cases
+ * (and thus with the recommendations in the V9 architecture
+ * manual).
+ *
+ * We return 0 if a SIGFPE should be sent, 1 otherwise.
+ */
+static inline int record_exception(unsigned long *pfsr, int eflag)
+{
+	unsigned long fsr = *pfsr;
+	int would_trap;
+
+	/* Determine if this exception would have generated a trap. */
+	would_trap = (fsr & ((long)eflag << FSR_TEM_SHIFT)) != 0UL;
+
+	/* If trapping, we only want to signal one bit. */
+	if (would_trap != 0) {
+		eflag &= ((fsr & FSR_TEM_MASK) >> FSR_TEM_SHIFT);
+		if ((eflag & (eflag - 1)) != 0) {
+			if (eflag & FP_EX_INVALID)
+				eflag = FP_EX_INVALID;
+			else if (eflag & FP_EX_OVERFLOW)
+				eflag = FP_EX_OVERFLOW;
+			else if (eflag & FP_EX_UNDERFLOW)
+				eflag = FP_EX_UNDERFLOW;
+			else if (eflag & FP_EX_DIVZERO)
+				eflag = FP_EX_DIVZERO;
+			else if (eflag & FP_EX_INEXACT)
+				eflag = FP_EX_INEXACT;
+		}
+	}
+
+	/* Set CEXC, here is the rule:
+	 *
+	 *    In general all FPU ops will set one and only one
+	 *    bit in the CEXC field, this is always the case
+	 *    when the IEEE exception trap is enabled in TEM.
+	 */
+	fsr &= ~(FSR_CEXC_MASK);
+	fsr |= ((long)eflag << FSR_CEXC_SHIFT);
+
+	/* Set the AEXC field, rule is:
+	 *
+	 *    If a trap would not be generated, the
+	 *    CEXC just generated is OR'd into the
+	 *    existing value of AEXC.
+	 */
+	if (would_trap == 0)
+		fsr |= ((long)eflag << FSR_AEXC_SHIFT);
+
+	/* If trapping, indicate fault trap type IEEE. */
+	if (would_trap != 0)
+		fsr |= (1UL << 14);
+
+	*pfsr = fsr;
+
+	return (would_trap ? 0 : 1);
+}
+
+typedef union {
+	u32 s;
+	u64 d;
+	u64 q[2];
+} *argp;
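+
+/* An argp aliases a slot in the FP register file, viewed as a single
+ * (u32), a double (u64) or a quad (u64[2]) depending on the operand
+ * size decoded from the insn.
+ */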
+
+static int do_one_mathemu(u32 insn, unsigned long *pfsr, unsigned long *fregs)
+{
+	/* Emulate the given insn, updating fsr and fregs appropriately. */
+	int type = 0;
+	/* r is rd, b is rs2 and a is rs1. The *u args tell
+	   whether the argument should be packed/unpacked (0 - do not unpack/pack, 1 - unpack/pack);
+	   the non-u args tell the size of the argument (0 - no argument, 1 - single, 2 - double, 3 - quad). */
+#define TYPE(dummy, r, ru, b, bu, a, au) type = (au << 2) | (a << 0) | (bu << 5) | (b << 3) | (ru << 8) | (r << 6)
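+/* Resulting layout of 'type': bits 1:0 = rs1 size, bit 2 = rs1 unpack,
+ * bits 4:3 = rs2 size, bit 5 = rs2 unpack, bits 7:6 = rd size, bit 8 =
+ * rd pack.  E.g. FADDQ uses TYPE(3,3,1,3,1,3,1): quad rs1, rs2 and rd,
+ * all unpacked and repacked.
+ */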
+	int freg;
+	argp rs1 = NULL, rs2 = NULL, rd = NULL;
+	FP_DECL_EX;
+	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+	FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
+	FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
+	int IR;
+	long fsr;
+
+#ifdef DEBUG_MATHEMU
+	printk("In do_mathemu(), emulating %08lx\n", insn);
+#endif
+
+	if ((insn & 0xc1f80000) == 0x81a00000)	/* FPOP1 */ {
+		switch ((insn >> 5) & 0x1ff) {
+		case FSQRTQ: TYPE(3,3,1,3,1,0,0); break;
+		case FADDQ:
+		case FSUBQ:
+		case FMULQ:
+		case FDIVQ: TYPE(3,3,1,3,1,3,1); break;
+		case FDMULQ: TYPE(3,3,1,2,1,2,1); break;
+		case FQTOS: TYPE(3,1,1,3,1,0,0); break;
+		case FQTOD: TYPE(3,2,1,3,1,0,0); break;
+		case FITOQ: TYPE(3,3,1,1,0,0,0); break;
+		case FSTOQ: TYPE(3,3,1,1,1,0,0); break;
+		case FDTOQ: TYPE(3,3,1,2,1,0,0); break;
+		case FQTOI: TYPE(3,1,0,3,1,0,0); break;
+		case FSQRTS: TYPE(2,1,1,1,1,0,0); break;
+		case FSQRTD: TYPE(2,2,1,2,1,0,0); break;
+		case FADDD:
+		case FSUBD:
+		case FMULD:
+		case FDIVD: TYPE(2,2,1,2,1,2,1); break;
+		case FADDS:
+		case FSUBS:
+		case FMULS:
+		case FDIVS: TYPE(2,1,1,1,1,1,1); break;
+		case FSMULD: TYPE(2,2,1,1,1,1,1); break;
+		case FDTOS: TYPE(2,1,1,2,1,0,0); break;
+		case FSTOD: TYPE(2,2,1,1,1,0,0); break;
+		case FSTOI: TYPE(2,1,0,1,1,0,0); break;
+		case FDTOI: TYPE(2,1,0,2,1,0,0); break;
+		case FITOS: TYPE(2,1,1,1,0,0,0); break;
+		case FITOD: TYPE(2,2,1,1,0,0,0); break;
+		case FMOVS:
+		case FABSS:
+		case FNEGS: TYPE(2,1,0,1,0,0,0); break;
+		}
+	} else if ((insn & 0xc1f80000) == 0x81a80000)	/* FPOP2 */ {
+		switch ((insn >> 5) & 0x1ff) {
+		case FCMPS: TYPE(3,0,0,1,1,1,1); break;
+		case FCMPES: TYPE(3,0,0,1,1,1,1); break;
+		case FCMPD: TYPE(3,0,0,2,1,2,1); break;
+		case FCMPED: TYPE(3,0,0,2,1,2,1); break;
+		case FCMPQ: TYPE(3,0,0,3,1,3,1); break;
+		case FCMPEQ: TYPE(3,0,0,3,1,3,1); break;
+		}
+	}
+
+	if (!type) {	/* oops, didn't recognise that FPop */
+#ifdef DEBUG_MATHEMU
+		printk("attempt to emulate unrecognised FPop!\n");
+#endif
+		return 0;
+	}
+
+	/* Decode the registers to be used */
+	freg = (*pfsr >> 14) & 0xf;	/* dead read of the ftt field; freg is reassigned below */
+
+	*pfsr &= ~0x1c000;				/* clear the traptype bits */
+	
+	freg = ((insn >> 14) & 0x1f);
+	switch (type & 0x3) {				/* is rs1 single, double or quad? */
+	case 3:
+		if (freg & 3) {				/* quadwords must have bits 4&5 of the */
+							/* encoded reg. number set to zero. */
+			*pfsr |= (6 << 14);
+			return 0;			/* simulate invalid_fp_register exception */
+		}
+	/* fall through */
+	case 2:
+		if (freg & 1) {				/* doublewords must have bit 5 zeroed */
+			*pfsr |= (6 << 14);
+			return 0;
+		}
+	}
+	rs1 = (argp)&fregs[freg];
+	switch (type & 0x7) {
+	case 7: FP_UNPACK_QP (QA, rs1); break;
+	case 6: FP_UNPACK_DP (DA, rs1); break;
+	case 5: FP_UNPACK_SP (SA, rs1); break;
+	}
+	freg = (insn & 0x1f);
+	switch ((type >> 3) & 0x3) {			/* same again for rs2 */
+	case 3:
+		if (freg & 3) {				/* quadwords must have bits 4&5 of the */
+							/* encoded reg. number set to zero. */
+			*pfsr |= (6 << 14);
+			return 0;			/* simulate invalid_fp_register exception */
+		}
+	/* fall through */
+	case 2:
+		if (freg & 1) {				/* doublewords must have bit 5 zeroed */
+			*pfsr |= (6 << 14);
+			return 0;
+		}
+	}
+	rs2 = (argp)&fregs[freg];
+	switch ((type >> 3) & 0x7) {
+	case 7: FP_UNPACK_QP (QB, rs2); break;
+	case 6: FP_UNPACK_DP (DB, rs2); break;
+	case 5: FP_UNPACK_SP (SB, rs2); break;
+	}
+	freg = ((insn >> 25) & 0x1f);
+	switch ((type >> 6) & 0x3) {			/* and finally rd. This one's a bit different */
+	case 0:						/* dest is fcc. (this must be FCMPQ or FCMPEQ) */
+		if (freg) {				/* V8 has only one set of condition codes, so */
+							/* anything but 0 in the rd field is an error */
+			*pfsr |= (6 << 14);		/* (should probably flag as invalid opcode */
+			return 0;			/* but SIGFPE will do :-> ) */
+		}
+		break;
+	case 3:
+		if (freg & 3) {				/* quadwords must have bits 4&5 of the */
+							/* encoded reg. number set to zero. */
+			*pfsr |= (6 << 14);
+			return 0;			/* simulate invalid_fp_register exception */
+		}
+	/* fall through */
+	case 2:
+		if (freg & 1) {				/* doublewords must have bit 5 zeroed */
+			*pfsr |= (6 << 14);
+			return 0;
+		}
+	/* fall through */
+	case 1:
+		rd = (void *)&fregs[freg];
+		break;
+	}
+#ifdef DEBUG_MATHEMU
+	printk("executing insn...\n");
+#endif
+	/* do the Right Thing */
+	switch ((insn >> 5) & 0x1ff) {
+	/* + */
+	case FADDS: FP_ADD_S (SR, SA, SB); break;
+	case FADDD: FP_ADD_D (DR, DA, DB); break;
+	case FADDQ: FP_ADD_Q (QR, QA, QB); break;
+	/* - */
+	case FSUBS: FP_SUB_S (SR, SA, SB); break;
+	case FSUBD: FP_SUB_D (DR, DA, DB); break;
+	case FSUBQ: FP_SUB_Q (QR, QA, QB); break;
+	/* * */
+	case FMULS: FP_MUL_S (SR, SA, SB); break;
+	case FSMULD: FP_CONV (D, S, 2, 1, DA, SA);
+		     FP_CONV (D, S, 2, 1, DB, SB);
+		     /* fall through */
+	case FMULD: FP_MUL_D (DR, DA, DB); break;
+	case FDMULQ: FP_CONV (Q, D, 4, 2, QA, DA);
+		     FP_CONV (Q, D, 4, 2, QB, DB);
+		     /* fall through */
+	case FMULQ: FP_MUL_Q (QR, QA, QB); break;
+	/* / */
+	case FDIVS: FP_DIV_S (SR, SA, SB); break;
+	case FDIVD: FP_DIV_D (DR, DA, DB); break;
+	case FDIVQ: FP_DIV_Q (QR, QA, QB); break;
+	/* sqrt */
+	case FSQRTS: FP_SQRT_S (SR, SB); break;
+	case FSQRTD: FP_SQRT_D (DR, DB); break;
+	case FSQRTQ: FP_SQRT_Q (QR, QB); break;
+	/* mov */
+	case FMOVS: rd->s = rs2->s; break;
+	case FABSS: rd->s = rs2->s & 0x7fffffff; break;
+	case FNEGS: rd->s = rs2->s ^ 0x80000000; break;
+	/* float to int */
+	case FSTOI: FP_TO_INT_S (IR, SB, 32, 1); break;
+	case FDTOI: FP_TO_INT_D (IR, DB, 32, 1); break;
+	case FQTOI: FP_TO_INT_Q (IR, QB, 32, 1); break;
+	/* int to float */
+	case FITOS: IR = rs2->s; FP_FROM_INT_S (SR, IR, 32, int); break;
+	case FITOD: IR = rs2->s; FP_FROM_INT_D (DR, IR, 32, int); break;
+	case FITOQ: IR = rs2->s; FP_FROM_INT_Q (QR, IR, 32, int); break;
+	/* float to float */
+	case FSTOD: FP_CONV (D, S, 2, 1, DR, SB); break;
+	case FSTOQ: FP_CONV (Q, S, 4, 1, QR, SB); break;
+	case FDTOQ: FP_CONV (Q, D, 4, 2, QR, DB); break;
+	case FDTOS: FP_CONV (S, D, 1, 2, SR, DB); break;
+	case FQTOS: FP_CONV (S, Q, 1, 4, SR, QB); break;
+	case FQTOD: FP_CONV (D, Q, 2, 4, DR, QB); break;
+	/* comparison */
+	case FCMPS:
+	case FCMPES:
+		FP_CMP_S(IR, SB, SA, 3);
+		if (IR == 3 &&
+		    (((insn >> 5) & 0x1ff) == FCMPES ||
+		     FP_ISSIGNAN_S(SA) ||
+		     FP_ISSIGNAN_S(SB)))
+			FP_SET_EXCEPTION (FP_EX_INVALID);
+		break;
+	case FCMPD:
+	case FCMPED:
+		FP_CMP_D(IR, DB, DA, 3);
+		if (IR == 3 &&
+		    (((insn >> 5) & 0x1ff) == FCMPED ||
+		     FP_ISSIGNAN_D(DA) ||
+		     FP_ISSIGNAN_D(DB)))
+			FP_SET_EXCEPTION (FP_EX_INVALID);
+		break;
+	case FCMPQ:
+	case FCMPEQ:
+		FP_CMP_Q(IR, QB, QA, 3);
+		if (IR == 3 &&
+		    (((insn >> 5) & 0x1ff) == FCMPEQ ||
+		     FP_ISSIGNAN_Q(QA) ||
+		     FP_ISSIGNAN_Q(QB)))
+			FP_SET_EXCEPTION (FP_EX_INVALID);
+	}
+	if (!FP_INHIBIT_RESULTS) {
+		switch ((type >> 6) & 0x7) {
+		case 0: fsr = *pfsr;
+			if (IR == -1) IR = 2;
+			/* fcc is always fcc0 */
+			fsr &= ~0xc00; fsr |= (IR << 10);
+			*pfsr = fsr;
+			break;
+		case 1: rd->s = IR; break;
+		case 5: FP_PACK_SP (rd, SR); break;
+		case 6: FP_PACK_DP (rd, DR); break;
+		case 7: FP_PACK_QP (rd, QR); break;
+		}
+	}
+	if (_fex == 0)
+		return 1;				/* success! */
+	return record_exception(pfsr, _fex);
+}
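
The CEXC/AEXC/TEM bookkeeping in record_exception() is easier to see in
isolation. A reduced C model, under the simplifying assumption that a single
IEEE exception bit is being recorded (the real function first collapses
multiple pending bits to one by priority):

#include <stdint.h>

#define TEM_SHIFT	23
#define AEXC_SHIFT	5
#define CEXC_MASK	0x1fu

/* Record one IEEE exception bit in a 32-bit %fsr image.
 * Returns 0 if a SIGFPE should be raised, 1 otherwise.
 */
static int fsr_record_one(uint32_t *fsr, uint32_t exc)
{
	int trapping = ((*fsr >> TEM_SHIFT) & exc) != 0;

	*fsr = (*fsr & ~CEXC_MASK) | exc;	/* CEXC always shows this op */
	if (trapping)
		*fsr |= 1u << 14;		/* ftt = IEEE_754_exception */
	else
		*fsr |= exc << AEXC_SHIFT;	/* accrue into AEXC */
	return !trapping;
}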
diff --git a/arch/sparc/math-emu/math_64.c b/arch/sparc/math-emu/math_64.c
new file mode 100644
index 0000000..1379dee
--- /dev/null
+++ b/arch/sparc/math-emu/math_64.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/sparc64/math-emu/math.c
+ *
+ * Copyright (C) 1997,1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ *
+ * Emulation routines originate from soft-fp package, which is part
+ * of glibc and has appropriate copyrights in it.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/perf_event.h>
+
+#include <asm/fpumacro.h>
+#include <asm/ptrace.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#include "sfp-util_64.h"
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+#include <math-emu/double.h>
+#include <math-emu/quad.h>
+
+/* QUAD - ftt == 3 */
+#define FMOVQ	0x003
+#define FNEGQ	0x007
+#define FABSQ	0x00b
+#define FSQRTQ	0x02b
+#define FADDQ	0x043
+#define FSUBQ	0x047
+#define FMULQ	0x04b
+#define FDIVQ	0x04f
+#define FDMULQ	0x06e
+#define FQTOX	0x083
+#define FXTOQ	0x08c
+#define FQTOS	0x0c7
+#define FQTOD	0x0cb
+#define FITOQ	0x0cc
+#define FSTOQ	0x0cd
+#define FDTOQ	0x0ce
+#define FQTOI	0x0d3
+/* SUBNORMAL - ftt == 2 */
+#define FSQRTS	0x029
+#define FSQRTD	0x02a
+#define FADDS	0x041
+#define FADDD	0x042
+#define FSUBS	0x045
+#define FSUBD	0x046
+#define FMULS	0x049
+#define FMULD	0x04a
+#define FDIVS	0x04d
+#define FDIVD	0x04e
+#define FSMULD	0x069
+#define FSTOX	0x081
+#define FDTOX	0x082
+#define FDTOS	0x0c6
+#define FSTOD	0x0c9
+#define FSTOI	0x0d1
+#define FDTOI	0x0d2
+#define FXTOS	0x084 /* Only Ultra-III generates this. */
+#define FXTOD	0x088 /* Only Ultra-III generates this. */
+#if 0	/* Optimized inline in sparc64/kernel/entry.S */
+#define FITOS	0x0c4 /* Only Ultra-III generates this. */
+#endif
+#define FITOD	0x0c8 /* Only Ultra-III generates this. */
+/* FPOP2 */
+#define FCMPQ	0x053
+#define FCMPEQ	0x057
+#define FMOVQ0	0x003
+#define FMOVQ1	0x043
+#define FMOVQ2	0x083
+#define FMOVQ3	0x0c3
+#define FMOVQI	0x103
+#define FMOVQX	0x183
+#define FMOVQZ	0x027
+#define FMOVQLE	0x047
+#define FMOVQLZ 0x067
+#define FMOVQNZ	0x0a7
+#define FMOVQGZ	0x0c7
+#define FMOVQGE 0x0e7
+
+#define FSR_TEM_SHIFT	23UL
+#define FSR_TEM_MASK	(0x1fUL << FSR_TEM_SHIFT)
+#define FSR_AEXC_SHIFT	5UL
+#define FSR_AEXC_MASK	(0x1fUL << FSR_AEXC_SHIFT)
+#define FSR_CEXC_SHIFT	0UL
+#define FSR_CEXC_MASK	(0x1fUL << FSR_CEXC_SHIFT)
+
+/* All routines returning an exception to raise should detect
+ * such exceptions _before_ rounding to be consistent with
+ * the behavior of the hardware in the implemented cases
+ * (and thus with the recommendations in the V9 architecture
+ * manual).
+ *
+ * We return 0 if a SIGFPE should be sent, 1 otherwise.
+ */
+static inline int record_exception(struct pt_regs *regs, int eflag)
+{
+	u64 fsr = current_thread_info()->xfsr[0];
+	int would_trap;
+
+	/* Determine if this exception would have generated a trap. */
+	would_trap = (fsr & ((long)eflag << FSR_TEM_SHIFT)) != 0UL;
+
+	/* If trapping, we only want to signal one bit. */
+	if(would_trap != 0) {
+		eflag &= ((fsr & FSR_TEM_MASK) >> FSR_TEM_SHIFT);
+		if((eflag & (eflag - 1)) != 0) {
+			if(eflag & FP_EX_INVALID)
+				eflag = FP_EX_INVALID;
+			else if(eflag & FP_EX_OVERFLOW)
+				eflag = FP_EX_OVERFLOW;
+			else if(eflag & FP_EX_UNDERFLOW)
+				eflag = FP_EX_UNDERFLOW;
+			else if(eflag & FP_EX_DIVZERO)
+				eflag = FP_EX_DIVZERO;
+			else if(eflag & FP_EX_INEXACT)
+				eflag = FP_EX_INEXACT;
+		}
+	}
+
+	/* Set CEXC, here is the rule:
+	 *
+	 *    In general all FPU ops will set one and only one
+	 *    bit in the CEXC field, this is always the case
+	 *    when the IEEE exception trap is enabled in TEM.
+	 */
+	fsr &= ~(FSR_CEXC_MASK);
+	fsr |= ((long)eflag << FSR_CEXC_SHIFT);
+
+	/* Set the AEXC field, rule is:
+	 *
+	 *    If a trap would not be generated, the
+	 *    CEXC just generated is OR'd into the
+	 *    existing value of AEXC.
+	 */
+	if(would_trap == 0)
+		fsr |= ((long)eflag << FSR_AEXC_SHIFT);
+
+	/* If trapping, indicate fault trap type IEEE. */
+	if(would_trap != 0)
+		fsr |= (1UL << 14);
+
+	current_thread_info()->xfsr[0] = fsr;
+
+	/* If we will not trap, advance the program counter over
+	 * the instruction being handled.
+	 */
+	if(would_trap == 0) {
+		regs->tpc = regs->tnpc;
+		regs->tnpc += 4;
+	}
+
+	return (would_trap ? 0 : 1);
+}
+
+typedef union {
+	u32 s;
+	u64 d;
+	u64 q[2];
+} *argp;
+
+int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap)
+{
+	unsigned long pc = regs->tpc;
+	unsigned long tstate = regs->tstate;
+	u32 insn = 0;
+	int type = 0;
+	/* ftt tells which FP trap type each insn may be seen under, r is rd, b is rs2 and a is rs1.
+	   The *u args tell whether the argument should be packed/unpacked (0 - do not unpack/pack, 1 - unpack/pack);
+	   the non-u args tell the size of the argument (0 - no argument, 1 - single, 2 - double, 3 - quad). */
+#define TYPE(ftt, r, ru, b, bu, a, au) type = (au << 2) | (a << 0) | (bu << 5) | (b << 3) | (ru << 8) | (r << 6) | (ftt << 9)
+	int freg;
+	static u64 zero[2] = { 0L, 0L };
+	int flags;
+	FP_DECL_EX;
+	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+	FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
+	FP_DECL_Q(QA); FP_DECL_Q(QB); FP_DECL_Q(QR);
+	int IR;
+	long XR, xfsr;
+
+	if (tstate & TSTATE_PRIV)
+		die_if_kernel("unfinished/unimplemented FPop from kernel", regs);
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+	if (test_thread_flag(TIF_32BIT))
+		pc = (u32)pc;
+	if (get_user(insn, (u32 __user *) pc) != -EFAULT) {
+		if ((insn & 0xc1f80000) == 0x81a00000) /* FPOP1 */ {
+			switch ((insn >> 5) & 0x1ff) {
+			/* QUAD - ftt == 3 */
+			case FMOVQ:
+			case FNEGQ:
+			case FABSQ: TYPE(3,3,0,3,0,0,0); break;
+			case FSQRTQ: TYPE(3,3,1,3,1,0,0); break;
+			case FADDQ:
+			case FSUBQ:
+			case FMULQ:
+			case FDIVQ: TYPE(3,3,1,3,1,3,1); break;
+			case FDMULQ: TYPE(3,3,1,2,1,2,1); break;
+			case FQTOX: TYPE(3,2,0,3,1,0,0); break;
+			case FXTOQ: TYPE(3,3,1,2,0,0,0); break;
+			case FQTOS: TYPE(3,1,1,3,1,0,0); break;
+			case FQTOD: TYPE(3,2,1,3,1,0,0); break;
+			case FITOQ: TYPE(3,3,1,1,0,0,0); break;
+			case FSTOQ: TYPE(3,3,1,1,1,0,0); break;
+			case FDTOQ: TYPE(3,3,1,2,1,0,0); break;
+			case FQTOI: TYPE(3,1,0,3,1,0,0); break;
+
+			/* We can get either unimplemented or unfinished
+			 * for these cases.  Pre-Niagara systems generate
+			 * unfinished fpop for SUBNORMAL cases, and Niagara
+			 * always gives unimplemented fpop for fsqrt{s,d}.
+			 */
+			case FSQRTS: {
+				unsigned long x = current_thread_info()->xfsr[0];
+
+				x = (x >> 14) & 0x7;
+				TYPE(x,1,1,1,1,0,0);
+				break;
+			}
+
+			case FSQRTD: {
+				unsigned long x = current_thread_info()->xfsr[0];
+
+				x = (x >> 14) & 0x7;
+				TYPE(x,2,1,2,1,0,0);
+				break;
+			}
+
+			/* SUBNORMAL - ftt == 2 */
+			case FADDD:
+			case FSUBD:
+			case FMULD:
+			case FDIVD: TYPE(2,2,1,2,1,2,1); break;
+			case FADDS:
+			case FSUBS:
+			case FMULS:
+			case FDIVS: TYPE(2,1,1,1,1,1,1); break;
+			case FSMULD: TYPE(2,2,1,1,1,1,1); break;
+			case FSTOX: TYPE(2,2,0,1,1,0,0); break;
+			case FDTOX: TYPE(2,2,0,2,1,0,0); break;
+			case FDTOS: TYPE(2,1,1,2,1,0,0); break;
+			case FSTOD: TYPE(2,2,1,1,1,0,0); break;
+			case FSTOI: TYPE(2,1,0,1,1,0,0); break;
+			case FDTOI: TYPE(2,1,0,2,1,0,0); break;
+
+			/* Only Ultra-III generates these */
+			case FXTOS: TYPE(2,1,1,2,0,0,0); break;
+			case FXTOD: TYPE(2,2,1,2,0,0,0); break;
+#if 0			/* Optimized inline in sparc64/kernel/entry.S */
+			case FITOS: TYPE(2,1,1,1,0,0,0); break;
+#endif
+			case FITOD: TYPE(2,2,1,1,0,0,0); break;
+			}
+		}
+		else if ((insn & 0xc1f80000) == 0x81a80000) /* FPOP2 */ {
+			IR = 2;
+			switch ((insn >> 5) & 0x1ff) {
+			case FCMPQ: TYPE(3,0,0,3,1,3,1); break;
+			case FCMPEQ: TYPE(3,0,0,3,1,3,1); break;
+			/* Now the conditional fmovq support */
+			case FMOVQ0:
+			case FMOVQ1:
+			case FMOVQ2:
+			case FMOVQ3:
+				/* fmovq %fccX, %fY, %fZ */
+				if (!((insn >> 11) & 3))
+					XR = current_thread_info()->xfsr[0] >> 10;
+				else
+					XR = current_thread_info()->xfsr[0] >> (30 + ((insn >> 10) & 0x6));
+				XR &= 3;
+				IR = 0;
+				switch ((insn >> 14) & 0x7) {
+				/* case 0: IR = 0; break; */			/* Never */
+				case 1: if (XR) IR = 1; break;			/* Not Equal */
+				case 2: if (XR == 1 || XR == 2) IR = 1; break;	/* Less or Greater */
+				case 3: if (XR & 1) IR = 1; break;		/* Unordered or Less */
+				case 4: if (XR == 1) IR = 1; break;		/* Less */
+				case 5: if (XR & 2) IR = 1; break;		/* Unordered or Greater */
+				case 6: if (XR == 2) IR = 1; break;		/* Greater */
+				case 7: if (XR == 3) IR = 1; break;		/* Unordered */
+				}
+				if ((insn >> 14) & 8)
+					IR ^= 1;
+				break;
+			case FMOVQI:
+			case FMOVQX:
+				/* fmovq %[ix]cc, %fY, %fZ */
+				XR = regs->tstate >> 32;
+				if ((insn >> 5) & 0x80)
+					XR >>= 4;
+				XR &= 0xf;
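+				/* XR now holds the chosen %icc or %xcc
+				 * nybble from %tstate: N, Z, V and C in
+				 * bits 3..0, matching the tests below.
+				 */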
+				IR = 0;
+				freg = ((XR >> 2) ^ XR) & 2;
+				switch ((insn >> 14) & 0x7) {
+				/* case 0: IR = 0; break; */			/* Never */
+				case 1: if (XR & 4) IR = 1; break;		/* Equal */
+				case 2: if ((XR & 4) || freg) IR = 1; break;	/* Less or Equal */
+				case 3: if (freg) IR = 1; break;		/* Less */
+				case 4: if (XR & 5) IR = 1; break;		/* Less or Equal Unsigned */
+				case 5: if (XR & 1) IR = 1; break;		/* Carry Set */
+				case 6: if (XR & 8) IR = 1; break;		/* Negative */
+				case 7: if (XR & 2) IR = 1; break;		/* Overflow Set */
+				}
+				if ((insn >> 14) & 8)
+					IR ^= 1;
+				break;
+			case FMOVQZ:
+			case FMOVQLE:
+			case FMOVQLZ:
+			case FMOVQNZ:
+			case FMOVQGZ:
+			case FMOVQGE:
+				freg = (insn >> 14) & 0x1f;
+				if (!freg)
+					XR = 0;
+				else if (freg < 16)
+					XR = regs->u_regs[freg];
+				else if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) {
+					struct reg_window32 __user *win32;
+					flushw_user ();
+					win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
+					get_user(XR, &win32->locals[freg - 16]);
+				} else {
+					struct reg_window __user *win;
+					flushw_user ();
+					win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
+					get_user(XR, &win->locals[freg - 16]);
+				}
+				IR = 0;
+				switch ((insn >> 10) & 3) {
+				case 1: if (!XR) IR = 1; break;			/* Register Zero */
+				case 2: if (XR <= 0) IR = 1; break;		/* Register Less Than or Equal to Zero */
+				case 3: if (XR < 0) IR = 1; break;		/* Register Less Than Zero */
+				}
+				if ((insn >> 10) & 4)
+					IR ^= 1;
+				break;
+			}
+			if (IR == 0) {
+				/* The fmov test was false. Do a nop instead */
+				current_thread_info()->xfsr[0] &= ~(FSR_CEXC_MASK);
+				regs->tpc = regs->tnpc;
+				regs->tnpc += 4;
+				return 1;
+			} else if (IR == 1) {
+				/* Change the instruction into plain fmovq */
+				insn = (insn & 0x3e00001f) | 0x81a00060;
+				TYPE(3,3,0,3,0,0,0); 
+			}
+		}
+	}
+	if (type) {
+		argp rs1 = NULL, rs2 = NULL, rd = NULL;
+		
+		/* Starting with UltraSPARC-T2, the cpu does not set the FP Trap
+		 * Type field in the %fsr to unimplemented_FPop.  Nor does it
+		 * use the fp_exception_other trap.  Instead it signals an
+		 * illegal instruction and leaves the FP trap type field of
+		 * the %fsr unchanged.
+		 */
+		if (!illegal_insn_trap) {
+			int ftt = (current_thread_info()->xfsr[0] >> 14) & 0x7;
+			if (ftt != (type >> 9))
+				goto err;
+		}
+		current_thread_info()->xfsr[0] &= ~0x1c000;
+		freg = ((insn >> 14) & 0x1f);
+		switch (type & 0x3) {
+		case 3: if (freg & 2) {
+				current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
+				goto err;
+			}
+		case 2: freg = ((freg & 1) << 5) | (freg & 0x1e);
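+			/* Registers %f32 and up encode bit 5 of the register
+			 * number in bit 0 of the 5-bit field; undo that, then
+			 * fall through to fetch the operand.
+			 */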
+		case 1: rs1 = (argp)&f->regs[freg];
+			flags = (freg < 32) ? FPRS_DL : FPRS_DU; 
+			if (!(current_thread_info()->fpsaved[0] & flags))
+				rs1 = (argp)&zero;
+			break;
+		}
+		switch (type & 0x7) {
+		case 7: FP_UNPACK_QP (QA, rs1); break;
+		case 6: FP_UNPACK_DP (DA, rs1); break;
+		case 5: FP_UNPACK_SP (SA, rs1); break;
+		}
+		freg = (insn & 0x1f);
+		switch ((type >> 3) & 0x3) {
+		case 3: if (freg & 2) {
+				current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
+				goto err;
+			}
+		case 2: freg = ((freg & 1) << 5) | (freg & 0x1e);
+		case 1: rs2 = (argp)&f->regs[freg];
+			flags = (freg < 32) ? FPRS_DL : FPRS_DU; 
+			if (!(current_thread_info()->fpsaved[0] & flags))
+				rs2 = (argp)&zero;
+			break;
+		}
+		switch ((type >> 3) & 0x7) {
+		case 7: FP_UNPACK_QP (QB, rs2); break;
+		case 6: FP_UNPACK_DP (DB, rs2); break;
+		case 5: FP_UNPACK_SP (SB, rs2); break;
+		}
+		freg = ((insn >> 25) & 0x1f);
+		switch ((type >> 6) & 0x3) {
+		case 3: if (freg & 2) {
+				current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */;
+				goto err;
+			}
+		case 2: freg = ((freg & 1) << 5) | (freg & 0x1e);
+		case 1: rd = (argp)&f->regs[freg];
+			flags = (freg < 32) ? FPRS_DL : FPRS_DU; 
+			if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) {
+				current_thread_info()->fpsaved[0] = FPRS_FEF;
+				current_thread_info()->gsr[0] = 0;
+			}
+			if (!(current_thread_info()->fpsaved[0] & flags)) {
+				if (freg < 32)
+					memset(f->regs, 0, 32*sizeof(u32));
+				else
+					memset(f->regs+32, 0, 32*sizeof(u32));
+			}
+			current_thread_info()->fpsaved[0] |= flags;
+			break;
+		}
+		switch ((insn >> 5) & 0x1ff) {
+		/* + */
+		case FADDS: FP_ADD_S (SR, SA, SB); break;
+		case FADDD: FP_ADD_D (DR, DA, DB); break;
+		case FADDQ: FP_ADD_Q (QR, QA, QB); break;
+		/* - */
+		case FSUBS: FP_SUB_S (SR, SA, SB); break;
+		case FSUBD: FP_SUB_D (DR, DA, DB); break;
+		case FSUBQ: FP_SUB_Q (QR, QA, QB); break;
+		/* * */
+		case FMULS: FP_MUL_S (SR, SA, SB); break;
+		case FSMULD: FP_CONV (D, S, 1, 1, DA, SA);
+			     FP_CONV (D, S, 1, 1, DB, SB);
+			     /* fall through */
+		case FMULD: FP_MUL_D (DR, DA, DB); break;
+		case FDMULQ: FP_CONV (Q, D, 2, 1, QA, DA);
+			     FP_CONV (Q, D, 2, 1, QB, DB);
+			     /* fall through */
+		case FMULQ: FP_MUL_Q (QR, QA, QB); break;
+		/* / */
+		case FDIVS: FP_DIV_S (SR, SA, SB); break;
+		case FDIVD: FP_DIV_D (DR, DA, DB); break;
+		case FDIVQ: FP_DIV_Q (QR, QA, QB); break;
+		/* sqrt */
+		case FSQRTS: FP_SQRT_S (SR, SB); break;
+		case FSQRTD: FP_SQRT_D (DR, DB); break;
+		case FSQRTQ: FP_SQRT_Q (QR, QB); break;
+		/* mov */
+		case FMOVQ: rd->q[0] = rs2->q[0]; rd->q[1] = rs2->q[1]; break;
+		case FABSQ: rd->q[0] = rs2->q[0] & 0x7fffffffffffffffUL; rd->q[1] = rs2->q[1]; break;
+		case FNEGQ: rd->q[0] = rs2->q[0] ^ 0x8000000000000000UL; rd->q[1] = rs2->q[1]; break;
+		/* float to int */
+		case FSTOI: FP_TO_INT_S (IR, SB, 32, 1); break;
+		case FDTOI: FP_TO_INT_D (IR, DB, 32, 1); break;
+		case FQTOI: FP_TO_INT_Q (IR, QB, 32, 1); break;
+		case FSTOX: FP_TO_INT_S (XR, SB, 64, 1); break;
+		case FDTOX: FP_TO_INT_D (XR, DB, 64, 1); break;
+		case FQTOX: FP_TO_INT_Q (XR, QB, 64, 1); break;
+		/* int to float */
+		case FITOQ: IR = rs2->s; FP_FROM_INT_Q (QR, IR, 32, int); break;
+		case FXTOQ: XR = rs2->d; FP_FROM_INT_Q (QR, XR, 64, long); break;
+		/* Only Ultra-III generates these */
+		case FXTOS: XR = rs2->d; FP_FROM_INT_S (SR, XR, 64, long); break;
+		case FXTOD: XR = rs2->d; FP_FROM_INT_D (DR, XR, 64, long); break;
+#if 0		/* Optimized inline in sparc64/kernel/entry.S */
+		case FITOS: IR = rs2->s; FP_FROM_INT_S (SR, IR, 32, int); break;
+#endif
+		case FITOD: IR = rs2->s; FP_FROM_INT_D (DR, IR, 32, int); break;
+		/* float to float */
+		case FSTOD: FP_CONV (D, S, 1, 1, DR, SB); break;
+		case FSTOQ: FP_CONV (Q, S, 2, 1, QR, SB); break;
+		case FDTOQ: FP_CONV (Q, D, 2, 1, QR, DB); break;
+		case FDTOS: FP_CONV (S, D, 1, 1, SR, DB); break;
+		case FQTOS: FP_CONV (S, Q, 1, 2, SR, QB); break;
+		case FQTOD: FP_CONV (D, Q, 1, 2, DR, QB); break;
+		/* comparison */
+		case FCMPQ:
+		case FCMPEQ:
+			FP_CMP_Q(XR, QB, QA, 3);
+			if (XR == 3 &&
+			    (((insn >> 5) & 0x1ff) == FCMPEQ ||
+			     FP_ISSIGNAN_Q(QA) ||
+			     FP_ISSIGNAN_Q(QB)))
+				FP_SET_EXCEPTION (FP_EX_INVALID);
+		}
+		if (!FP_INHIBIT_RESULTS) {
+			switch ((type >> 6) & 0x7) {
+			case 0: xfsr = current_thread_info()->xfsr[0];
+				if (XR == -1) XR = 2;
+				switch (freg & 3) {
+				/* fcc0, 1, 2, 3 */
+				case 0: xfsr &= ~0xc00; xfsr |= (XR << 10); break;
+				case 1: xfsr &= ~0x300000000UL; xfsr |= (XR << 32); break;
+				case 2: xfsr &= ~0xc00000000UL; xfsr |= (XR << 34); break;
+				case 3: xfsr &= ~0x3000000000UL; xfsr |= (XR << 36); break;
+				}
+				current_thread_info()->xfsr[0] = xfsr;
+				break;
+			case 1: rd->s = IR; break;
+			case 2: rd->d = XR; break;
+			case 5: FP_PACK_SP (rd, SR); break;
+			case 6: FP_PACK_DP (rd, DR); break;
+			case 7: FP_PACK_QP (rd, QR); break;
+			}
+		}
+
+		if(_fex != 0)
+			return record_exception(regs, _fex);
+
+		/* Success and no exceptions detected. */
+		current_thread_info()->xfsr[0] &= ~(FSR_CEXC_MASK);
+		regs->tpc = regs->tnpc;
+		regs->tnpc += 4;
+		return 1;
+	}
+err:	return 0;
+}
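
A small sketch of the fcc update at the end of do_mathemu(): the comparison
result is a 2-bit value packed into one of four condition-code fields of the
64-bit %fsr, at the shifts used in the switch on (freg & 3) above (fcc0 at
bits 11:10, fcc1..fcc3 at 33:32, 35:34 and 37:36):

#include <stdint.h>

/* Write 2-bit cc into fcc field n (0..3) of a 64-bit %fsr image. */
static uint64_t set_fcc(uint64_t xfsr, unsigned int n, uint64_t cc)
{
	unsigned int shift = n ? 30 + 2 * n : 10;

	return (xfsr & ~(3ULL << shift)) | ((cc & 3) << shift);
}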
diff --git a/arch/sparc/math-emu/sfp-util_32.h b/arch/sparc/math-emu/sfp-util_32.h
new file mode 100644
index 0000000..b57375f
--- /dev/null
+++ b/arch/sparc/math-emu/sfp-util_32.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) 				\
+  __asm__ ("addcc %r4,%5,%1\n\t"					\
+	   "addx %r2,%3,%0\n"						\
+	   : "=r" (sh),							\
+	     "=&r" (sl)							\
+	   : "%rJ" ((USItype)(ah)),					\
+	     "rI" ((USItype)(bh)),					\
+	     "%rJ" ((USItype)(al)),					\
+	     "rI" ((USItype)(bl))					\
+	   : "cc")
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) 				\
+  __asm__ ("subcc %r4,%5,%1\n\t"					\
+	   "subx %r2,%3,%0\n"						\
+	   : "=r" (sh),							\
+	     "=&r" (sl)							\
+	   : "rJ" ((USItype)(ah)),					\
+	     "rI" ((USItype)(bh)),					\
+	     "rJ" ((USItype)(al)),					\
+	     "rI" ((USItype)(bl))					\
+	   : "cc")
+
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("! Inlined umul_ppmm\n\t"					\
+	"wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n\t" \
+	"sra	%3,31,%%g2	! Don't move this insn\n\t"		\
+	"and	%2,%%g2,%%g2	! Don't move this insn\n\t"		\
+	"andcc	%%g0,0,%%g1	! Don't move this insn\n\t"		\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,%3,%%g1\n\t"					\
+	"mulscc	%%g1,0,%%g1\n\t" 					\
+	"add	%%g1,%%g2,%0\n\t" 					\
+	"rd	%%y,%1\n"						\
+	   : "=r" (w1),							\
+	     "=r" (w0)							\
+	   : "%rI" ((USItype)(u)),					\
+	     "r" ((USItype)(v))						\
+	   : "%g1", "%g2", "cc")
+
+/* It's quite necessary to add this much assembler for the sparc.
+   The default udiv_qrnnd (in C) is more than 10 times slower!  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n\t"					\
+	   "mov	32,%%g1\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "1:	bcs	5f\n\t"						\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "addx	%1,%1,%1	! so this can't give carry\n\t"	\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "2:	bne	1b\n\t"						\
+	   "subcc	%1,%2,%%g0\n\t"					\
+	   "bcs	3f\n\t"							\
+	   "addxcc %0,%0,%0	! shift n1n0 and a q-bit in lsb\n\t"	\
+	   "b		3f\n\t"						\
+	   "sub	%1,%2,%1	! this kills msb of n\n\t"		\
+	   "4:	sub	%1,%2,%1\n\t"					\
+	   "5:	addxcc	%1,%1,%1\n\t"					\
+	   "bcc	2b\n\t"							\
+	   "subcc	%%g1,1,%%g1\n\t"				\
+	   "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
+	   "bne	4b\n\t"							\
+	   "addcc	%0,%0,%0	! shift n1n0 and a 0-bit in lsb\n\t" \
+	   "sub	%1,%2,%1\n\t"						\
+	   "3:	xnor	%0,0,%0\n\t"					\
+	   "! End of inline udiv_qrnnd\n"				\
+	   : "=&r" (q),							\
+	     "=&r" (r)							\
+	   : "r" ((USItype)(d)),					\
+	     "1" ((USItype)(n1)),					\
+	     "0" ((USItype)(n0)) : "%g1", "cc")
+#define UDIV_NEEDS_NORMALIZATION 0
+
+#define abort()								\
+	return 0
+
+#ifdef __BIG_ENDIAN
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
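
The mulscc sequence above is the classic SPARC V7 software multiply from GNU
longlong.h: mulscc is a signed multiply step, and the %g2 term adds u back
into the high word when v's sign bit is set, turning the signed product into
the unsigned one. Its contract, stated as a portable C sketch:

#include <stdint.h>

/* umul_ppmm contract: (w1:w0) = u * v as an unsigned 64-bit product. */
static void umul_ppmm_model(uint32_t *w1, uint32_t *w0,
			    uint32_t u, uint32_t v)
{
	uint64_t p = (uint64_t)u * v;

	*w1 = (uint32_t)(p >> 32);
	*w0 = (uint32_t)p;
}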
diff --git a/arch/sparc/math-emu/sfp-util_64.h b/arch/sparc/math-emu/sfp-util_64.h
new file mode 100644
index 0000000..8fdb55a
--- /dev/null
+++ b/arch/sparc/math-emu/sfp-util_64.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/sparc64/math-emu/sfp-util.h
+ *
+ * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) 	\
+  __asm__ ("addcc %4,%5,%1\n\t"			\
+	   "add %2,%3,%0\n\t"			\
+  	   "bcs,a,pn %%xcc, 1f\n\t"		\
+  	   "add %0, 1, %0\n"			\
+  	   "1:"					\
+	   : "=r" (sh),				\
+	     "=&r" (sl)				\
+	   : "r" ((UDItype)(ah)),		\
+	     "r" ((UDItype)(bh)),		\
+	     "r" ((UDItype)(al)),		\
+	     "r" ((UDItype)(bl))		\
+	   : "cc")
+	   
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) 	\
+  __asm__ ("subcc %4,%5,%1\n\t"			\
+  	   "sub %2,%3,%0\n\t"			\
+  	   "bcs,a,pn %%xcc, 1f\n\t"		\
+  	   "sub %0, 1, %0\n"			\
+  	   "1:"					\
+	   : "=r" (sh),				\
+	     "=&r" (sl)				\
+	   : "r" ((UDItype)(ah)),		\
+	     "r" ((UDItype)(bh)),		\
+	     "r" ((UDItype)(al)),		\
+	     "r" ((UDItype)(bl))		\
+	   : "cc")
+
+#define umul_ppmm(wh, wl, u, v)				\
+  do {							\
+	  UDItype tmp1, tmp2, tmp3, tmp4;		\
+	  __asm__ __volatile__ (			\
+		   "srl %7,0,%3\n\t"			\
+		   "mulx %3,%6,%1\n\t"			\
+		   "srlx %6,32,%2\n\t"			\
+		   "mulx %2,%3,%4\n\t"			\
+		   "sllx %4,32,%5\n\t"			\
+		   "srl %6,0,%3\n\t"			\
+		   "sub %1,%5,%5\n\t"			\
+		   "srlx %5,32,%5\n\t"			\
+		   "addcc %4,%5,%4\n\t"			\
+		   "srlx %7,32,%5\n\t"			\
+		   "mulx %3,%5,%3\n\t"			\
+		   "mulx %2,%5,%5\n\t"			\
+		   "sethi %%hi(0x80000000),%2\n\t"	\
+		   "addcc %4,%3,%4\n\t"			\
+		   "srlx %4,32,%4\n\t"			\
+		   "add %2,%2,%2\n\t"			\
+		   "movcc %%xcc,%%g0,%2\n\t"		\
+		   "addcc %5,%4,%5\n\t"			\
+		   "sllx %3,32,%3\n\t"			\
+		   "add %1,%3,%1\n\t"			\
+		   "add %5,%2,%0"			\
+	   : "=r" (wh),					\
+	     "=&r" (wl),				\
+	     "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \
+	   : "r" ((UDItype)(u)),			\
+	     "r" ((UDItype)(v))				\
+	   : "cc");					\
+  } while (0)
+  
+#define udiv_qrnnd(q, r, n1, n0, d) 			\
+  do {                                                  \
+    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;     \
+    __d1 = (d >> 32);                                   \
+    __d0 = (USItype)d;                                  \
+                                                        \
+    __r1 = (n1) % __d1;                                 \
+    __q1 = (n1) / __d1;                                 \
+    __m = (UWtype) __q1 * __d0;                         \
+    __r1 = (__r1 << 32) | (n0 >> 32);                   \
+    if (__r1 < __m)                                     \
+      {                                                 \
+        __q1--, __r1 += (d);                            \
+        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+          if (__r1 < __m)                               \
+            __q1--, __r1 += (d);                        \
+      }                                                 \
+    __r1 -= __m;                                        \
+                                                        \
+    __r0 = __r1 % __d1;                                 \
+    __q0 = __r1 / __d1;                                 \
+    __m = (UWtype) __q0 * __d0;                         \
+    __r0 = (__r0 << 32) | ((USItype)n0);                \
+    if (__r0 < __m)                                     \
+      {                                                 \
+        __q0--, __r0 += (d);                            \
+        if (__r0 >= (d))                                \
+          if (__r0 < __m)                               \
+            __q0--, __r0 += (d);                        \
+      }                                                 \
+    __r0 -= __m;                                        \
+                                                        \
+    (q) = (UWtype) (__q1 << 32)  | __q0;                \
+    (r) = __r0;                                         \
+  } while (0)
+
+#define UDIV_NEEDS_NORMALIZATION 1  
+
+#define abort() \
+	return 0
+
+#ifdef __BIG_ENDIAN
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
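
udiv_qrnnd here is the textbook two-word-by-one-word division done in 32-bit
halves. It assumes the quotient fits in a single word (n1 < d), and
UDIV_NEEDS_NORMALIZATION == 1 tells the soft-fp caller to pre-shift so the
top bit of d is set. A checking model using the GCC/Clang unsigned __int128
extension (an assumption; this type is not used in the kernel source):

#include <stdint.h>

/* Model: q = (n1:n0) / d and r = (n1:n0) % d, valid when n1 < d. */
static void udiv_qrnnd_model(uint64_t *q, uint64_t *r,
			     uint64_t n1, uint64_t n0, uint64_t d)
{
	unsigned __int128 n = ((unsigned __int128)n1 << 64) | n0;

	*q = (uint64_t)(n / d);
	*r = (uint64_t)(n % d);
}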
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
new file mode 100644
index 0000000..d39075b
--- /dev/null
+++ b/arch/sparc/mm/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the linux Sparc-specific parts of the memory manager.
+#
+
+asflags-y := -ansi
+ccflags-y := -Werror
+
+obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o gup.o
+obj-y                   += fault_$(BITS).o
+obj-y                   += init_$(BITS).o
+obj-$(CONFIG_SPARC32)   += extable.o srmmu.o iommu.o io-unit.o
+obj-$(CONFIG_SPARC32)   += srmmu_access.o
+obj-$(CONFIG_SPARC32)   += hypersparc.o viking.o tsunami.o swift.o
+obj-$(CONFIG_SPARC32)   += leon_mm.o
+
+# Only used by sparc64
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
+# Only used by sparc32
+obj-$(CONFIG_HIGHMEM)   += highmem.o
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
new file mode 100644
index 0000000..241b406
--- /dev/null
+++ b/arch/sparc/mm/extable.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/arch/sparc/mm/extable.c
+ */
+
+#include <linux/module.h>
+#include <linux/extable.h>
+#include <linux/uaccess.h>
+
+void sort_extable(struct exception_table_entry *start,
+		  struct exception_table_entry *finish)
+{
+}
+
+/* Caller knows they are in a range if ret->fixup == 0 */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *base,
+	       const size_t num,
+	       unsigned long value)
+{
+	int i;
+
+	/* Single insn entries are encoded as:
+	 *	word 1:	insn address
+	 *	word 2:	fixup code address
+	 *
+	 * Range entries are encoded as:
+	 *	word 1: first insn address
+	 *	word 2: 0
+	 *	word 3: last insn address + 4 bytes
+	 *	word 4: fixup code address
+	 *
+	 * Deleted entries are encoded as:
+	 *	word 1: unused
+	 *	word 2: -1
+	 *
+	 * See asm/uaccess.h for more details.
+	 */
+
+	/* 1. Try to find an exact match. */
+	for (i = 0; i < num; i++) {
+		if (base[i].fixup == 0) {
+			/* A range entry, skip both parts. */
+			i++;
+			continue;
+		}
+
+		/* A deleted entry; see trim_init_extable */
+		if (base[i].fixup == -1)
+			continue;
+
+		if (base[i].insn == value)
+			return &base[i];
+	}
+
+	/* 2. Try to find a range match. */
+	for (i = 0; i < (num - 1); i++) {
+		if (base[i].fixup)
+			continue;
+
+		if (base[i].insn <= value && base[i + 1].insn > value)
+			return &base[i];
+
+		i++;
+	}
+
+	return NULL;
+}
+
+#ifdef CONFIG_MODULES
+/* We could memmove them around; easier to mark the trimmed ones. */
+void trim_init_extable(struct module *m)
+{
+	unsigned int i;
+	bool range;
+
+	for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
+		range = m->extable[i].fixup == 0;
+
+		if (within_module_init(m->extable[i].insn, m)) {
+			m->extable[i].fixup = -1;
+			if (range)
+				m->extable[i+1].fixup = -1;
+		}
+		if (range)
+			i++;
+	}
+}
+#endif /* CONFIG_MODULES */
+
+/* Special extable search, which handles ranges.  Returns fixup */
+unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
+{
+	const struct exception_table_entry *entry;
+
+	entry = search_exception_tables(addr);
+	if (!entry)
+		return 0;
+
+	/* Inside range?  Fix g2 and return correct fixup */
+	if (!entry->fixup) {
+		*g2 = (addr - entry->insn) / 4;
+		return (entry + 1)->fixup;
+	}
+
+	return entry->fixup;
+}
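
To make the three entry encodings handled above concrete, here is a
hypothetical table (all addresses invented for illustration): a range entry
covering four insns at 0x1000..0x100c with its fixup at 0x3000, followed by
a single-insn entry. For a fault at 0x1008, search_extables_range() would
return 0x3000 and set *g2 = (0x1008 - 0x1000) / 4 = 2.

static const struct exception_table_entry example_table[] = {
	{ .insn = 0x1000, .fixup = 0      },	/* range start, fixup == 0 */
	{ .insn = 0x1010, .fixup = 0x3000 },	/* last insn + 4, real fixup */
	{ .insn = 0x2000, .fixup = 0x3100 },	/* single-insn entry */
};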
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
new file mode 100644
index 0000000..b0440b0
--- /dev/null
+++ b/arch/sparc/mm/fault_32.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fault.c:  Page fault handlers for the Sparc.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <asm/head.h>
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/perf_event.h>
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/traps.h>
+
+#include "mm_32.h"
+
+int show_unhandled_signals = 1;
+
+static void __noreturn unhandled_fault(unsigned long address,
+				       struct task_struct *tsk,
+				       struct pt_regs *regs)
+{
+	if ((unsigned long) address < PAGE_SIZE) {
+		printk(KERN_ALERT
+		    "Unable to handle kernel NULL pointer dereference\n");
+	} else {
+		printk(KERN_ALERT "Unable to handle kernel paging request at virtual address %08lx\n",
+		       address);
+	}
+	printk(KERN_ALERT "tsk->{mm,active_mm}->context = %08lx\n",
+		(tsk->mm ? tsk->mm->context : tsk->active_mm->context));
+	printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %08lx\n",
+		(tsk->mm ? (unsigned long) tsk->mm->pgd :
+			(unsigned long) tsk->active_mm->pgd));
+	die_if_kernel("Oops", regs);
+}
+
+asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
+			    unsigned long address)
+{
+	struct pt_regs regs;
+	unsigned long g2;
+	unsigned int insn;
+	int i;
+
+	i = search_extables_range(ret_pc, &g2);
+	switch (i) {
+	case 3:
+		/* load & store will be handled by fixup */
+		return 3;
+
+	case 1:
+		/* store will be handled by fixup, load will bump out */
+		/* for _to_ macros */
+		insn = *((unsigned int *) pc);
+		if ((insn >> 21) & 1)
+			return 1;
+		break;
+
+	case 2:
+		/* load will be handled by fixup, store will bump out */
+		/* for _from_ macros */
+		insn = *((unsigned int *) pc);
+		if (!((insn >> 21) & 1) || ((insn >> 19) & 0x3f) == 15)
+			return 2;
+		break;
+
+	default:
+		break;
+	}
+
+	memset(&regs, 0, sizeof(regs));
+	regs.pc = pc;
+	regs.npc = pc + 4;
+	__asm__ __volatile__(
+		"rd %%psr, %0\n\t"
+		"nop\n\t"
+		"nop\n\t"
+		"nop\n" : "=r" (regs.psr));
+	unhandled_fault(address, current, &regs);
+
+	/* Not reached */
+	return 0;
+}
+
+static inline void
+show_signal_msg(struct pt_regs *regs, int sig, int code,
+		unsigned long address, struct task_struct *tsk)
+{
+	if (!unhandled_signal(tsk, sig))
+		return;
+
+	if (!printk_ratelimit())
+		return;
+
+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
+	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+	       tsk->comm, task_pid_nr(tsk), address,
+	       (void *)regs->pc, (void *)regs->u_regs[UREG_I7],
+	       (void *)regs->u_regs[UREG_FP], code);
+
+	print_vma_addr(KERN_CONT " in ", regs->pc);
+
+	printk(KERN_CONT "\n");
+}
+
+static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs,
+			       unsigned long addr)
+{
+	if (unlikely(show_unhandled_signals))
+		show_signal_msg(regs, sig, code,
+				addr, current);
+
+	force_sig_fault(sig, code, (void __user *) addr, 0, current);
+}
+
+static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault)
+{
+	unsigned int insn;
+
+	if (text_fault)
+		return regs->pc;
+
+	if (regs->psr & PSR_PS)
+		insn = *(unsigned int *) regs->pc;
+	else
+		__get_user(insn, (unsigned int *) regs->pc);
+
+	return safe_compute_effective_address(regs, insn);
+}
+
+static noinline void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
+				      int text_fault)
+{
+	unsigned long addr = compute_si_addr(regs, text_fault);
+
+	__do_fault_siginfo(code, sig, regs, addr);
+}
+
+asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
+			       unsigned long address)
+{
+	struct vm_area_struct *vma;
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+	unsigned int fixup;
+	unsigned long g2;
+	int from_user = !(regs->psr & PSR_PS);
+	int code;
+	vm_fault_t fault;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	if (text_fault)
+		address = regs->pc;
+
+	/*
+	 * We fault-in kernel-space virtual memory on-demand. The
+	 * 'reference' page table is init_mm.pgd.
+	 *
+	 * NOTE! We MUST NOT take any locks for this case. We may
+	 * be in an interrupt or a critical region, and should
+	 * only copy the information from the master page table,
+	 * nothing more.
+	 */
+	code = SEGV_MAPERR;
+	if (address >= TASK_SIZE)
+		goto vmalloc_fault;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (pagefault_disabled() || !mm)
+		goto no_context;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+retry:
+	down_read(&mm->mmap_sem);
+
+	if (!from_user && address >= PAGE_OFFSET)
+		goto bad_area;
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
+good_area:
+	code = SEGV_ACCERR;
+	if (write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	} else {
+		/* Allow reads even for write-only mappings */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	if (from_user)
+		flags |= FAULT_FLAG_USER;
+	if (write)
+		flags |= FAULT_FLAG_WRITE;
+
+	/*
+	 * If for any reason at all we couldn't handle the fault,
+	 * make sure we exit gracefully rather than endlessly redo
+	 * the fault.
+	 */
+	fault = handle_mm_fault(vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		return;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			current->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
+				      1, regs, address);
+		} else {
+			current->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
+				      1, regs, address);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			/* No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+	return;
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+bad_area:
+	up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+	/* User mode accesses just cause a SIGSEGV */
+	if (from_user) {
+		do_fault_siginfo(code, SIGSEGV, regs, text_fault);
+		return;
+	}
+
+	/* Is this in ex_table? */
+no_context:
+	g2 = regs->u_regs[UREG_G2];
+	if (!from_user) {
+		fixup = search_extables_range(regs->pc, &g2);
+		/* Values below 10 are reserved for other things */
+		if (fixup > 10) {
+			extern const unsigned int __memset_start[];
+			extern const unsigned int __memset_end[];
+			extern const unsigned int __csum_partial_copy_start[];
+			extern const unsigned int __csum_partial_copy_end[];
+
+#ifdef DEBUG_EXCEPTIONS
+			printk("Exception: PC<%08lx> faddr<%08lx>\n",
+			       regs->pc, address);
+			printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
+				regs->pc, fixup, g2);
+#endif
+			if ((regs->pc >= (unsigned long)__memset_start &&
+			     regs->pc < (unsigned long)__memset_end) ||
+			    (regs->pc >= (unsigned long)__csum_partial_copy_start &&
+			     regs->pc < (unsigned long)__csum_partial_copy_end)) {
+				regs->u_regs[UREG_I4] = address;
+				regs->u_regs[UREG_I5] = regs->pc;
+			}
+			regs->u_regs[UREG_G2] = g2;
+			regs->pc = fixup;
+			regs->npc = regs->pc + 4;
+			return;
+		}
+	}
+
+	unhandled_fault(address, tsk, regs);
+	do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	up_read(&mm->mmap_sem);
+	if (from_user) {
+		pagefault_out_of_memory();
+		return;
+	}
+	goto no_context;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, text_fault);
+	if (!from_user)
+		goto no_context;
+
+vmalloc_fault:
+	{
+		/*
+		 * Synchronize this task's top level page-table
+		 * with the 'reference' page table.
+		 */
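+		/* (E.g. a driver vmalloc()s memory after this task's pgd
+		 * was created; the first touch from this context traps
+		 * here and copies the kernel's pgd/pmd entries in.)
+		 */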
+		int offset = pgd_index(address);
+		pgd_t *pgd, *pgd_k;
+		pmd_t *pmd, *pmd_k;
+
+		pgd = tsk->active_mm->pgd + offset;
+		pgd_k = init_mm.pgd + offset;
+
+		if (!pgd_present(*pgd)) {
+			if (!pgd_present(*pgd_k))
+				goto bad_area_nosemaphore;
+			pgd_val(*pgd) = pgd_val(*pgd_k);
+			return;
+		}
+
+		pmd = pmd_offset(pgd, address);
+		pmd_k = pmd_offset(pgd_k, address);
+
+		if (pmd_present(*pmd) || !pmd_present(*pmd_k))
+			goto bad_area_nosemaphore;
+
+		*pmd = *pmd_k;
+		return;
+	}
+}
+
+/* This always deals with user addresses. */
+static void force_user_fault(unsigned long address, int write)
+{
+	struct vm_area_struct *vma;
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+	unsigned int flags = FAULT_FLAG_USER;
+	int code;
+
+	code = SEGV_MAPERR;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+good_area:
+	code = SEGV_ACCERR;
+	if (write) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+		flags |= FAULT_FLAG_WRITE;
+	} else {
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+	switch (handle_mm_fault(vma, address, flags)) {
+	case VM_FAULT_SIGBUS:
+	case VM_FAULT_OOM:
+		goto do_sigbus;
+	}
+	up_read(&mm->mmap_sem);
+	return;
+bad_area:
+	up_read(&mm->mmap_sem);
+	__do_fault_siginfo(code, SIGSEGV, tsk->thread.kregs, address);
+	return;
+
+do_sigbus:
+	up_read(&mm->mmap_sem);
+	__do_fault_siginfo(BUS_ADRERR, SIGBUS, tsk->thread.kregs, address);
+}
+
+static void check_stack_aligned(unsigned long sp)
+{
+	if (sp & 0x7UL)
+		force_sig(SIGILL, current);
+}
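+
+/* The 16-word (64-byte) register window save area sits at %sp; since
+ * %sp must be doubleword aligned, sp + 0x38 addresses its final
+ * doubleword.  The handlers below fault in that far end first whenever
+ * the save area straddles a page boundary, then the base of the area.
+ */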
+
+void window_overflow_fault(void)
+{
+	unsigned long sp;
+
+	sp = current_thread_info()->rwbuf_stkptrs[0];
+	if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
+		force_user_fault(sp + 0x38, 1);
+	force_user_fault(sp, 1);
+
+	check_stack_aligned(sp);
+}
+
+void window_underflow_fault(unsigned long sp)
+{
+	if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
+		force_user_fault(sp + 0x38, 0);
+	force_user_fault(sp, 0);
+
+	check_stack_aligned(sp);
+}
+
+void window_ret_fault(struct pt_regs *regs)
+{
+	unsigned long sp;
+
+	sp = regs->u_regs[UREG_FP];
+	if (((sp + 0x38) & PAGE_MASK) != (sp & PAGE_MASK))
+		force_user_fault(sp + 0x38, 0);
+	force_user_fault(sp, 0);
+
+	check_stack_aligned(sp);
+}
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
new file mode 100644
index 0000000..8f8a604
--- /dev/null
+++ b/arch/sparc/mm/fault_64.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc.
+ *
+ * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
+ */
+
+#include <asm/head.h>
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/extable.h>
+#include <linux/init.h>
+#include <linux/perf_event.h>
+#include <linux/interrupt.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/percpu.h>
+#include <linux/context_tracking.h>
+#include <linux/uaccess.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/asi.h>
+#include <asm/lsu.h>
+#include <asm/sections.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+
+int show_unhandled_signals = 1;
+
+static inline __kprobes int notify_page_fault(struct pt_regs *regs)
+{
+	int ret = 0;
+
+	/* kprobe_running() needs smp_processor_id() */
+	if (kprobes_built_in() && !user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, 0))
+			ret = 1;
+		preempt_enable();
+	}
+	return ret;
+}
+
+static void __kprobes unhandled_fault(unsigned long address,
+				      struct task_struct *tsk,
+				      struct pt_regs *regs)
+{
+	if ((unsigned long) address < PAGE_SIZE) {
+		printk(KERN_ALERT "Unable to handle kernel NULL "
+		       "pointer dereference\n");
+	} else {
+		printk(KERN_ALERT "Unable to handle kernel paging request "
+		       "at virtual address %016lx\n", (unsigned long)address);
+	}
+	printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n",
+	       (tsk->mm ?
+		CTX_HWBITS(tsk->mm->context) :
+		CTX_HWBITS(tsk->active_mm->context)));
+	printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n",
+	       (tsk->mm ? (unsigned long) tsk->mm->pgd :
+		          (unsigned long) tsk->active_mm->pgd));
+	die_if_kernel("Oops", regs);
+}
+
+static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
+{
+	printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
+	       regs->tpc);
+	printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]);
+	printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]);
+	printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
+	dump_stack();
+	unhandled_fault(regs->tpc, current, regs);
+}
+
+/*
+ * We now make sure that mmap_sem is held in all paths that call
+ * this.  Additionally, to prevent kswapd from ripping ptes out from
+ * under us, we disable interrupts around the time that we look at
+ * the pte; kswapd will have to wait for the smp ipi response from
+ * us before proceeding.  vmtruncate likewise.  This saves us from
+ * having to take the pte lock.
+ */
+static unsigned int get_user_insn(unsigned long tpc)
+{
+	pgd_t *pgdp = pgd_offset(current->mm, tpc);
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep, pte;
+	unsigned long pa;
+	u32 insn = 0;
+
+	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
+		goto out;
+	pudp = pud_offset(pgdp, tpc);
+	if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
+		goto out;
+
+	/* This disables preemption for us as well. */
+	local_irq_disable();
+
+	pmdp = pmd_offset(pudp, tpc);
+	if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
+		goto out_irq_enable;
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	if (is_hugetlb_pmd(*pmdp)) {
+		pa  = pmd_pfn(*pmdp) << PAGE_SHIFT;
+		pa += tpc & ~HPAGE_MASK;
+
+		/* Use phys bypass so we don't pollute dtlb/dcache. */
+		__asm__ __volatile__("lduwa [%1] %2, %0"
+				     : "=r" (insn)
+				     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+	} else
+#endif
+	{
+		ptep = pte_offset_map(pmdp, tpc);
+		pte = *ptep;
+		if (pte_present(pte)) {
+			pa  = (pte_pfn(pte) << PAGE_SHIFT);
+			pa += (tpc & ~PAGE_MASK);
+
+			/* Use phys bypass so we don't pollute dtlb/dcache. */
+			__asm__ __volatile__("lduwa [%1] %2, %0"
+					     : "=r" (insn)
+					     : "r" (pa), "i" (ASI_PHYS_USE_EC));
+		}
+		pte_unmap(ptep);
+	}
+out_irq_enable:
+	local_irq_enable();
+out:
+	return insn;
+}
+
+static inline void
+show_signal_msg(struct pt_regs *regs, int sig, int code,
+		unsigned long address, struct task_struct *tsk)
+{
+	if (!unhandled_signal(tsk, sig))
+		return;
+
+	if (!printk_ratelimit())
+		return;
+
+	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
+	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+	       tsk->comm, task_pid_nr(tsk), address,
+	       (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
+	       (void *)regs->u_regs[UREG_FP], code);
+
+	print_vma_addr(KERN_CONT " in ", regs->tpc);
+
+	printk(KERN_CONT "\n");
+}
+
+static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
+			     unsigned long fault_addr, unsigned int insn,
+			     int fault_code)
+{
+	unsigned long addr;
+
+	if (fault_code & FAULT_CODE_ITLB) {
+		addr = regs->tpc;
+	} else {
+		/* If we were able to probe the faulting instruction, use it
+		 * to compute a precise fault address.  Otherwise use the
+		 * address provided at fault time, which may only have page
+		 * granularity.
+		 */
+		if (insn)
+			addr = compute_effective_address(regs, insn, 0);
+		else
+			addr = fault_addr;
+	}
+
+	if (unlikely(show_unhandled_signals))
+		show_signal_msg(regs, sig, code, addr, current);
+
+	force_sig_fault(sig, code, (void __user *) addr, 0, current);
+}
+
+static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
+{
+	if (!insn) {
+		if (!regs->tpc || (regs->tpc & 0x3))
+			return 0;
+		if (regs->tstate & TSTATE_PRIV) {
+			insn = *(unsigned int *) regs->tpc;
+		} else {
+			insn = get_user_insn(regs->tpc);
+		}
+	}
+	return insn;
+}
+
+static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code,
+				      int fault_code, unsigned int insn,
+				      unsigned long address)
+{
+	unsigned char asi = ASI_P;
+
+	if ((!insn) && (regs->tstate & TSTATE_PRIV))
+		goto cannot_handle;
+
+	/* If the user insn could not be read (thus insn is zero), that
+	 * is fine.  We will just gun down the process with a signal
+	 * in that case.
+	 */
+
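+	/* Roughly, the decode below: loads/stores have bits 31:30 == 3,
+	 * and the alternate-space forms also set bit 23, hence the
+	 * 0xc0800000 test.  Bit 13 is the i-bit: when set, the ASI comes
+	 * from %asi (TSTATE bits 31:24), otherwise it is immediate in
+	 * insn bits 12:5.  (asi & 0xf2) == 0x82 matches the no-fault
+	 * ASIs such as 0x82 (ASI_PNF) and 0x8a (ASI_PNFL).
+	 */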
+	if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) &&
+	    (insn & 0xc0800000) == 0xc0800000) {
+		if (insn & 0x2000)
+			asi = (regs->tstate >> 24);
+		else
+			asi = (insn >> 5);
+		if ((asi & 0xf2) == 0x82) {
+			if (insn & 0x1000000) {
+				handle_ldf_stq(insn, regs);
+			} else {
+				/* This was a non-faulting load. Just clear the
+				 * destination register(s) and continue with the next
+				 * instruction. -jj
+				 */
+				handle_ld_nf(insn, regs);
+			}
+			return;
+		}
+	}
+
+	/* Is this in ex_table? */
+	if (regs->tstate & TSTATE_PRIV) {
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+	} else {
+		/* The si_code was set to make clear whether
+		 * this was a SEGV_MAPERR or SEGV_ACCERR fault.
+		 */
+		do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code);
+		return;
+	}
+
+cannot_handle:
+	unhandled_fault (address, current, regs);
+}
+
+static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs)
+{
+	static int times;
+
+	if (times++ < 10)
+		printk(KERN_ERR "FAULT[%s:%d]: 32-bit process reports "
+		       "64-bit TPC [%lx]\n",
+		       current->comm, current->pid,
+		       regs->tpc);
+	show_regs(regs);
+}
+
+asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned int insn = 0;
+	int si_code, fault_code;
+	vm_fault_t fault;
+	unsigned long address, mm_rss;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	fault_code = get_thread_fault_code();
+
+	if (notify_page_fault(regs))
+		goto exit_exception;
+
+	si_code = SEGV_MAPERR;
+	address = current_thread_info()->fault_address;
+
+	if ((fault_code & FAULT_CODE_ITLB) &&
+	    (fault_code & FAULT_CODE_DTLB))
+		BUG();
+
+	if (test_thread_flag(TIF_32BIT)) {
+		if (!(regs->tstate & TSTATE_PRIV)) {
+			if (unlikely((regs->tpc >> 32) != 0)) {
+				bogus_32bit_fault_tpc(regs);
+				goto intr_or_no_mm;
+			}
+		}
+		if (unlikely((address >> 32) != 0))
+			goto intr_or_no_mm;
+	}
+
+	if (regs->tstate & TSTATE_PRIV) {
+		unsigned long tpc = regs->tpc;
+
+		/* Sanity check the PC. */
+		if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) ||
+		    (tpc >= MODULES_VADDR && tpc < MODULES_END)) {
+			/* Valid, no problems... */
+		} else {
+			bad_kernel_pc(regs, address);
+			goto exit_exception;
+		}
+	} else
+		flags |= FAULT_FLAG_USER;
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (faulthandler_disabled() || !mm)
+		goto intr_or_no_mm;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if ((regs->tstate & TSTATE_PRIV) &&
+		    !search_exception_tables(regs->tpc)) {
+			insn = get_fault_insn(regs, insn);
+			goto handle_kernel_fault;
+		}
+
+retry:
+		down_read(&mm->mmap_sem);
+	}
+
+	if (fault_code & FAULT_CODE_BAD_RA)
+		goto do_sigbus;
+
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+
+	/* Pure DTLB misses do not tell us whether the fault-causing
+	 * load/store/atomic was a write or not, only that there
+	 * was no match.  So in such a case we (carefully) read the
+	 * instruction to try and figure this out.  It's an optimization
+	 * so it's ok if we can't do this.
+	 *
+	 * Special hack, window spill/fill knows the exact fault type.
+	 */
+	if (((fault_code &
+	      (FAULT_CODE_DTLB | FAULT_CODE_WRITE | FAULT_CODE_WINFIXUP)) == FAULT_CODE_DTLB) &&
+	    (vma->vm_flags & VM_WRITE) != 0) {
+		insn = get_fault_insn(regs, 0);
+		if (!insn)
+			goto continue_fault;
+		/* All loads, stores and atomics have bits 30 and 31 both set
+		 * in the instruction.  Bit 21 is set in all stores, but we
+		 * have to avoid prefetches which also have bit 21 set.
+		 */
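+		/* Worked example: "stx %g1, [%o0]" has op = 3 (bits 31:30)
+		 * and op3 = 0x0e, whose bit 2 lands in insn bit 21, so it
+		 * passes the 0xc0200000 test; op3 == 0x2d/0x3d
+		 * (PREFETCH/PREFETCHA) is what the second test filters out.
+		 */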
+		if ((insn & 0xc0200000) == 0xc0200000 &&
+		    (insn & 0x01780000) != 0x01680000) {
+			/* Don't bother updating thread struct value,
+			 * because update_mmu_cache only cares which tlb
+			 * the access came from.
+			 */
+			fault_code |= FAULT_CODE_WRITE;
+		}
+	}
+continue_fault:
+
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (!(fault_code & FAULT_CODE_WRITE)) {
+		/* Non-faulting loads shouldn't expand stack. */
+		insn = get_fault_insn(regs, insn);
+		if ((insn & 0xc0800000) == 0xc0800000) {
+			unsigned char asi;
+
+			if (insn & 0x2000)
+				asi = (regs->tstate >> 24);
+			else
+				asi = (insn >> 5);
+			if ((asi & 0xf2) == 0x82)
+				goto bad_area;
+		}
+	}
+	if (expand_stack(vma, address))
+		goto bad_area;
+	/*
+	 * Ok, we have a good vm_area for this memory access, so
+	 * we can handle it..
+	 */
+good_area:
+	si_code = SEGV_ACCERR;
+
+	/* If we took a ITLB miss on a non-executable page, catch
+	 * that here.
+	 */
+	if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) {
+		WARN(address != regs->tpc,
+		     "address (%lx) != regs->tpc (%lx)\n", address, regs->tpc);
+		WARN_ON(regs->tstate & TSTATE_PRIV);
+		goto bad_area;
+	}
+
+	if (fault_code & FAULT_CODE_WRITE) {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+
+		/* Spitfire has an icache which does not snoop
+		 * processor stores.  Later processors do...
+		 */
+		if (tlb_type == spitfire &&
+		    (vma->vm_flags & VM_EXEC) != 0 &&
+		    vma->vm_file != NULL)
+			set_thread_fault_code(fault_code |
+					      FAULT_CODE_BLKCOMMIT);
+
+		flags |= FAULT_FLAG_WRITE;
+	} else {
+		/* Allow reads even for write-only mappings */
+		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+			goto bad_area;
+	}
+
+	fault = handle_mm_fault(vma, address, flags);
+
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+		goto exit_exception;
+
+	if (unlikely(fault & VM_FAULT_ERROR)) {
+		if (fault & VM_FAULT_OOM)
+			goto out_of_memory;
+		else if (fault & VM_FAULT_SIGSEGV)
+			goto bad_area;
+		else if (fault & VM_FAULT_SIGBUS)
+			goto do_sigbus;
+		BUG();
+	}
+
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			current->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
+				      1, regs, address);
+		} else {
+			current->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
+				      1, regs, address);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+
+			/* No need to up_read(&mm->mmap_sem) as we would
+			 * have already released it in __lock_page_or_retry
+			 * in mm/filemap.c.
+			 */
+
+			goto retry;
+		}
+	}
+	up_read(&mm->mmap_sem);
+
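+	/* After a successful fault, grow the TSB if the resident set has
+	 * outrun tsb_rss_limit.  THP pages are accounted in the base-page
+	 * rss, so they are subtracted there and re-counted in REAL_HPAGE
+	 * units for the huge TSB (a rough sketch of the accounting below).
+	 */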
+	mm_rss = get_mm_rss(mm);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
+#endif
+	if (unlikely(mm_rss >
+		     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
+		tsb_grow(mm, MM_TSB_BASE, mm_rss);
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+	mm_rss *= REAL_HPAGE_PER_HPAGE;
+	if (unlikely(mm_rss >
+		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
+		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
+			tsb_grow(mm, MM_TSB_HUGE, mm_rss);
+		else
+			hugetlb_setup(regs);
+
+	}
+#endif
+exit_exception:
+	exception_exit(prev_state);
+	return;
+
+	/*
+	 * Something tried to access memory that isn't in our memory map..
+	 * Fix it, but check if it's kernel or user first..
+	 */
+bad_area:
+	insn = get_fault_insn(regs, insn);
+	up_read(&mm->mmap_sem);
+
+handle_kernel_fault:
+	do_kernel_fault(regs, si_code, fault_code, insn, address);
+	goto exit_exception;
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+	insn = get_fault_insn(regs, insn);
+	up_read(&mm->mmap_sem);
+	if (!(regs->tstate & TSTATE_PRIV)) {
+		pagefault_out_of_memory();
+		goto exit_exception;
+	}
+	goto handle_kernel_fault;
+
+intr_or_no_mm:
+	insn = get_fault_insn(regs, 0);
+	goto handle_kernel_fault;
+
+do_sigbus:
+	insn = get_fault_insn(regs, insn);
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (regs->tstate & TSTATE_PRIV)
+		goto handle_kernel_fault;
+}
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
new file mode 100644
index 0000000..aee6dba
--- /dev/null
+++ b/arch/sparc/mm/gup.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lockless get_user_pages_fast for sparc, cribbed from powerpc
+ *
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmstat.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <asm/pgtable.h>
+#include <asm/adi.h>
+
+/*
+ * The performance critical leaf functions are made noinline otherwise gcc
+ * inlines everything into a single function which results in too much
+ * register pressure.
+ */
+static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	unsigned long mask, result;
+	pte_t *ptep;
+
+	if (tlb_type == hypervisor) {
+		result = _PAGE_PRESENT_4V|_PAGE_P_4V;
+		if (write)
+			result |= _PAGE_WRITE_4V;
+	} else {
+		result = _PAGE_PRESENT_4U|_PAGE_P_4U;
+		if (write)
+			result |= _PAGE_WRITE_4U;
+	}
+	mask = result | _PAGE_SPECIAL;
+
+	ptep = pte_offset_kernel(&pmd, addr);
+	do {
+		struct page *page, *head;
+		pte_t pte = *ptep;
+
+		if ((pte_val(pte) & mask) != result)
+			return 0;
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+		/* The hugepage case is simplified on sparc64 because
+		 * we encode the sub-page pfn offsets into the
+		 * hugepage PTEs.  We could optimize this in the future to
+		 * use page_cache_add_speculative() for the hugepage case.
+		 */
+		page = pte_page(pte);
+		head = compound_head(page);
+		if (!page_cache_get_speculative(head))
+			return 0;
+		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			put_page(head);
+			return 0;
+		}
+
+		pages[*nr] = page;
+		(*nr)++;
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	return 1;
+}
+
+static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
+			unsigned long end, int write, struct page **pages,
+			int *nr)
+{
+	struct page *head, *page;
+	int refs;
+
+	if (!(pmd_val(pmd) & _PAGE_VALID))
+		return 0;
+
+	if (write && !pmd_write(pmd))
+		return 0;
+
+	refs = 0;
+	page = pmd_page(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	head = compound_head(page);
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
+			unsigned long end, int write, struct page **pages,
+			int *nr)
+{
+	struct page *head, *page;
+	int refs;
+
+	if (!(pud_val(pud) & _PAGE_VALID))
+		return 0;
+
+	if (write && !pud_write(pud))
+		return 0;
+
+	refs = 0;
+	page = pud_page(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+	head = compound_head(page);
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pud_val(pud) != pud_val(*pudp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp;
+
+	pmdp = pmd_offset(&pud, addr);
+	do {
+		pmd_t pmd = *pmdp;
+
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd))
+			return 0;
+		if (unlikely(pmd_large(pmd))) {
+			if (!gup_huge_pmd(pmdp, pmd, addr, next,
+					  write, pages, nr))
+				return 0;
+		} else if (!gup_pte_range(pmd, addr, next, write,
+					  pages, nr))
+			return 0;
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp;
+
+	pudp = pud_offset(&pgd, addr);
+	do {
+		pud_t pud = *pudp;
+
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (unlikely(pud_large(pud))) {
+			if (!gup_huge_pud(pudp, pud, addr, next,
+					  write, pages, nr))
+				return 0;
+		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/*
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
+ */
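+/* Usage sketch (hypothetical caller): this pins up to nr_pages of the
+ * current task's user memory without ever sleeping; every page stored
+ * in pages[] carries a reference that the caller must drop with
+ * put_page() when done.
+ */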
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next, flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+#ifdef CONFIG_SPARC64
+	if (adi_capable()) {
+		long addr = start;
+
+		/* If userspace has passed a versioned address, kernel
+		 * will not find it in the VMAs since it does not store
+		 * the version tags in the list of VMAs. Storing version
+		 * tags in list of VMAs is impractical since they can be
+		 * changed any time from userspace without dropping into
+		 * kernel. Any address search in VMAs will be done with
+		 * non-versioned addresses. Ensure the ADI version bits
+		 * are dropped here by sign extending the last bit before
+		 * ADI bits. IOMMU does not implement version tags.
+		 */
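+		/* Worked example, assuming 4 version-tag bits: a tagged
+		 * address 0xa000000500001000 becomes 0x0000005000010000
+		 * after the left shift, and the arithmetic right shift
+		 * then sign-extends what was bit 59 (0 here) back in,
+		 * yielding the untagged 0x0000000500001000.
+		 */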
+		addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
+		start = addr;
+	}
+#endif
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next;
+	pgd_t *pgdp;
+	int nr = 0;
+
+#ifdef CONFIG_SPARC64
+	if (adi_capable()) {
+		long addr = start;
+
+		/* If userspace has passed a versioned address, kernel
+		 * will not find it in the VMAs since it does not store
+		 * the version tags in the list of VMAs. Storing version
+		 * tags in list of VMAs is impractical since they can be
+		 * changed any time from userspace without dropping into
+		 * kernel. Any address search in VMAs will be done with
+		 * non-versioned addresses. Ensure the ADI version bits
+		 * are dropped here by sign extending the last bit before
+		 * ADI bits. IOMMU does not implement version tags.
+		 */
+		addr = (addr << (long)adi_nbits()) >> (long)adi_nbits();
+		start = addr;
+	}
+#endif
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	/*
+	 * XXX: batch / limit 'nr', to avoid large irq off latency
+	 * needs some instrumenting to determine the common sizes used by
+	 * important workloads (e.g. DB2), and whether limiting the batch size
+	 * will decrease performance.
+	 *
+	 * It seems like we're in the clear for the moment. Direct-IO is
+	 * the main guy that batches up lots of get_user_pages, and even
+	 * they are limited to 64-at-a-time which is not so many.
+	 */
+	/*
+	 * This doesn't prevent pagetable teardown, but does prevent
+	 * the pagetables from being freed on sparc.
+	 *
+	 * So long as we atomically load page table pointers versus teardown,
+	 * we can follow the address down to the page and take a ref on it.
+	 */
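+	/* Sketch of why IRQ-off is enough here: the page-table pages
+	 * walked below are only freed after a TLB-flush IPI or an RCU
+	 * grace period, and neither can complete while this CPU has
+	 * IRQs disabled, so the tables cannot vanish under us.
+	 */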
+	local_irq_disable();
+
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			goto slow;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			goto slow;
+	} while (pgdp++, addr = next, addr != end);
+
+	local_irq_enable();
+
+	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
+	return nr;
+
+	{
+		int ret;
+
+slow:
+		local_irq_enable();
+
+		/* Try to get the remaining pages with get_user_pages */
+		start += nr << PAGE_SHIFT;
+		pages += nr;
+
+		ret = get_user_pages_unlocked(start,
+			(end - start) >> PAGE_SHIFT, pages,
+			write ? FOLL_WRITE : 0);
+
+		/* Have to be a bit careful with return values */
+		if (nr > 0) {
+			if (ret < 0)
+				ret = nr;
+			else
+				ret += nr;
+		}
+
+		return ret;
+	}
+}
diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c
new file mode 100644
index 0000000..86bc2a5
--- /dev/null
+++ b/arch/sparc/mm/highmem.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  highmem.c: virtual kernel memory mappings for high memory
+ *
+ *  Provides kernel-static versions of atomic kmap functions originally
+ *  found as inlines in include/asm-sparc/highmem.h.  These became
+ *  needed as kmap_atomic() and kunmap_atomic() started getting
+ *  called from within modules.
+ *  -- Tomas Szepe <szepe@pinerecords.com>, September 2002
+ *
+ *  But kmap_atomic() and kunmap_atomic() cannot be inlined in
+ *  modules because they are loaded with btfixup-ped functions.
+ */
+
+/*
+ * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
+ * gives a more generic (and caching) interface. But kmap_atomic can
+ * be used in IRQ contexts, so in some (very limited) cases we need it.
+ *
+ * XXX This is an old text. Actually, it's good to use atomic kmaps,
+ * provided you remember that they are atomic and not try to sleep
+ * with a kmap taken, much like a spinlock. Non-atomic kmaps are
+ * shared by CPUs, and so precious, and establishing them requires IPI.
+ * Atomic kmaps are lightweight and we may have NCPUS more of them.
+ */
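+/* Typical (hypothetical) usage pattern:
+ *
+ *	void *dst = kmap_atomic(page);
+ *	memcpy(dst, buf, len);		// must not sleep while mapped
+ *	kunmap_atomic(dst);
+ */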
+#include <linux/highmem.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm/vaddrs.h>
+
+pgprot_t kmap_prot;
+
+static pte_t *kmap_pte;
+
+void __init kmap_init(void)
+{
+	unsigned long address;
+	pmd_t *dir;
+
+	address = __fix_to_virt(FIX_KMAP_BEGIN);
+	dir = pmd_offset(pgd_offset_k(address), address);
+
+	/* cache the first kmap pte */
+	kmap_pte = pte_offset_kernel(dir, address);
+	kmap_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV | SRMMU_CACHE);
+}
+
+void *kmap_atomic(struct page *page)
+{
+	unsigned long vaddr;
+	long idx, type;
+
+	preempt_disable();
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	type = kmap_atomic_idx_push();
+	idx = type + KM_TYPE_NR*smp_processor_id();
+	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+
+/* XXX Fix - Anton */
+#if 0
+	__flush_cache_one(vaddr);
+#else
+	flush_cache_all();
+#endif
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(*(kmap_pte-idx)));
+#endif
+	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+/* XXX Fix - Anton */
+#if 0
+	__flush_tlb_one(vaddr);
+#else
+	flush_tlb_all();
+#endif
+
+	return (void*) vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void __kunmap_atomic(void *kvaddr)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	int type;
+
+	if (vaddr < FIXADDR_START) { // FIXME
+		pagefault_enable();
+		preempt_enable();
+		return;
+	}
+
+	type = kmap_atomic_idx();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+	{
+		unsigned long idx;
+
+		idx = type + KM_TYPE_NR * smp_processor_id();
+		BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx));
+
+		/* XXX Fix - Anton */
+#if 0
+		__flush_cache_one(vaddr);
+#else
+		flush_cache_all();
+#endif
+
+		/*
+		 * force other mappings to Oops if they try to access
+		 * this pte without first remapping it
+		 */
+		pte_clear(&init_mm, vaddr, kmap_pte-idx);
+		/* XXX Fix - Anton */
+#if 0
+		__flush_tlb_one(vaddr);
+#else
+		flush_tlb_all();
+#endif
+	}
+#endif
+
+	kmap_atomic_idx_pop();
+	pagefault_enable();
+	preempt_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
new file mode 100644
index 0000000..f78793a
--- /dev/null
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SPARC64 Huge TLB page support.
+ *
+ * Copyright (C) 2002, 2003, 2006 David S. Miller (davem@davemloft.net)
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+
+/* Slightly simplified from the non-hugepage variant because by
+ * definition we don't have to worry about any page coloring stuff
+ */
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp,
+							unsigned long addr,
+							unsigned long len,
+							unsigned long pgoff,
+							unsigned long flags)
+{
+	struct hstate *h = hstate_file(filp);
+	unsigned long task_size = TASK_SIZE;
+	struct vm_unmapped_area_info info;
+
+	if (test_thread_flag(TIF_32BIT))
+		task_size = STACK_TOP32;
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = TASK_UNMAPPED_BASE;
+	info.high_limit = min(task_size, VA_EXCLUDE_START);
+	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+	info.align_offset = 0;
+	addr = vm_unmapped_area(&info);
+
+	if ((addr & ~PAGE_MASK) && task_size > VA_EXCLUDE_END) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.low_limit = VA_EXCLUDE_END;
+		info.high_limit = task_size;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+static unsigned long
+hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+				  const unsigned long len,
+				  const unsigned long pgoff,
+				  const unsigned long flags)
+{
+	struct hstate *h = hstate_file(filp);
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+	struct vm_unmapped_area_info info;
+
+	/* This should only ever run for 32-bit processes.  */
+	BUG_ON(!test_thread_flag(TIF_32BIT));
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = mm->mmap_base;
+	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+	info.align_offset = 0;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = TASK_UNMAPPED_BASE;
+		info.high_limit = STACK_TOP32;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct hstate *h = hstate_file(file);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long task_size = TASK_SIZE;
+
+	if (test_thread_flag(TIF_32BIT))
+		task_size = STACK_TOP32;
+
+	if (len & ~huge_page_mask(h))
+		return -EINVAL;
+	if (len > task_size)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (prepare_hugepage_range(file, addr, len))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (addr) {
+		addr = ALIGN(addr, huge_page_size(h));
+		vma = find_vma(mm, addr);
+		if (task_size - len >= addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			return addr;
+	}
+	if (mm->get_unmapped_area == arch_get_unmapped_area)
+		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
+				pgoff, flags);
+	else
+		return hugetlb_get_unmapped_area_topdown(file, addr, len,
+				pgoff, flags);
+}
+
+static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+	return entry;
+}
+
+static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+	unsigned long hugepage_size = _PAGE_SZ4MB_4V;
+
+	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
+
+	switch (shift) {
+	case HPAGE_16GB_SHIFT:
+		hugepage_size = _PAGE_SZ16GB_4V;
+		pte_val(entry) |= _PAGE_PUD_HUGE;
+		break;
+	case HPAGE_2GB_SHIFT:
+		hugepage_size = _PAGE_SZ2GB_4V;
+		pte_val(entry) |= _PAGE_PMD_HUGE;
+		break;
+	case HPAGE_256MB_SHIFT:
+		hugepage_size = _PAGE_SZ256MB_4V;
+		pte_val(entry) |= _PAGE_PMD_HUGE;
+		break;
+	case HPAGE_SHIFT:
+		pte_val(entry) |= _PAGE_PMD_HUGE;
+		break;
+	case HPAGE_64K_SHIFT:
+		hugepage_size = _PAGE_SZ64K_4V;
+		break;
+	default:
+		WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift);
+	}
+
+	pte_val(entry) = pte_val(entry) | hugepage_size;
+	return entry;
+}
+
+static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift)
+{
+	if (tlb_type == hypervisor)
+		return sun4v_hugepage_shift_to_tte(entry, shift);
+	else
+		return sun4u_hugepage_shift_to_tte(entry, shift);
+}
+
+pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+			 struct page *page, int writeable)
+{
+	unsigned int shift = huge_page_shift(hstate_vma(vma));
+	pte_t pte;
+
+	pte = hugepage_shift_to_tte(entry, shift);
+
+#ifdef CONFIG_SPARC64
+	/* If this vma has ADI enabled on it, turn on TTE.mcd
+	 */
+	if (vma->vm_flags & VM_SPARC_ADI)
+		return pte_mkmcd(pte);
+	else
+		return pte_mknotmcd(pte);
+#else
+	return pte;
+#endif
+}
+
+static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
+{
+	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V;
+	unsigned int shift;
+
+	switch (tte_szbits) {
+	case _PAGE_SZ16GB_4V:
+		shift = HPAGE_16GB_SHIFT;
+		break;
+	case _PAGE_SZ2GB_4V:
+		shift = HPAGE_2GB_SHIFT;
+		break;
+	case _PAGE_SZ256MB_4V:
+		shift = HPAGE_256MB_SHIFT;
+		break;
+	case _PAGE_SZ4MB_4V:
+		shift = REAL_HPAGE_SHIFT;
+		break;
+	case _PAGE_SZ64K_4V:
+		shift = HPAGE_64K_SHIFT;
+		break;
+	default:
+		shift = PAGE_SHIFT;
+		break;
+	}
+	return shift;
+}
+
+static unsigned int sun4u_huge_tte_to_shift(pte_t entry)
+{
+	unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U;
+	unsigned int shift;
+
+	switch (tte_szbits) {
+	case _PAGE_SZ256MB_4U:
+		shift = HPAGE_256MB_SHIFT;
+		break;
+	case _PAGE_SZ4MB_4U:
+		shift = REAL_HPAGE_SHIFT;
+		break;
+	case _PAGE_SZ64K_4U:
+		shift = HPAGE_64K_SHIFT;
+		break;
+	default:
+		shift = PAGE_SHIFT;
+		break;
+	}
+	return shift;
+}
+
+static unsigned int huge_tte_to_shift(pte_t entry)
+{
+	unsigned long shift;
+
+	if (tlb_type == hypervisor)
+		shift = sun4v_huge_tte_to_shift(entry);
+	else
+		shift = sun4u_huge_tte_to_shift(entry);
+
+	if (shift == PAGE_SHIFT)
+		WARN_ONCE(1, "tte_to_shift: invalid hugepage tte=0x%lx\n",
+			  pte_val(entry));
+
+	return shift;
+}
+
+static unsigned long huge_tte_to_size(pte_t pte)
+{
+	unsigned long size = 1UL << huge_tte_to_shift(pte);
+
+	if (size == REAL_HPAGE_SIZE)
+		size = HPAGE_SIZE;
+	return size;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+			unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return NULL;
+	if (sz >= PUD_SIZE)
+		return (pte_t *)pud;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return NULL;
+	if (sz >= PMD_SIZE)
+		return (pte_t *)pmd;
+	return pte_alloc_map(mm, pmd, addr);
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_none(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return NULL;
+	if (is_hugetlb_pud(*pud))
+		return (pte_t *)pud;
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return NULL;
+	if (is_hugetlb_pmd(*pmd))
+		return (pte_t *)pmd;
+	return pte_offset_map(pmd, addr);
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry)
+{
+	unsigned int nptes, orig_shift, shift;
+	unsigned long i, size;
+	pte_t orig;
+
+	size = huge_tte_to_size(entry);
+
+	shift = PAGE_SHIFT;
+	if (size >= PUD_SIZE)
+		shift = PUD_SHIFT;
+	else if (size >= PMD_SIZE)
+		shift = PMD_SHIFT;
+	else
+		shift = PAGE_SHIFT;
+
+	nptes = size >> shift;
+
+	if (!pte_present(*ptep) && pte_present(entry))
+		mm->context.hugetlb_pte_count += nptes;
+
+	addr &= ~(size - 1);
+	orig = *ptep;
+	orig_shift = pte_none(orig) ? PAGE_SHIFT : huge_tte_to_shift(orig);
+
+	for (i = 0; i < nptes; i++)
+		ptep[i] = __pte(pte_val(entry) + (i << shift));
+
+	maybe_tlb_batch_add(mm, addr, ptep, orig, 0, orig_shift);
+	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
+	if (size == HPAGE_SIZE)
+		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0,
+				    orig_shift);
+}
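+
+/* E.g., assuming HPAGE_SHIFT == 23 and REAL_HPAGE_SHIFT == 22: an 8MB
+ * huge page is a single PMD-sized pte here (nptes == 1) but is mapped
+ * by two 4MB hardware TLB entries, which is why both halves get queued
+ * for the TLB batch above.
+ */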
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
+{
+	unsigned int i, nptes, orig_shift, shift;
+	unsigned long size;
+	pte_t entry;
+
+	entry = *ptep;
+	size = huge_tte_to_size(entry);
+
+	shift = PAGE_SHIFT;
+	if (size >= PUD_SIZE)
+		shift = PUD_SHIFT;
+	else if (size >= PMD_SIZE)
+		shift = PMD_SHIFT;
+	else
+		shift = PAGE_SHIFT;
+
+	nptes = size >> shift;
+	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
+
+	if (pte_present(entry))
+		mm->context.hugetlb_pte_count -= nptes;
+
+	addr &= ~(size - 1);
+	for (i = 0; i < nptes; i++)
+		ptep[i] = __pte(0UL);
+
+	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
+	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
+	if (size == HPAGE_SIZE)
+		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
+				    orig_shift);
+
+	return entry;
+}
+
+int pmd_huge(pmd_t pmd)
+{
+	return !pmd_none(pmd) &&
+		(pmd_val(pmd) & (_PAGE_VALID|_PAGE_PMD_HUGE)) != _PAGE_VALID;
+}
+
+int pud_huge(pud_t pud)
+{
+	return !pud_none(pud) &&
+		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
+}
+
+static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+			   unsigned long addr)
+{
+	pgtable_t token = pmd_pgtable(*pmd);
+
+	pmd_clear(pmd);
+	pte_free_tlb(tlb, token, addr);
+	mm_dec_nr_ptes(tlb->mm);
+}
+
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd))
+			continue;
+		if (is_hugetlb_pmd(*pmd))
+			pmd_clear(pmd);
+		else
+			hugetlb_free_pte_range(tlb, pmd, addr);
+	} while (pmd++, addr = next, addr != end);
+
+	start &= PUD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PUD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pmd = pmd_offset(pud, start);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd, start);
+	mm_dec_nr_pmds(tlb->mm);
+}
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		if (is_hugetlb_pud(*pud))
+			pud_clear(pud);
+		else
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+					       ceiling);
+	} while (pud++, addr = next, addr != end);
+
+	start &= PGDIR_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PGDIR_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(pgd, start);
+	pgd_clear(pgd);
+	pud_free_tlb(tlb, pud, start);
+	mm_dec_nr_puds(tlb->mm);
+}
+
+void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+			    unsigned long addr, unsigned long end,
+			    unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	addr &= PMD_MASK;
+	if (addr < floor) {
+		addr += PMD_SIZE;
+		if (!addr)
+			return;
+	}
+	if (ceiling) {
+		ceiling &= PMD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		end -= PMD_SIZE;
+	if (addr > end - 1)
+		return;
+
+	pgd = pgd_offset(tlb->mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
+	} while (pgd++, addr = next, addr != end);
+}
diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S
new file mode 100644
index 0000000..66885a8
--- /dev/null
+++ b/arch/sparc/mm/hypersparc.S
@@ -0,0 +1,413 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * hypersparc.S: High speed Hypersparc mmu/cache operations.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/asm-offsets.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+#include <linux/init.h>
+
+	.text
+	.align	4
+
+	.globl	hypersparc_flush_cache_all, hypersparc_flush_cache_mm
+	.globl	hypersparc_flush_cache_range, hypersparc_flush_cache_page
+	.globl	hypersparc_flush_page_to_ram
+	.globl	hypersparc_flush_page_for_dma, hypersparc_flush_sig_insns
+	.globl	hypersparc_flush_tlb_all, hypersparc_flush_tlb_mm
+	.globl	hypersparc_flush_tlb_range, hypersparc_flush_tlb_page
+
+hypersparc_flush_cache_all:
+	WINDOW_FLUSH(%g4, %g5)
+	sethi	%hi(vac_cache_size), %g4
+	ld	[%g4 + %lo(vac_cache_size)], %g5
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %g2
+1:	
+	subcc	%g5, %g2, %g5			! hyper_flush_unconditional_combined
+	bne	1b
+	 sta	%g0, [%g5] ASI_M_FLUSH_CTX
+	retl
+	 sta	%g0, [%g0] ASI_M_FLUSH_IWHOLE	! hyper_flush_whole_icache
+
+	/* We expand the window flush to get maximum performance. */
+hypersparc_flush_cache_mm:
+#ifndef CONFIG_SMP
+	ld	[%o0 + AOFF_mm_context], %g1
+	cmp	%g1, -1
+	be	hypersparc_flush_cache_mm_out
+#endif
+	WINDOW_FLUSH(%g4, %g5)
+
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o1
+	sethi	%hi(vac_cache_size), %g2
+	ld	[%g2 + %lo(vac_cache_size)], %o0
+	add	%o1, %o1, %g1
+	add	%o1, %g1, %g2
+	add	%o1, %g2, %g3
+	add	%o1, %g3, %g4
+	add	%o1, %g4, %g5
+	add	%o1, %g5, %o4
+	add	%o1, %o4, %o5
+
+	/* BLAMMO! */
+1:
+	subcc	%o0, %o5, %o0				! hyper_flush_cache_user
+	sta	%g0, [%o0 + %g0] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %o1] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g1] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g2] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g3] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g4] ASI_M_FLUSH_USER
+	sta	%g0, [%o0 + %g5] ASI_M_FLUSH_USER
+	bne	1b
+	 sta	%g0, [%o0 + %o4] ASI_M_FLUSH_USER
+hypersparc_flush_cache_mm_out:
+	retl
+	 nop
+
+	/* The things we do for performance... */
+hypersparc_flush_cache_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+#ifndef CONFIG_SMP
+	ld	[%o0 + AOFF_mm_context], %g1
+	cmp	%g1, -1
+	be	hypersparc_flush_cache_range_out
+#endif
+	WINDOW_FLUSH(%g4, %g5)
+
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o4
+	sethi	%hi(vac_cache_size), %g2
+	ld	[%g2 + %lo(vac_cache_size)], %o3
+
+	/* Here comes the fun part... */
+	add	%o2, (PAGE_SIZE - 1), %o2
+	andn	%o1, (PAGE_SIZE - 1), %o1
+	add	%o4, %o4, %o5
+	andn	%o2, (PAGE_SIZE - 1), %o2
+	add	%o4, %o5, %g1
+	sub	%o2, %o1, %g4
+	add	%o4, %g1, %g2
+	sll	%o3, 2, %g5
+	add	%o4, %g2, %g3
+	cmp	%g4, %g5
+	add	%o4, %g3, %g4
+	blu	0f
+	 add	%o4, %g4, %g5
+	add	%o4, %g5, %g7
+
+	/* Flush entire user space; believe it or not this is quicker
+	 * than page-at-a-time flushes for range > (cache_size<<2).
+	 */
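+	/* (E.g. with a 256 KB virtual cache the cutoff computed above is
+	 * 1 MB; ranges at least that large take this whole-cache loop.)
+	 */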
+1:
+	subcc	%o3, %g7, %o3
+	sta	%g0, [%o3 + %g0] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %o4] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %o5] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g1] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g2] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g3] ASI_M_FLUSH_USER
+	sta	%g0, [%o3 + %g4] ASI_M_FLUSH_USER
+	bne	1b
+	 sta	%g0, [%o3 + %g5] ASI_M_FLUSH_USER
+	retl
+	 nop
+
+	/* Below our threshold, flush one page at a time. */
+0:
+	ld	[%o0 + AOFF_mm_context], %o0
+	mov	SRMMU_CTX_REG, %g7
+	lda	[%g7] ASI_M_MMUREGS, %o3
+	sta	%o0, [%g7] ASI_M_MMUREGS
+	add	%o2, -PAGE_SIZE, %o0
+1:
+	or	%o0, 0x400, %g7
+	lda	[%g7] ASI_M_FLUSH_PROBE, %g7
+	orcc	%g7, 0, %g0
+	be,a	3f
+	 mov	%o0, %o2
+	add	%o4, %g5, %g7
+2:
+	sub	%o2, %g7, %o2
+	sta	%g0, [%o2 + %g0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %g1] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %g2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o2 + %g3] ASI_M_FLUSH_PAGE
+	andcc	%o2, 0xffc, %g0
+	sta	%g0, [%o2 + %g4] ASI_M_FLUSH_PAGE
+	bne	2b
+	 sta	%g0, [%o2 + %g5] ASI_M_FLUSH_PAGE
+3:
+	cmp	%o2, %o1
+	bne	1b
+	 add	%o2, -PAGE_SIZE, %o0
+	mov	SRMMU_FAULT_STATUS, %g5
+	lda	[%g5] ASI_M_MMUREGS, %g0
+	mov	SRMMU_CTX_REG, %g7
+	sta	%o3, [%g7] ASI_M_MMUREGS
+hypersparc_flush_cache_range_out:
+	retl
+	 nop
+
+	/* HyperSparc requires a valid mapping where we are about to flush
+	 * in order to check for a physical tag match during the flush.
+	 */
+	/* Verified, my ass... */
+hypersparc_flush_cache_page:
+	ld	[%o0 + VMA_VM_MM], %o0
+	ld	[%o0 + AOFF_mm_context], %g2
+#ifndef CONFIG_SMP
+	cmp	%g2, -1
+	be	hypersparc_flush_cache_page_out
+#endif
+	WINDOW_FLUSH(%g4, %g5)
+
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o4
+	mov	SRMMU_CTX_REG, %o3
+	andn	%o1, (PAGE_SIZE - 1), %o1
+	lda	[%o3] ASI_M_MMUREGS, %o2
+	sta	%g2, [%o3] ASI_M_MMUREGS
+	or	%o1, 0x400, %o5
+	lda	[%o5] ASI_M_FLUSH_PROBE, %g1
+	orcc	%g0, %g1, %g0
+	be	2f
+	 add	%o4, %o4, %o5
+	sub	%o1, -PAGE_SIZE, %o1
+	add	%o4, %o5, %g1
+	add	%o4, %g1, %g2
+	add	%o4, %g2, %g3
+	add	%o4, %g3, %g4
+	add	%o4, %g4, %g5
+	add	%o4, %g5, %g7
+
+	/* BLAMMO! */
+1:
+	sub	%o1, %g7, %o1
+	sta	%g0, [%o1 + %g0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g1] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
+	andcc	%o1, 0xffc, %g0
+	sta	%g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
+	bne	1b
+	 sta	%g0, [%o1 + %g5] ASI_M_FLUSH_PAGE
+2:
+	mov	SRMMU_FAULT_STATUS, %g7
+	mov	SRMMU_CTX_REG, %g4
+	lda	[%g7] ASI_M_MMUREGS, %g0
+	sta	%o2, [%g4] ASI_M_MMUREGS
+hypersparc_flush_cache_page_out:
+	retl
+	 nop
+
+hypersparc_flush_sig_insns:
+	flush	%o1
+	retl
+	 flush	%o1 + 4
+
+	/* HyperSparc is copy-back. */
+hypersparc_flush_page_to_ram:
+	sethi	%hi(vac_line_size), %g1
+	ld	[%g1 + %lo(vac_line_size)], %o4
+	andn	%o0, (PAGE_SIZE - 1), %o0
+	add	%o4, %o4, %o5
+	or	%o0, 0x400, %g7
+	lda	[%g7] ASI_M_FLUSH_PROBE, %g5
+	add	%o4, %o5, %g1
+	orcc	%g5, 0, %g0
+	be	2f
+	 add	%o4, %g1, %g2
+	add	%o4, %g2, %g3
+	sub	%o0, -PAGE_SIZE, %o0
+	add	%o4, %g3, %g4
+	add	%o4, %g4, %g5
+	add	%o4, %g5, %g7
+
+	/* BLAMMO! */
+1:
+	sub	%o0, %g7, %o0
+	sta	%g0, [%o0 + %g0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %g1] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %g2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o0 + %g3] ASI_M_FLUSH_PAGE
+	andcc	%o0, 0xffc, %g0
+	sta	%g0, [%o0 + %g4] ASI_M_FLUSH_PAGE
+	bne	1b
+	 sta	%g0, [%o0 + %g5] ASI_M_FLUSH_PAGE
+2:
+	mov	SRMMU_FAULT_STATUS, %g1
+	retl
+	 lda	[%g1] ASI_M_MMUREGS, %g0
+
+	/* HyperSparc is IO cache coherent. */
+hypersparc_flush_page_for_dma:
+	retl
+	 nop
+
+	/* It was noted that at boot time a TLB flush all in a delay slot
+	 * can deliver an illegal instruction to the processor if the timing
+	 * is just right...
+	 */
+hypersparc_flush_tlb_all:
+	mov	0x400, %g1
+	sta	%g0, [%g1] ASI_M_FLUSH_PROBE
+	retl
+	 nop
+
+hypersparc_flush_tlb_mm:
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o1
+	lda	[%g1] ASI_M_MMUREGS, %g5
+#ifndef CONFIG_SMP
+	cmp	%o1, -1
+	be	hypersparc_flush_tlb_mm_out
+#endif
+	 mov	0x300, %g2
+	sta	%o1, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%g2] ASI_M_FLUSH_PROBE
+hypersparc_flush_tlb_mm_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+
+hypersparc_flush_tlb_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3
+	lda	[%g1] ASI_M_MMUREGS, %g5
+#ifndef CONFIG_SMP
+	cmp	%o3, -1
+	be	hypersparc_flush_tlb_range_out
+#endif
+	 sethi	%hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	and	%o1, %o4, %o1
+	add	%o1, 0x200, %o1
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+1:
+	sub	%o1, %o4, %o1
+	cmp	%o1, %o2
+	blu,a	1b
+	 sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+hypersparc_flush_tlb_range_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+
+hypersparc_flush_tlb_page:
+	ld	[%o0 + VMA_VM_MM], %o0
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3
+	andn	%o1, (PAGE_SIZE - 1), %o1
+#ifndef CONFIG_SMP
+	cmp	%o3, -1
+	be	hypersparc_flush_tlb_page_out
+#endif
+	 lda	[%g1] ASI_M_MMUREGS, %g5
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+hypersparc_flush_tlb_page_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+
+	__INIT
+	
+	/* High speed page clear/copy. */
+hypersparc_bzero_1page:
+/* NOTE: This routine has to be shorter than 40 insns --jj */
+	clr	%g1
+	mov	32, %g2
+	mov	64, %g3
+	mov	96, %g4
+	mov	128, %g5
+	mov	160, %g7
+	mov	192, %o2
+	mov	224, %o3
+	mov	16, %o1
+1:
+	stda	%g0, [%o0 + %g0] ASI_M_BFILL
+	stda	%g0, [%o0 + %g2] ASI_M_BFILL
+	stda	%g0, [%o0 + %g3] ASI_M_BFILL
+	stda	%g0, [%o0 + %g4] ASI_M_BFILL
+	stda	%g0, [%o0 + %g5] ASI_M_BFILL
+	stda	%g0, [%o0 + %g7] ASI_M_BFILL
+	stda	%g0, [%o0 + %o2] ASI_M_BFILL
+	stda	%g0, [%o0 + %o3] ASI_M_BFILL
+	subcc	%o1, 1, %o1
+	bne	1b
+	 add	%o0, 256, %o0
+
+	retl
+	 nop
+
+hypersparc_copy_1page:
+/* NOTE: This routine has to be shorter than 70 insns --jj */
+	sub	%o1, %o0, %o2		! difference
+	mov	16, %g1
+1:
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	add	%o0, 32, %o0
+	sta	%o0, [%o0 + %o2] ASI_M_BCOPY
+	subcc	%g1, 1, %g1
+	bne	1b
+	 add	%o0, 32, %o0
+
+	retl
+	 nop
+
+	.globl	hypersparc_setup_blockops
+hypersparc_setup_blockops:
+	sethi	%hi(bzero_1page), %o0
+	or	%o0, %lo(bzero_1page), %o0
+	sethi	%hi(hypersparc_bzero_1page), %o1
+	or	%o1, %lo(hypersparc_bzero_1page), %o1
+	sethi	%hi(hypersparc_copy_1page), %o2
+	or	%o2, %lo(hypersparc_copy_1page), %o2
+	ld	[%o1], %o4
+1:
+	add	%o1, 4, %o1
+	st	%o4, [%o0]
+	add	%o0, 4, %o0
+	cmp	%o1, %o2
+	bne	1b
+	 ld	[%o1], %o4
+	sethi	%hi(__copy_1page), %o0
+	or	%o0, %lo(__copy_1page), %o0
+	sethi	%hi(hypersparc_setup_blockops), %o2
+	or	%o2, %lo(hypersparc_setup_blockops), %o2
+	ld	[%o1], %o4
+1:
+	add	%o1, 4, %o1
+	st	%o4, [%o0]
+	add	%o0, 4, %o0
+	cmp	%o1, %o2
+	bne	1b
+	 ld	[%o1], %o4
+	sta	%g0, [%g0] ASI_M_FLUSH_IWHOLE
+	retl
+	 nop
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
new file mode 100644
index 0000000..92634d4
--- /dev/null
+++ b/arch/sparc/mm/init_32.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/sparc/mm/init.c
+ *
+ *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1995 Eddie C. Dost (ecd@skynet.be)
+ *  Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *  Copyright (C) 2000 Anton Blanchard (anton@samba.org)
+ */
+
+#include <linux/module.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/initrd.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/pagemap.h>
+#include <linux/poison.h>
+#include <linux/gfp.h>
+
+#include <asm/sections.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/vaddrs.h>
+#include <asm/pgalloc.h>	/* bug in asm-generic/tlb.h: check_pgt_cache */
+#include <asm/setup.h>
+#include <asm/tlb.h>
+#include <asm/prom.h>
+#include <asm/leon.h>
+
+#include "mm_32.h"
+
+unsigned long *sparc_valid_addr_bitmap;
+EXPORT_SYMBOL(sparc_valid_addr_bitmap);
+
+unsigned long phys_base;
+EXPORT_SYMBOL(phys_base);
+
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1];
+
+/* Initial ramdisk setup */
+extern unsigned int sparc_ramdisk_image;
+extern unsigned int sparc_ramdisk_size;
+
+unsigned long highstart_pfn, highend_pfn;
+
+unsigned long last_valid_pfn;
+
+unsigned long calc_highpages(void)
+{
+	int i;
+	int nr = 0;
+
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+		unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
+		unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
+
+		if (end_pfn <= max_low_pfn)
+			continue;
+
+		if (start_pfn < max_low_pfn)
+			start_pfn = max_low_pfn;
+
+		nr += end_pfn - start_pfn;
+	}
+
+	return nr;
+}
+
+static unsigned long calc_max_low_pfn(void)
+{
+	int i;
+	unsigned long tmp = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
+	unsigned long curr_pfn, last_pfn;
+
+	last_pfn = (sp_banks[0].base_addr + sp_banks[0].num_bytes) >> PAGE_SHIFT;
+	for (i = 1; sp_banks[i].num_bytes != 0; i++) {
+		curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
+
+		if (curr_pfn >= tmp) {
+			if (last_pfn < tmp)
+				tmp = last_pfn;
+			break;
+		}
+
+		last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
+	}
+
+	return tmp;
+}
+
+static void __init find_ramdisk(unsigned long end_of_phys_memory)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	unsigned long size;
+
+	/* Now check the initial ramdisk, making sure it does not
+	 * extend past the end of memory.
+	 */
+	if (sparc_ramdisk_image) {
+		if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
+			sparc_ramdisk_image -= KERNBASE;
+		initrd_start = sparc_ramdisk_image + phys_base;
+		initrd_end = initrd_start + sparc_ramdisk_size;
+		if (initrd_end > end_of_phys_memory) {
+			printk(KERN_CRIT "initrd extends beyond end of memory "
+			       "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
+			       initrd_end, end_of_phys_memory);
+			initrd_start = 0;
+		} else {
+			/* Reserve the initrd image area. */
+			size = initrd_end - initrd_start;
+			memblock_reserve(initrd_start, size);
+
+			initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
+			initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
+		}
+	}
+#endif
+}
+
+unsigned long __init bootmem_init(unsigned long *pages_avail)
+{
+	unsigned long start_pfn, bytes_avail, size;
+	unsigned long end_of_phys_memory = 0;
+	unsigned long high_pages = 0;
+	int i;
+
+	memblock_set_bottom_up(true);
+	memblock_allow_resize();
+
+	bytes_avail = 0UL;
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+		end_of_phys_memory = sp_banks[i].base_addr +
+			sp_banks[i].num_bytes;
+		bytes_avail += sp_banks[i].num_bytes;
+		if (cmdline_memory_size) {
+			if (bytes_avail > cmdline_memory_size) {
+				unsigned long slack = bytes_avail - cmdline_memory_size;
+
+				bytes_avail -= slack;
+				end_of_phys_memory -= slack;
+
+				sp_banks[i].num_bytes -= slack;
+				if (sp_banks[i].num_bytes == 0) {
+					sp_banks[i].base_addr = 0xdeadbeef;
+				} else {
+					memblock_add(sp_banks[i].base_addr,
+						     sp_banks[i].num_bytes);
+					sp_banks[i+1].num_bytes = 0;
+					sp_banks[i+1].base_addr = 0xdeadbeef;
+				}
+				break;
+			}
+		}
+		memblock_add(sp_banks[i].base_addr, sp_banks[i].num_bytes);
+	}
+
+	/* Start with page aligned address of last symbol in kernel
+	 * image.
+	 */
+	start_pfn  = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end));
+
+	/* Now shift down to get the real physical page frame number. */
+	start_pfn >>= PAGE_SHIFT;
+
+	max_pfn = end_of_phys_memory >> PAGE_SHIFT;
+
+	max_low_pfn = max_pfn;
+	highstart_pfn = highend_pfn = max_pfn;
+
+	if (max_low_pfn > pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT)) {
+		highstart_pfn = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
+		max_low_pfn = calc_max_low_pfn();
+		high_pages = calc_highpages();
+		printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+		    high_pages >> (20 - PAGE_SHIFT));
+	}
+
+	find_ramdisk(end_of_phys_memory);
+
+	/* Reserve the kernel text/data/bss. */
+	size = (start_pfn << PAGE_SHIFT) - phys_base;
+	memblock_reserve(phys_base, size);
+
+	size = memblock_phys_mem_size() - memblock_reserved_size();
+	*pages_avail = (size >> PAGE_SHIFT) - high_pages;
+
+	return max_pfn;
+}
+
+/*
+ * paging_init() sets up the page tables: we call the MMU-specific
+ * init routine based upon the Sun model type.
+ */
+void __init paging_init(void)
+{
+	srmmu_paging_init();
+	prom_build_devicetree();
+	of_fill_in_cpu_data();
+	device_scan();
+}
+
+static void __init taint_real_pages(void)
+{
+	int i;
+
+	for (i = 0; sp_banks[i].num_bytes; i++) {
+		unsigned long start, end;
+
+		start = sp_banks[i].base_addr;
+		end = start + sp_banks[i].num_bytes;
+
+		while (start < end) {
+			set_bit(start >> 20, sparc_valid_addr_bitmap);
+			start += PAGE_SIZE;
+		}
+	}
+}
+
+static void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long tmp;
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+	printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn);
+#endif
+
+	for (tmp = start_pfn; tmp < end_pfn; tmp++)
+		free_highmem_page(pfn_to_page(tmp));
+}
+
+void __init mem_init(void)
+{
+	int i;
+
+	if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
+		prom_printf("BUG: fixmap and pkmap areas overlap\n");
+		prom_printf("pkbase: 0x%lx pkend: 0x%lx fixstart 0x%lx\n",
+		       PKMAP_BASE,
+		       (unsigned long)PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+		       FIXADDR_START);
+		prom_printf("Please mail sparclinux@vger.kernel.org.\n");
+		prom_halt();
+	}
+
+
+	/* Saves us work later. */
+	memset((void *)empty_zero_page, 0, PAGE_SIZE);
+
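+	/* taint_real_pages() sets one bit per megabyte of physical
+	 * address space, so convert the last PFN to a megabyte count
+	 * (>> (20 - PAGE_SHIFT)) and then to 32-bit words (>> 5); the
+	 * i << 2 below converts words back to bytes.
+	 */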
+	i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
+	i += 1;
+	sparc_valid_addr_bitmap = (unsigned long *)
+		__alloc_bootmem(i << 2, SMP_CACHE_BYTES, 0UL);
+
+	if (sparc_valid_addr_bitmap == NULL) {
+		prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
+		prom_halt();
+	}
+	memset(sparc_valid_addr_bitmap, 0, i << 2);
+
+	taint_real_pages();
+
+	max_mapnr = last_valid_pfn - pfn_base;
+	high_memory = __va(max_low_pfn << PAGE_SHIFT);
+	free_all_bootmem();
+
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+		unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
+		unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
+
+		if (end_pfn <= highstart_pfn)
+			continue;
+
+		if (start_pfn < highstart_pfn)
+			start_pfn = highstart_pfn;
+
+		map_high_region(start_pfn, end_pfn);
+	}
+
+	mem_init_print_info(NULL);
+}
+
+void free_initmem (void)
+{
+	free_initmem_default(POISON_FREE_INITMEM);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
+}
+#endif
+
+void sparc_flush_page_to_ram(struct page *page)
+{
+	unsigned long vaddr = (unsigned long)page_address(page);
+
+	if (vaddr)
+		__flush_page_to_ram(vaddr);
+}
+EXPORT_SYMBOL(sparc_flush_page_to_ram);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
new file mode 100644
index 0000000..39822f6
--- /dev/null
+++ b/arch/sparc/mm/init_64.c
@@ -0,0 +1,3232 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  arch/sparc64/mm/init.c
+ *
+ *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
+ *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/extable.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/initrd.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/poison.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/kprobes.h>
+#include <linux/cache.h>
+#include <linux/sort.h>
+#include <linux/ioport.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+#include <linux/mmzone.h>
+#include <linux/gfp.h>
+
+#include <asm/head.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/oplib.h>
+#include <asm/iommu.h>
+#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/dma.h>
+#include <asm/starfire.h>
+#include <asm/tlb.h>
+#include <asm/spitfire.h>
+#include <asm/sections.h>
+#include <asm/tsb.h>
+#include <asm/hypervisor.h>
+#include <asm/prom.h>
+#include <asm/mdesc.h>
+#include <asm/cpudata.h>
+#include <asm/setup.h>
+#include <asm/irq.h>
+
+#include "init_64.h"
+
+unsigned long kern_linear_pte_xor[4] __read_mostly;
+static unsigned long page_cache4v_flag;
+
+/* A bitmap, two bits for every 256MB of physical memory.  These two
+ * bits determine what page size we use for kernel linear
+ * translations.  They form an index into kern_linear_pte_xor[].  The
+ * value in the indexed slot is XOR'd with the TLB miss virtual
+ * address to form the resulting TTE.  The mapping is:
+ *
+ *	0	==>	4MB
+ *	1	==>	256MB
+ *	2	==>	2GB
+ *	3	==>	16GB
+ *
+ * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
+ * support 2GB pages, and hopefully future cpus will support the 16GB
+ * pages as well.  For slots 2 and 3, we encode a 256MB TTE xor there
+ * if these larger page sizes are not supported by the cpu.
+ *
+ * It would be nice to determine this from the machine description
+ * 'cpu' properties, but we need to have this table setup before the
+ * MDESC is initialized.
+ */
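+
+/* Illustrative sketch (an assumption, not code from this file): with the
+ * scheme above, a miss path would form the TTE roughly as
+ *
+ *	idx = two_bit_index_for(paddr >> 28);	(hypothetical helper)
+ *	tte = vaddr ^ kern_linear_pte_xor[idx];
+ *
+ * where paddr >> 28 selects the 256MB block containing the physical
+ * address.
+ */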
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
+ * Space is allocated for this right after the trap table in
+ * arch/sparc64/kernel/head.S
+ */
+extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
+#endif
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+static unsigned long cpu_pgsz_mask;
+
+#define MAX_BANKS	1024
+
+static struct linux_prom64_registers pavail[MAX_BANKS];
+static int pavail_ents;
+
+u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];
+
+static int cmp_p64(const void *a, const void *b)
+{
+	const struct linux_prom64_registers *x = a, *y = b;
+
+	if (x->phys_addr > y->phys_addr)
+		return 1;
+	if (x->phys_addr < y->phys_addr)
+		return -1;
+	return 0;
+}
+
+static void __init read_obp_memory(const char *property,
+				   struct linux_prom64_registers *regs,
+				   int *num_ents)
+{
+	phandle node = prom_finddevice("/memory");
+	int prop_size = prom_getproplen(node, property);
+	int ents, ret, i;
+
+	ents = prop_size / sizeof(struct linux_prom64_registers);
+	if (ents > MAX_BANKS) {
+		prom_printf("The machine has more %s property entries than "
+			    "this kernel can support (%d).\n",
+			    property, MAX_BANKS);
+		prom_halt();
+	}
+
+	ret = prom_getproperty(node, property, (char *) regs, prop_size);
+	if (ret == -1) {
+		prom_printf("Couldn't get %s property from /memory.\n",
+				property);
+		prom_halt();
+	}
+
+	/* Sanitize what we got from the firmware by page aligning
+	 * everything.
+	 */
+	for (i = 0; i < ents; i++) {
+		unsigned long base, size;
+
+		base = regs[i].phys_addr;
+		size = regs[i].reg_size;
+
+		size &= PAGE_MASK;
+		if (base & ~PAGE_MASK) {
+			unsigned long new_base = PAGE_ALIGN(base);
+
+			size -= new_base - base;
+			if ((long) size < 0L)
+				size = 0UL;
+			base = new_base;
+		}
+		if (size == 0UL) {
+			/* If it is empty, simply get rid of it.
+			 * This simplifies the logic of the other
+			 * functions that process these arrays.
+			 */
+			memmove(&regs[i], &regs[i + 1],
+				(ents - i - 1) * sizeof(regs[0]));
+			i--;
+			ents--;
+			continue;
+		}
+		regs[i].phys_addr = base;
+		regs[i].reg_size = size;
+	}
+
+	*num_ents = ents;
+
+	sort(regs, ents, sizeof(struct linux_prom64_registers),
+	     cmp_p64, NULL);
+}
+
+/* Kernel physical address base and size in bytes.  */
+unsigned long kern_base __read_mostly;
+unsigned long kern_size __read_mostly;
+
+/* Initial ramdisk setup */
+extern unsigned long sparc_ramdisk_image64;
+extern unsigned int sparc_ramdisk_image;
+extern unsigned int sparc_ramdisk_size;
+
+struct page *mem_map_zero __read_mostly;
+EXPORT_SYMBOL(mem_map_zero);
+
+unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
+
+unsigned long sparc64_kern_pri_context __read_mostly;
+unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
+unsigned long sparc64_kern_sec_context __read_mostly;
+
+int num_kernel_image_mappings;
+
+#ifdef CONFIG_DEBUG_DCFLUSH
+atomic_t dcpage_flushes = ATOMIC_INIT(0);
+#ifdef CONFIG_SMP
+atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
+#endif
+#endif
+
+inline void flush_dcache_page_impl(struct page *page)
+{
+	BUG_ON(tlb_type == hypervisor);
+#ifdef CONFIG_DEBUG_DCFLUSH
+	atomic_inc(&dcpage_flushes);
+#endif
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	__flush_dcache_page(page_address(page),
+			    ((tlb_type == spitfire) &&
+			     page_mapping_file(page) != NULL));
+#else
+	if (page_mapping_file(page) != NULL &&
+	    tlb_type == spitfire)
+		__flush_icache_page(__pa(page_address(page)));
+#endif
+}
+
+#define PG_dcache_dirty		PG_arch_1
+#define PG_dcache_cpu_shift	32UL
+#define PG_dcache_cpu_mask	\
+	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
+
+#define dcache_dirty_cpu(page) \
+	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
+
+static inline void set_dcache_dirty(struct page *page, int this_cpu)
+{
+	unsigned long mask = this_cpu;
+	unsigned long non_cpu_bits;
+
+	non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift);
+	mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty);
+
+	__asm__ __volatile__("1:\n\t"
+			     "ldx	[%2], %%g7\n\t"
+			     "and	%%g7, %1, %%g1\n\t"
+			     "or	%%g1, %0, %%g1\n\t"
+			     "casx	[%2], %%g7, %%g1\n\t"
+			     "cmp	%%g7, %%g1\n\t"
+			     "bne,pn	%%xcc, 1b\n\t"
+			     " nop"
+			     : /* no outputs */
+			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
+			     : "g1", "g7");
+}
+
+static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
+{
+	unsigned long mask = (1UL << PG_dcache_dirty);
+
+	__asm__ __volatile__("! test_and_clear_dcache_dirty\n"
+			     "1:\n\t"
+			     "ldx	[%2], %%g7\n\t"
+			     "srlx	%%g7, %4, %%g1\n\t"
+			     "and	%%g1, %3, %%g1\n\t"
+			     "cmp	%%g1, %0\n\t"
+			     "bne,pn	%%icc, 2f\n\t"
+			     " andn	%%g7, %1, %%g1\n\t"
+			     "casx	[%2], %%g7, %%g1\n\t"
+			     "cmp	%%g7, %%g1\n\t"
+			     "bne,pn	%%xcc, 1b\n\t"
+			     " nop\n"
+			     "2:"
+			     : /* no outputs */
+			     : "r" (cpu), "r" (mask), "r" (&page->flags),
+			       "i" (PG_dcache_cpu_mask),
+			       "i" (PG_dcache_cpu_shift)
+			     : "g1", "g7");
+}
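+
+/* The cas loop in set_dcache_dirty() above is an atomic read-modify-write
+ * of page->flags; a plain-C sketch (illustrative only):
+ *
+ *	do {
+ *		old = page->flags;
+ *		new = (old & non_cpu_bits) | mask;
+ *	} while (cmpxchg(&page->flags, old, new) != old);
+ *
+ * clear_dcache_dirty_cpu() follows the same pattern but bails out early
+ * when the recorded cpu does not match.
+ */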
+
+static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
+{
+	unsigned long tsb_addr = (unsigned long) ent;
+
+	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+		tsb_addr = __pa(tsb_addr);
+
+	__tsb_insert(tsb_addr, tag, pte);
+}
+
+unsigned long _PAGE_ALL_SZ_BITS __read_mostly;
+
+static void flush_dcache(unsigned long pfn)
+{
+	struct page *page;
+
+	page = pfn_to_page(pfn);
+	if (page) {
+		unsigned long pg_flags;
+
+		pg_flags = page->flags;
+		if (pg_flags & (1UL << PG_dcache_dirty)) {
+			int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
+				   PG_dcache_cpu_mask);
+			int this_cpu = get_cpu();
+
+			/* This is just to optimize away some function calls
+			 * in the SMP case.
+			 */
+			if (cpu == this_cpu)
+				flush_dcache_page_impl(page);
+			else
+				smp_flush_dcache_page_impl(page, cpu);
+
+			clear_dcache_dirty_cpu(page, cpu);
+
+			put_cpu();
+		}
+	}
+}
+
+/* mm->context.lock must be held */
+static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
+				    unsigned long tsb_hash_shift, unsigned long address,
+				    unsigned long tte)
+{
+	struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
+	unsigned long tag;
+
+	if (unlikely(!tsb))
+		return;
+
+	tsb += ((address >> tsb_hash_shift) &
+		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
+	tag = (address >> 22UL);
+	tsb_insert(tsb, tag, tte);
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static void __init add_huge_page_size(unsigned long size)
+{
+	unsigned int order;
+
+	if (size_to_hstate(size))
+		return;
+
+	order = ilog2(size) - PAGE_SHIFT;
+	hugetlb_add_hstate(order);
+}
+
+static int __init hugetlbpage_init(void)
+{
+	add_huge_page_size(1UL << HPAGE_64K_SHIFT);
+	add_huge_page_size(1UL << HPAGE_SHIFT);
+	add_huge_page_size(1UL << HPAGE_256MB_SHIFT);
+	add_huge_page_size(1UL << HPAGE_2GB_SHIFT);
+
+	return 0;
+}
+
+arch_initcall(hugetlbpage_init);
+
+static void __init pud_huge_patch(void)
+{
+	struct pud_huge_patch_entry *p;
+	unsigned long addr;
+
+	p = &__pud_huge_patch;
+	addr = p->addr;
+	*(unsigned int *)addr = p->insn;
+
+	__asm__ __volatile__("flush %0" : : "r" (addr));
+}
+
+static int __init setup_hugepagesz(char *string)
+{
+	unsigned long long hugepage_size;
+	unsigned int hugepage_shift;
+	unsigned short hv_pgsz_idx;
+	unsigned int hv_pgsz_mask;
+	int rc = 0;
+
+	hugepage_size = memparse(string, &string);
+	hugepage_shift = ilog2(hugepage_size);
+
+	switch (hugepage_shift) {
+	case HPAGE_16GB_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_16GB;
+		hv_pgsz_idx = HV_PGSZ_IDX_16GB;
+		pud_huge_patch();
+		break;
+	case HPAGE_2GB_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_2GB;
+		hv_pgsz_idx = HV_PGSZ_IDX_2GB;
+		break;
+	case HPAGE_256MB_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_256MB;
+		hv_pgsz_idx = HV_PGSZ_IDX_256MB;
+		break;
+	case HPAGE_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_4MB;
+		hv_pgsz_idx = HV_PGSZ_IDX_4MB;
+		break;
+	case HPAGE_64K_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_64K;
+		hv_pgsz_idx = HV_PGSZ_IDX_64K;
+		break;
+	default:
+		hv_pgsz_mask = 0;
+	}
+
+	if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
+		hugetlb_bad_size();
+		pr_err("hugepagesz=%llu not supported by MMU.\n",
+			hugepage_size);
+		goto out;
+	}
+
+	add_huge_page_size(hugepage_size);
+	rc = 1;
+
+out:
+	return rc;
+}
+__setup("hugepagesz=", setup_hugepagesz);
+#endif	/* CONFIG_HUGETLB_PAGE */
+
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+	struct mm_struct *mm;
+	unsigned long flags;
+	bool is_huge_tsb;
+	pte_t pte = *ptep;
+
+	if (tlb_type != hypervisor) {
+		unsigned long pfn = pte_pfn(pte);
+
+		if (pfn_valid(pfn))
+			flush_dcache(pfn);
+	}
+
+	mm = vma->vm_mm;
+
+	/* Don't insert a non-valid PTE into the TSB, we'll deadlock.  */
+	if (!pte_accessible(mm, pte))
+		return;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	is_huge_tsb = false;
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
+		unsigned long hugepage_size = PAGE_SIZE;
+
+		if (is_vm_hugetlb_page(vma))
+			hugepage_size = huge_page_size(hstate_vma(vma));
+
+		if (hugepage_size >= PUD_SIZE) {
+			unsigned long mask = 0x1ffc00000UL;
+
+			/* Transfer bits [32:22] from address to resolve
+			 * at 4M granularity.
+			 */
+			pte_val(pte) &= ~mask;
+			pte_val(pte) |= (address & mask);
+		} else if (hugepage_size >= PMD_SIZE) {
+			/* We are fabricating 8MB pages using 4MB
+			 * real hw pages.
+			 */
+			pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
+		}
+
+		if (hugepage_size >= PMD_SIZE) {
+			__update_mmu_tsb_insert(mm, MM_TSB_HUGE,
+				REAL_HPAGE_SHIFT, address, pte_val(pte));
+			is_huge_tsb = true;
+		}
+	}
+#endif
+	if (!is_huge_tsb)
+		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
+					address, pte_val(pte));
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+
+void flush_dcache_page(struct page *page)
+{
+	struct address_space *mapping;
+	int this_cpu;
+
+	if (tlb_type == hypervisor)
+		return;
+
+	/* Do not bother with the expensive D-cache flush if it
+	 * is merely the zero page.  The 'bigcore' testcase in GDB
+	 * causes this case to run millions of times.
+	 */
+	if (page == ZERO_PAGE(0))
+		return;
+
+	this_cpu = get_cpu();
+
+	mapping = page_mapping_file(page);
+	if (mapping && !mapping_mapped(mapping)) {
+		int dirty = test_bit(PG_dcache_dirty, &page->flags);
+		if (dirty) {
+			int dirty_cpu = dcache_dirty_cpu(page);
+
+			if (dirty_cpu == this_cpu)
+				goto out;
+			smp_flush_dcache_page_impl(page, dirty_cpu);
+		}
+		set_dcache_dirty(page, this_cpu);
+	} else {
+		/* We could delay the flush for the !page_mapping
+		 * case too.  But that case is for exec env/arg
+		 * pages and those are 99% certain to get faulted
+		 * into the tlb (and thus flushed) anyway.
+		 */
+		flush_dcache_page_impl(page);
+	}
+
+out:
+	put_cpu();
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void __kprobes flush_icache_range(unsigned long start, unsigned long end)
+{
+	/* Cheetah and Hypervisor platform cpus have coherent I-cache. */
+	if (tlb_type == spitfire) {
+		unsigned long kaddr;
+
+		/* This code only runs on Spitfire cpus so this is
+		 * why we can assume _PAGE_PADDR_4U.
+		 */
+		for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) {
+			unsigned long paddr, mask = _PAGE_PADDR_4U;
+
+			if (kaddr >= PAGE_OFFSET)
+				paddr = kaddr & mask;
+			else {
+				pgd_t *pgdp = pgd_offset_k(kaddr);
+				pud_t *pudp = pud_offset(pgdp, kaddr);
+				pmd_t *pmdp = pmd_offset(pudp, kaddr);
+				pte_t *ptep = pte_offset_kernel(pmdp, kaddr);
+
+				paddr = pte_val(*ptep) & mask;
+			}
+			__flush_icache_page(paddr);
+		}
+	}
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+void mmu_info(struct seq_file *m)
+{
+	static const char *pgsz_strings[] = {
+		"8K", "64K", "512K", "4MB", "32MB",
+		"256MB", "2GB", "16GB",
+	};
+	int i, printed;
+
+	if (tlb_type == cheetah)
+		seq_printf(m, "MMU Type\t: Cheetah\n");
+	else if (tlb_type == cheetah_plus)
+		seq_printf(m, "MMU Type\t: Cheetah+\n");
+	else if (tlb_type == spitfire)
+		seq_printf(m, "MMU Type\t: Spitfire\n");
+	else if (tlb_type == hypervisor)
+		seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
+	else
+		seq_printf(m, "MMU Type\t: ???\n");
+
+	seq_printf(m, "MMU PGSZs\t: ");
+	printed = 0;
+	for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
+		if (cpu_pgsz_mask & (1UL << i)) {
+			seq_printf(m, "%s%s",
+				   printed ? "," : "", pgsz_strings[i]);
+			printed++;
+		}
+	}
+	seq_putc(m, '\n');
+
+#ifdef CONFIG_DEBUG_DCFLUSH
+	seq_printf(m, "DCPageFlushes\t: %d\n",
+		   atomic_read(&dcpage_flushes));
+#ifdef CONFIG_SMP
+	seq_printf(m, "DCPageFlushesXC\t: %d\n",
+		   atomic_read(&dcpage_flushes_xcall));
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_DEBUG_DCFLUSH */
+}
+
+struct linux_prom_translation prom_trans[512] __read_mostly;
+unsigned int prom_trans_ents __read_mostly;
+
+unsigned long kern_locked_tte_data;
+
+/* The OBP translations are saved based on the 8K page size, since OBP
+ * can use a mixture of page sizes. Misses to the LOW_OBP_ADDRESS ->
+ * HI_OBP_ADDRESS range are handled in ktlb.S.
+ */
+static inline int in_obp_range(unsigned long vaddr)
+{
+	return (vaddr >= LOW_OBP_ADDRESS &&
+		vaddr < HI_OBP_ADDRESS);
+}
+
+static int cmp_ptrans(const void *a, const void *b)
+{
+	const struct linux_prom_translation *x = a, *y = b;
+
+	if (x->virt > y->virt)
+		return 1;
+	if (x->virt < y->virt)
+		return -1;
+	return 0;
+}
+
+/* Read OBP translations property into 'prom_trans[]'.  */
+static void __init read_obp_translations(void)
+{
+	int n, node, ents, first, last, i;
+
+	node = prom_finddevice("/virtual-memory");
+	n = prom_getproplen(node, "translations");
+	if (unlikely(n == 0 || n == -1)) {
+		prom_printf("prom_mappings: Couldn't get size.\n");
+		prom_halt();
+	}
+	if (unlikely(n > sizeof(prom_trans))) {
+		prom_printf("prom_mappings: Size %d is too big.\n", n);
+		prom_halt();
+	}
+
+	if ((n = prom_getproperty(node, "translations",
+				  (char *)&prom_trans[0],
+				  sizeof(prom_trans))) == -1) {
+		prom_printf("prom_mappings: Couldn't get property.\n");
+		prom_halt();
+	}
+
+	n = n / sizeof(struct linux_prom_translation);
+
+	ents = n;
+
+	sort(prom_trans, ents, sizeof(struct linux_prom_translation),
+	     cmp_ptrans, NULL);
+
+	/* Now kick out all the non-OBP entries.  */
+	for (i = 0; i < ents; i++) {
+		if (in_obp_range(prom_trans[i].virt))
+			break;
+	}
+	first = i;
+	for (; i < ents; i++) {
+		if (!in_obp_range(prom_trans[i].virt))
+			break;
+	}
+	last = i;
+
+	for (i = 0; i < (last - first); i++) {
+		struct linux_prom_translation *src = &prom_trans[i + first];
+		struct linux_prom_translation *dest = &prom_trans[i];
+
+		*dest = *src;
+	}
+	for (; i < ents; i++) {
+		struct linux_prom_translation *dest = &prom_trans[i];
+		dest->virt = dest->size = dest->data = 0x0UL;
+	}
+
+	prom_trans_ents = last - first;
+
+	if (tlb_type == spitfire) {
+		/* Clear diag TTE bits. */
+		for (i = 0; i < prom_trans_ents; i++)
+			prom_trans[i].data &= ~0x0003fe0000000000UL;
+	}
+
+	/* Force execute bit on.  */
+	for (i = 0; i < prom_trans_ents; i++)
+		prom_trans[i].data |= (tlb_type == hypervisor ?
+				       _PAGE_EXEC_4V : _PAGE_EXEC_4U);
+}
+
+static void __init hypervisor_tlb_lock(unsigned long vaddr,
+				       unsigned long pte,
+				       unsigned long mmu)
+{
+	unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);
+
+	if (ret != 0) {
+		prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
+			    "errors with %lx\n", vaddr, 0, pte, mmu, ret);
+		prom_halt();
+	}
+}
+
+static unsigned long kern_large_tte(unsigned long paddr);
+
+static void __init remap_kernel(void)
+{
+	unsigned long phys_page, tte_vaddr, tte_data;
+	int i, tlb_ent = sparc64_highest_locked_tlbent();
+
+	tte_vaddr = (unsigned long) KERNBASE;
+	phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
+	tte_data = kern_large_tte(phys_page);
+
+	kern_locked_tte_data = tte_data;
+
+	/* Now lock us into the TLBs via Hypervisor or OBP. */
+	if (tlb_type == hypervisor) {
+		for (i = 0; i < num_kernel_image_mappings; i++) {
+			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
+			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
+			tte_vaddr += 0x400000;
+			tte_data += 0x400000;
+		}
+	} else {
+		for (i = 0; i < num_kernel_image_mappings; i++) {
+			prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr);
+			prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr);
+			tte_vaddr += 0x400000;
+			tte_data += 0x400000;
+		}
+		sparc64_highest_unlocked_tlb_ent = tlb_ent - i;
+	}
+	if (tlb_type == cheetah_plus) {
+		sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
+					    CTX_CHEETAH_PLUS_NUC);
+		sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
+		sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
+	}
+}
+
+
+static void __init inherit_prom_mappings(void)
+{
+	/* Now fixup OBP's idea about where we really are mapped. */
+	printk("Remapping the kernel... ");
+	remap_kernel();
+	printk("done.\n");
+}
+
+void prom_world(int enter)
+{
+	if (!enter)
+		set_fs(get_fs());
+
+	__asm__ __volatile__("flushw");
+}
+
+void __flush_dcache_range(unsigned long start, unsigned long end)
+{
+	unsigned long va;
+
+	if (tlb_type == spitfire) {
+		int n = 0;
+
+		for (va = start; va < end; va += 32) {
+			spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
+			if (++n >= 512)
+				break;
+		}
+	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+		start = __pa(start);
+		end = __pa(end);
+		for (va = start; va < end; va += 32)
+			__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+					     "membar #Sync"
+					     : /* no outputs */
+					     : "r" (va),
+					       "i" (ASI_DCACHE_INVALIDATE));
+	}
+}
+EXPORT_SYMBOL(__flush_dcache_range);
+
+/* get_new_mmu_context() uses "cache + 1".  */
+DEFINE_SPINLOCK(ctx_alloc_lock);
+unsigned long tlb_context_cache = CTX_FIRST_VERSION;
+#define MAX_CTX_NR	(1UL << CTX_NR_BITS)
+#define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
+DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
+
+static void mmu_context_wrap(void)
+{
+	unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
+	unsigned long new_ver, new_ctx, old_ctx;
+	struct mm_struct *mm;
+	int cpu;
+
+	bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
+
+	/* Reserve kernel context */
+	set_bit(0, mmu_context_bmap);
+
+	new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
+	if (unlikely(new_ver == 0))
+		new_ver = CTX_FIRST_VERSION;
+	tlb_context_cache = new_ver;
+
+	/*
+	 * Make sure that any new mm added to per_cpu_secondary_mm
+	 * goes through the get_new_mmu_context() path.
+	 */
+	mb();
+
+	/*
+	 * Update the version to current on those CPUs that had valid
+	 * secondary contexts.
+	 */
+	for_each_online_cpu(cpu) {
+		/*
+		 * If a new mm is stored after we took this mm from the array,
+		 * it will go into get_new_mmu_context() path, because we
+		 * already bumped the version in tlb_context_cache.
+		 */
+		mm = per_cpu(per_cpu_secondary_mm, cpu);
+
+		if (unlikely(!mm || mm == &init_mm))
+			continue;
+
+		old_ctx = mm->context.sparc64_ctx_val;
+		if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
+			new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
+			set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
+			mm->context.sparc64_ctx_val = new_ctx;
+		}
+	}
+}
+
+/* Caller does TLB context flushing on local CPU if necessary.
+ * The caller also ensures that CTX_VALID(mm->context) is false.
+ *
+ * We must be careful about boundary cases so that we never
+ * let the user have CTX 0 (nucleus) or we ever use a CTX
+ * version of zero (and thus NO_CONTEXT would not be caught
+ * by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
+ */
+void get_new_mmu_context(struct mm_struct *mm)
+{
+	unsigned long ctx, new_ctx;
+	unsigned long orig_pgsz_bits;
+
+	spin_lock(&ctx_alloc_lock);
+retry:
+	/* wrap might have happened, test again if our context became valid */
+	if (unlikely(CTX_VALID(mm->context)))
+		goto out;
+	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
+	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
+	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+	if (new_ctx >= (1 << CTX_NR_BITS)) {
+		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
+		if (new_ctx >= ctx) {
+			mmu_context_wrap();
+			goto retry;
+		}
+	}
+	if (mm->context.sparc64_ctx_val)
+		cpumask_clear(mm_cpumask(mm));
+	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
+	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
+	tlb_context_cache = new_ctx;
+	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
+out:
+	spin_unlock(&ctx_alloc_lock);
+}
+
+static int numa_enabled = 1;
+static int numa_debug;
+
+static int __init early_numa(char *p)
+{
+	if (!p)
+		return 0;
+
+	if (strstr(p, "off"))
+		numa_enabled = 0;
+
+	if (strstr(p, "debug"))
+		numa_debug = 1;
+
+	return 0;
+}
+early_param("numa", early_numa);
+
+#define numadbg(f, a...) \
+do {	if (numa_debug) \
+		printk(KERN_INFO f, ## a); \
+} while (0)
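+
+/* Illustrative usage: booting with "numa=off" disables NUMA parsing
+ * entirely, while "numa=debug" makes the numadbg() calls below print
+ * through printk(KERN_INFO ...).
+ */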
+
+static void __init find_ramdisk(unsigned long phys_base)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
+		unsigned long ramdisk_image;
+
+		/* Older versions of the bootloader only supported a
+		 * 32-bit physical address for the ramdisk image
+		 * location, stored at sparc_ramdisk_image.  Newer
+		 * SILO versions set sparc_ramdisk_image to zero and
+		 * provide a full 64-bit physical address at
+		 * sparc_ramdisk_image64.
+		 */
+		ramdisk_image = sparc_ramdisk_image;
+		if (!ramdisk_image)
+			ramdisk_image = sparc_ramdisk_image64;
+
+		/* Another bootloader quirk.  The bootloader normalizes
+		 * the physical address to KERNBASE, so we have to
+		 * factor that back out and add in the lowest valid
+		 * physical page address to get the true physical address.
+		 */
+		ramdisk_image -= KERNBASE;
+		ramdisk_image += phys_base;
+
+		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
+			ramdisk_image, sparc_ramdisk_size);
+
+		initrd_start = ramdisk_image;
+		initrd_end = ramdisk_image + sparc_ramdisk_size;
+
+		memblock_reserve(initrd_start, sparc_ramdisk_size);
+
+		initrd_start += PAGE_OFFSET;
+		initrd_end += PAGE_OFFSET;
+	}
+#endif
+}
+
+struct node_mem_mask {
+	unsigned long mask;
+	unsigned long match;
+};
+static struct node_mem_mask node_masks[MAX_NUMNODES];
+static int num_node_masks;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+
+struct mdesc_mlgroup {
+	u64	node;
+	u64	latency;
+	u64	match;
+	u64	mask;
+};
+
+static struct mdesc_mlgroup *mlgroups;
+static int num_mlgroups;
+
+int numa_cpu_lookup_table[NR_CPUS];
+cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
+
+struct mdesc_mblock {
+	u64	base;
+	u64	size;
+	u64	offset; /* RA-to-PA */
+};
+static struct mdesc_mblock *mblocks;
+static int num_mblocks;
+
+static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
+{
+	struct mdesc_mblock *m = NULL;
+	int i;
+
+	for (i = 0; i < num_mblocks; i++) {
+		m = &mblocks[i];
+
+		if (addr >= m->base &&
+		    addr < (m->base + m->size)) {
+			break;
+		}
+	}
+
+	return m;
+}
+
+static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
+{
+	int prev_nid, new_nid;
+
+	prev_nid = -1;
+	for ( ; start < end; start += PAGE_SIZE) {
+		for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
+			struct node_mem_mask *p = &node_masks[new_nid];
+
+			if ((start & p->mask) == p->match) {
+				if (prev_nid == -1)
+					prev_nid = new_nid;
+				break;
+			}
+		}
+
+		if (new_nid == num_node_masks) {
+			prev_nid = 0;
+			WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
+				  start);
+			break;
+		}
+
+		if (prev_nid != new_nid)
+			break;
+	}
+	*nid = prev_nid;
+
+	return start > end ? end : start;
+}
+
+static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
+{
+	u64 ret_end, pa_start, m_mask, m_match, m_end;
+	struct mdesc_mblock *mblock;
+	int _nid, i;
+
+	if (tlb_type != hypervisor)
+		return memblock_nid_range_sun4u(start, end, nid);
+
+	mblock = addr_to_mblock(start);
+	if (!mblock) {
+		WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
+			  start);
+
+		_nid = 0;
+		ret_end = end;
+		goto done;
+	}
+
+	pa_start = start + mblock->offset;
+	m_match = 0;
+	m_mask = 0;
+
+	for (_nid = 0; _nid < num_node_masks; _nid++) {
+		struct node_mem_mask *const m = &node_masks[_nid];
+
+		if ((pa_start & m->mask) == m->match) {
+			m_match = m->match;
+			m_mask = m->mask;
+			break;
+		}
+	}
+
+	if (num_node_masks == _nid) {
+		/* We could not find a NUMA group, so default to 0, but
+		 * still search for a latency group so we can calculate
+		 * the correct end address to return.
+		 */
+		_nid = 0;
+
+		for (i = 0; i < num_mlgroups; i++) {
+			struct mdesc_mlgroup *const m = &mlgroups[i];
+
+			if ((pa_start & m->mask) == m->match) {
+				m_match = m->match;
+				m_mask = m->mask;
+				break;
+			}
+		}
+
+		if (i == num_mlgroups) {
+			WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
+				  start);
+
+			ret_end = end;
+			goto done;
+		}
+	}
+
+	/*
+	 * Each latency group has a match and a mask, and each memory block
+	 * has an offset.  An address belongs to a latency group if
+	 * ((addr + offset) & mask) == match.  It is, however, slow to check
+	 * every single page against a particular latency group, so as an
+	 * optimization we calculate the end value using bit arithmetic.
+	 */
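+	/* Toy example (illustrative values only): with match = 0x80,
+	 * mask = 0x1c0 and offset = 0, __ffs(mask) = 6 so the stride is
+	 * 0x40, and fls64(mask) = 9.  For pa_start = 0x1280 this yields
+	 * m_end = 0x80 + 0x40 + (0x1280 & ~0x1ff) = 0x12c0, the first
+	 * address past the matching run [0x1280, 0x12c0).
+	 */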
+	m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
+	m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
+	ret_end = m_end > end ? end : m_end;
+
+done:
+	*nid = _nid;
+	return ret_end;
+}
+#endif
+
+/* This must be invoked after performing all of the necessary
+ * memblock_set_node() calls for 'nid'.  We need to be able to get
+ * correct data from get_pfn_range_for_nid().
+ */
+static void __init allocate_node_data(int nid)
+{
+	struct pglist_data *p;
+	unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	unsigned long paddr;
+
+	paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid);
+	if (!paddr) {
+		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
+		prom_halt();
+	}
+	NODE_DATA(nid) = __va(paddr);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+	NODE_DATA(nid)->node_id = nid;
+#endif
+
+	p = NODE_DATA(nid);
+
+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+	p->node_start_pfn = start_pfn;
+	p->node_spanned_pages = end_pfn - start_pfn;
+}
+
+static void init_node_masks_nonnuma(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int i;
+#endif
+
+	numadbg("Initializing tables for non-numa.\n");
+
+	node_masks[0].mask = 0;
+	node_masks[0].match = 0;
+	num_node_masks = 1;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	for (i = 0; i < NR_CPUS; i++)
+		numa_cpu_lookup_table[i] = 0;
+
+	cpumask_setall(&numa_cpumask_lookup_table[0]);
+#endif
+}
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+struct pglist_data *node_data[MAX_NUMNODES];
+
+EXPORT_SYMBOL(numa_cpu_lookup_table);
+EXPORT_SYMBOL(numa_cpumask_lookup_table);
+EXPORT_SYMBOL(node_data);
+
+static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
+				   u32 cfg_handle)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		const u64 *val;
+
+		val = mdesc_get_property(md, target,
+					 "cfg-handle", NULL);
+		if (val && *val == cfg_handle)
+			return 0;
+	}
+	return -ENODEV;
+}
+
+static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
+				    u32 cfg_handle)
+{
+	u64 arc, candidate, best_latency = ~(u64)0;
+
+	candidate = MDESC_NODE_NULL;
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		const char *name = mdesc_node_name(md, target);
+		const u64 *val;
+
+		if (strcmp(name, "pio-latency-group"))
+			continue;
+
+		val = mdesc_get_property(md, target, "latency", NULL);
+		if (!val)
+			continue;
+
+		if (*val < best_latency) {
+			candidate = target;
+			best_latency = *val;
+		}
+	}
+
+	if (candidate == MDESC_NODE_NULL)
+		return -ENODEV;
+
+	return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
+}
+
+int of_node_to_nid(struct device_node *dp)
+{
+	const struct linux_prom64_registers *regs;
+	struct mdesc_handle *md;
+	u32 cfg_handle;
+	int count, nid;
+	u64 grp;
+
+	/* This is the right thing to do on currently supported
+	 * SUN4U NUMA platforms as well, as the PCI controller does
+	 * not sit behind any particular memory controller.
+	 */
+	if (!mlgroups)
+		return -1;
+
+	regs = of_get_property(dp, "reg", NULL);
+	if (!regs)
+		return -1;
+
+	cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;
+
+	md = mdesc_grab();
+
+	count = 0;
+	nid = -1;
+	mdesc_for_each_node_by_name(md, grp, "group") {
+		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
+			nid = count;
+			break;
+		}
+		count++;
+	}
+
+	mdesc_release(md);
+
+	return nid;
+}
+
+static void __init add_node_ranges(void)
+{
+	struct memblock_region *reg;
+	unsigned long prev_max;
+
+memblock_resized:
+	prev_max = memblock.memory.max;
+
+	for_each_memblock(memory, reg) {
+		unsigned long size = reg->size;
+		unsigned long start, end;
+
+		start = reg->base;
+		end = start + size;
+		while (start < end) {
+			unsigned long this_end;
+			int nid;
+
+			this_end = memblock_nid_range(start, end, &nid);
+
+			numadbg("Setting memblock NUMA node nid[%d] "
+				"start[%lx] end[%lx]\n",
+				nid, start, this_end);
+
+			memblock_set_node(start, this_end - start,
+					  &memblock.memory, nid);
+			if (memblock.memory.max != prev_max)
+				goto memblock_resized;
+			start = this_end;
+		}
+	}
+}
+
+static int __init grab_mlgroups(struct mdesc_handle *md)
+{
+	unsigned long paddr;
+	int count = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(md, node, "memory-latency-group")
+		count++;
+	if (!count)
+		return -ENOENT;
+
+	paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup),
+			  SMP_CACHE_BYTES);
+	if (!paddr)
+		return -ENOMEM;
+
+	mlgroups = __va(paddr);
+	num_mlgroups = count;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
+		struct mdesc_mlgroup *m = &mlgroups[count++];
+		const u64 *val;
+
+		m->node = node;
+
+		val = mdesc_get_property(md, node, "latency", NULL);
+		m->latency = *val;
+		val = mdesc_get_property(md, node, "address-match", NULL);
+		m->match = *val;
+		val = mdesc_get_property(md, node, "address-mask", NULL);
+		m->mask = *val;
+
+		numadbg("MLGROUP[%d]: node[%llx] latency[%llx] "
+			"match[%llx] mask[%llx]\n",
+			count - 1, m->node, m->latency, m->match, m->mask);
+	}
+
+	return 0;
+}
+
+static int __init grab_mblocks(struct mdesc_handle *md)
+{
+	unsigned long paddr;
+	int count = 0;
+	u64 node;
+
+	mdesc_for_each_node_by_name(md, node, "mblock")
+		count++;
+	if (!count)
+		return -ENOENT;
+
+	paddr = memblock_alloc(count * sizeof(struct mdesc_mblock),
+			  SMP_CACHE_BYTES);
+	if (!paddr)
+		return -ENOMEM;
+
+	mblocks = __va(paddr);
+	num_mblocks = count;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "mblock") {
+		struct mdesc_mblock *m = &mblocks[count++];
+		const u64 *val;
+
+		val = mdesc_get_property(md, node, "base", NULL);
+		m->base = *val;
+		val = mdesc_get_property(md, node, "size", NULL);
+		m->size = *val;
+		val = mdesc_get_property(md, node,
+					 "address-congruence-offset", NULL);
+
+		/* The address-congruence-offset property is optional.
+		 * Explicitly zero the offset to identify this case.
+		 */
+		if (val)
+			m->offset = *val;
+		else
+			m->offset = 0UL;
+
+		numadbg("MBLOCK[%d]: base[%llx] size[%llx] offset[%llx]\n",
+			count - 1, m->base, m->size, m->offset);
+	}
+
+	return 0;
+}
+
+static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
+					       u64 grp, cpumask_t *mask)
+{
+	u64 arc;
+
+	cpumask_clear(mask);
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
+		u64 target = mdesc_arc_target(md, arc);
+		const char *name = mdesc_node_name(md, target);
+		const u64 *id;
+
+		if (strcmp(name, "cpu"))
+			continue;
+		id = mdesc_get_property(md, target, "id", NULL);
+		if (*id < nr_cpu_ids)
+			cpumask_set_cpu(*id, mask);
+	}
+}
+
+static struct mdesc_mlgroup * __init find_mlgroup(u64 node)
+{
+	int i;
+
+	for (i = 0; i < num_mlgroups; i++) {
+		struct mdesc_mlgroup *m = &mlgroups[i];
+		if (m->node == node)
+			return m;
+	}
+	return NULL;
+}
+
+int __node_distance(int from, int to)
+{
+	if ((from >= MAX_NUMNODES) || (to >= MAX_NUMNODES)) {
+		pr_warn("Returning default NUMA distance value for %d->%d\n",
+			from, to);
+		return (from == to) ? LOCAL_DISTANCE : REMOTE_DISTANCE;
+	}
+	return numa_latency[from][to];
+}
+EXPORT_SYMBOL(__node_distance);
+
+static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+{
+	int i;
+
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		struct node_mem_mask *n = &node_masks[i];
+
+		if ((grp->mask == n->mask) && (grp->match == n->match))
+			break;
+	}
+	return i;
+}
+
+static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
+						 u64 grp, int index)
+{
+	u64 arc;
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		int tnode;
+		u64 target = mdesc_arc_target(md, arc);
+		struct mdesc_mlgroup *m = find_mlgroup(target);
+
+		if (!m)
+			continue;
+		tnode = find_best_numa_node_for_mlgroup(m);
+		if (tnode == MAX_NUMNODES)
+			continue;
+		numa_latency[index][tnode] = m->latency;
+	}
+}
+
+static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp,
+				      int index)
+{
+	struct mdesc_mlgroup *candidate = NULL;
+	u64 arc, best_latency = ~(u64)0;
+	struct node_mem_mask *n;
+
+	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
+		u64 target = mdesc_arc_target(md, arc);
+		struct mdesc_mlgroup *m = find_mlgroup(target);
+		if (!m)
+			continue;
+		if (m->latency < best_latency) {
+			candidate = m;
+			best_latency = m->latency;
+		}
+	}
+	if (!candidate)
+		return -ENOENT;
+
+	if (num_node_masks != index) {
+		printk(KERN_ERR "Inconsistent NUMA state, "
+		       "index[%d] != num_node_masks[%d]\n",
+		       index, num_node_masks);
+		return -EINVAL;
+	}
+
+	n = &node_masks[num_node_masks++];
+
+	n->mask = candidate->mask;
+	n->match = candidate->match;
+
+	numadbg("NUMA NODE[%d]: mask[%lx] match[%lx] (latency[%llx])\n",
+		index, n->mask, n->match, candidate->latency);
+
+	return 0;
+}
+
+static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
+					 int index)
+{
+	cpumask_t mask;
+	int cpu;
+
+	numa_parse_mdesc_group_cpus(md, grp, &mask);
+
+	for_each_cpu(cpu, &mask)
+		numa_cpu_lookup_table[cpu] = index;
+	cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
+
+	if (numa_debug) {
+		printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
+		for_each_cpu(cpu, &mask)
+			printk("%d ", cpu);
+		printk("]\n");
+	}
+
+	return numa_attach_mlgroup(md, grp, index);
+}
+
+static int __init numa_parse_mdesc(void)
+{
+	struct mdesc_handle *md = mdesc_grab();
+	int i, j, err, count;
+	u64 node;
+
+	node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+	if (node == MDESC_NODE_NULL) {
+		mdesc_release(md);
+		return -ENOENT;
+	}
+
+	err = grab_mblocks(md);
+	if (err < 0)
+		goto out;
+
+	err = grab_mlgroups(md);
+	if (err < 0)
+		goto out;
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "group") {
+		err = numa_parse_mdesc_group(md, node, count);
+		if (err < 0)
+			break;
+		count++;
+	}
+
+	count = 0;
+	mdesc_for_each_node_by_name(md, node, "group") {
+		find_numa_latencies_for_group(md, node, count);
+		count++;
+	}
+
+	/* Normalize numa latency matrix according to ACPI SLIT spec. */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		u64 self_latency = numa_latency[i][i];
+
+		for (j = 0; j < MAX_NUMNODES; j++) {
+			numa_latency[i][j] =
+				(numa_latency[i][j] * LOCAL_DISTANCE) /
+				self_latency;
+		}
+	}
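+	/* Example (illustrative numbers): with self latency 40 and remote
+	 * latency 80, the matrix normalizes to LOCAL_DISTANCE (10) on the
+	 * diagonal and 80 * 10 / 40 = 20 off-diagonal, matching the SLIT
+	 * convention.
+	 */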
+
+	add_node_ranges();
+
+	for (i = 0; i < num_node_masks; i++) {
+		allocate_node_data(i);
+		node_set_online(i);
+	}
+
+	err = 0;
+out:
+	mdesc_release(md);
+	return err;
+}
+
+static int __init numa_parse_jbus(void)
+{
+	unsigned long cpu, index;
+
+	/* NUMA node id is encoded in bits 36 and higher, and there is
+	 * a 1-to-1 mapping from CPU ID to NUMA node ID.
+	 */
+	index = 0;
+	for_each_present_cpu(cpu) {
+		numa_cpu_lookup_table[cpu] = index;
+		cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
+		node_masks[index].mask = ~((1UL << 36UL) - 1UL);
+		node_masks[index].match = cpu << 36UL;
+
+		index++;
+	}
+	num_node_masks = index;
+
+	add_node_ranges();
+
+	for (index = 0; index < num_node_masks; index++) {
+		allocate_node_data(index);
+		node_set_online(index);
+	}
+
+	return 0;
+}
+
+static int __init numa_parse_sun4u(void)
+{
+	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+		unsigned long ver;
+
+		__asm__ ("rdpr %%ver, %0" : "=r" (ver));
+		if ((ver >> 32UL) == __JALAPENO_ID ||
+		    (ver >> 32UL) == __SERRANO_ID)
+			return numa_parse_jbus();
+	}
+	return -1;
+}
+
+static int __init bootmem_init_numa(void)
+{
+	int i, j;
+	int err = -1;
+
+	numadbg("bootmem_init_numa()\n");
+
+	/* Some sane defaults for numa latency values */
+	for (i = 0; i < MAX_NUMNODES; i++) {
+		for (j = 0; j < MAX_NUMNODES; j++)
+			numa_latency[i][j] = (i == j) ?
+				LOCAL_DISTANCE : REMOTE_DISTANCE;
+	}
+
+	if (numa_enabled) {
+		if (tlb_type == hypervisor)
+			err = numa_parse_mdesc();
+		else
+			err = numa_parse_sun4u();
+	}
+	return err;
+}
+
+#else
+
+static int bootmem_init_numa(void)
+{
+	return -1;
+}
+
+#endif
+
+static void __init bootmem_init_nonnuma(void)
+{
+	unsigned long top_of_ram = memblock_end_of_DRAM();
+	unsigned long total_ram = memblock_phys_mem_size();
+
+	numadbg("bootmem_init_nonnuma()\n");
+
+	printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
+	       top_of_ram, total_ram);
+	printk(KERN_INFO "Memory hole size: %ldMB\n",
+	       (top_of_ram - total_ram) >> 20);
+
+	init_node_masks_nonnuma();
+	memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
+	allocate_node_data(0);
+	node_set_online(0);
+}
+
+static unsigned long __init bootmem_init(unsigned long phys_base)
+{
+	unsigned long end_pfn;
+
+	end_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	max_pfn = max_low_pfn = end_pfn;
+	min_low_pfn = (phys_base >> PAGE_SHIFT);
+
+	if (bootmem_init_numa() < 0)
+		bootmem_init_nonnuma();
+
+	/* Dump memblock with node info. */
+	memblock_dump_all();
+
+	/* XXX cpu notifier XXX */
+
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
+	sparse_init();
+
+	return end_pfn;
+}
+
+static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
+static int pall_ents __initdata;
+
+static unsigned long max_phys_bits = 40;
+
+bool kern_addr_valid(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if ((long)addr < 0L) {
+		unsigned long pa = __pa(addr);
+
+		if ((pa >> max_phys_bits) != 0UL)
+			return false;
+
+		return pfn_valid(pa >> PAGE_SHIFT);
+	}
+
+	if (addr >= (unsigned long) KERNBASE &&
+	    addr < (unsigned long)&_end)
+		return true;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return 0;
+
+	if (pud_large(*pud))
+		return pfn_valid(pud_pfn(*pud));
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return 0;
+
+	if (pmd_large(*pmd))
+		return pfn_valid(pmd_pfn(*pmd));
+
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_none(*pte))
+		return 0;
+
+	return pfn_valid(pte_pfn(*pte));
+}
+EXPORT_SYMBOL(kern_addr_valid);
+
+static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+					      unsigned long vend,
+					      pud_t *pud)
+{
+	const unsigned long mask16gb = (1UL << 34) - 1UL;
+	u64 pte_val = vstart;
+
+	/* Each PUD is 8GB */
+	if ((vstart & mask16gb) ||
+	    (vend - vstart <= mask16gb)) {
+		pte_val ^= kern_linear_pte_xor[2];
+		pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
+
+		return vstart + PUD_SIZE;
+	}
+
+	pte_val ^= kern_linear_pte_xor[3];
+	pte_val |= _PAGE_PUD_HUGE;
+
+	vend = vstart + mask16gb + 1UL;
+	while (vstart < vend) {
+		pud_val(*pud) = pte_val;
+
+		pte_val += PUD_SIZE;
+		vstart += PUD_SIZE;
+		pud++;
+	}
+	return vstart;
+}
+
+static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
+				   bool guard)
+{
+	if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
+		return true;
+
+	return false;
+}
+
+static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
+					      unsigned long vend,
+					      pmd_t *pmd)
+{
+	const unsigned long mask256mb = (1UL << 28) - 1UL;
+	const unsigned long mask2gb = (1UL << 31) - 1UL;
+	u64 pte_val = vstart;
+
+	/* Each PMD is 8MB */
+	if ((vstart & mask256mb) ||
+	    (vend - vstart <= mask256mb)) {
+		pte_val ^= kern_linear_pte_xor[0];
+		pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
+
+		return vstart + PMD_SIZE;
+	}
+
+	if ((vstart & mask2gb) ||
+	    (vend - vstart <= mask2gb)) {
+		pte_val ^= kern_linear_pte_xor[1];
+		pte_val |= _PAGE_PMD_HUGE;
+		vend = vstart + mask256mb + 1UL;
+	} else {
+		pte_val ^= kern_linear_pte_xor[2];
+		pte_val |= _PAGE_PMD_HUGE;
+		vend = vstart + mask2gb + 1UL;
+	}
+
+	while (vstart < vend) {
+		pmd_val(*pmd) = pte_val;
+
+		pte_val += PMD_SIZE;
+		vstart += PMD_SIZE;
+		pmd++;
+	}
+
+	return vstart;
+}
+
+static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
+				   bool guard)
+{
+	if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
+		return true;
+
+	return false;
+}
+
+static unsigned long __ref kernel_map_range(unsigned long pstart,
+					    unsigned long pend, pgprot_t prot,
+					    bool use_huge)
+{
+	unsigned long vstart = PAGE_OFFSET + pstart;
+	unsigned long vend = PAGE_OFFSET + pend;
+	unsigned long alloc_bytes = 0UL;
+
+	if ((vstart & ~PAGE_MASK) || (vend & ~PAGE_MASK)) {
+		prom_printf("kernel_map: Unaligned physmem[%lx:%lx]\n",
+			    vstart, vend);
+		prom_halt();
+	}
+
+	while (vstart < vend) {
+		unsigned long this_end, paddr = __pa(vstart);
+		pgd_t *pgd = pgd_offset_k(vstart);
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		if (pgd_none(*pgd)) {
+			pud_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			alloc_bytes += PAGE_SIZE;
+			pgd_populate(&init_mm, pgd, new);
+		}
+		pud = pud_offset(pgd, vstart);
+		if (pud_none(*pud)) {
+			pmd_t *new;
+
+			if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
+				vstart = kernel_map_hugepud(vstart, vend, pud);
+				continue;
+			}
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			alloc_bytes += PAGE_SIZE;
+			pud_populate(&init_mm, pud, new);
+		}
+
+		pmd = pmd_offset(pud, vstart);
+		if (pmd_none(*pmd)) {
+			pte_t *new;
+
+			if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
+				vstart = kernel_map_hugepmd(vstart, vend, pmd);
+				continue;
+			}
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			alloc_bytes += PAGE_SIZE;
+			pmd_populate_kernel(&init_mm, pmd, new);
+		}
+
+		pte = pte_offset_kernel(pmd, vstart);
+		this_end = (vstart + PMD_SIZE) & PMD_MASK;
+		if (this_end > vend)
+			this_end = vend;
+
+		while (vstart < this_end) {
+			pte_val(*pte) = (paddr | pgprot_val(prot));
+
+			vstart += PAGE_SIZE;
+			paddr += PAGE_SIZE;
+			pte++;
+		}
+	}
+
+	return alloc_bytes;
+}
+
+static void __init flush_all_kernel_tsbs(void)
+{
+	int i;
+
+	for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
+		struct tsb *ent = &swapper_tsb[i];
+
+		ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
+		struct tsb *ent = &swapper_4m_tsb[i];
+
+		ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+#endif
+}
+
+extern unsigned int kvmap_linear_patch[1];
+
+static void __init kernel_physical_mapping_init(void)
+{
+	unsigned long i, mem_alloced = 0UL;
+	bool use_huge = true;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	use_huge = false;
+#endif
+	for (i = 0; i < pall_ents; i++) {
+		unsigned long phys_start, phys_end;
+
+		phys_start = pall[i].phys_addr;
+		phys_end = phys_start + pall[i].reg_size;
+
+		mem_alloced += kernel_map_range(phys_start, phys_end,
+						PAGE_KERNEL, use_huge);
+	}
+
+	printk("Allocated %ld bytes for kernel page tables.\n",
+	       mem_alloced);
+
+	kvmap_linear_patch[0] = 0x01000000; /* nop */
+	flushi(&kvmap_linear_patch[0]);
+
+	flush_all_kernel_tsbs();
+
+	__flush_tlb_all();
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
+	unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);
+
+	kernel_map_range(phys_start, phys_end,
+			 (enable ? PAGE_KERNEL : __pgprot(0)), false);
+
+	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
+			       PAGE_OFFSET + phys_end);
+
+	/* We should perform an IPI and flush all TLBs, but that can
+	 * deadlock, so flush only the current cpu.
+	 */
+	__flush_tlb_kernel_range(PAGE_OFFSET + phys_start,
+				 PAGE_OFFSET + phys_end);
+}
+#endif
+
+unsigned long __init find_ecache_flush_span(unsigned long size)
+{
+	int i;
+
+	for (i = 0; i < pavail_ents; i++) {
+		if (pavail[i].reg_size >= size)
+			return pavail[i].phys_addr;
+	}
+
+	return ~0UL;
+}
+
+unsigned long PAGE_OFFSET;
+EXPORT_SYMBOL(PAGE_OFFSET);
+
+unsigned long VMALLOC_END   = 0x0000010000000000UL;
+EXPORT_SYMBOL(VMALLOC_END);
+
+unsigned long sparc64_va_hole_top =    0xfffff80000000000UL;
+unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;
+
+static void __init setup_page_offset(void)
+{
+	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+		/* Cheetah/Panther support a full 64-bit virtual
+		 * address, so we can use all that our page tables
+		 * support.
+		 */
+		sparc64_va_hole_top =    0xfff0000000000000UL;
+		sparc64_va_hole_bottom = 0x0010000000000000UL;
+
+		max_phys_bits = 42;
+	} else if (tlb_type == hypervisor) {
+		switch (sun4v_chip_type) {
+		case SUN4V_CHIP_NIAGARA1:
+		case SUN4V_CHIP_NIAGARA2:
+			/* T1 and T2 support 48-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xffff800000000000UL;
+			sparc64_va_hole_bottom = 0x0000800000000000UL;
+
+			max_phys_bits = 39;
+			break;
+		case SUN4V_CHIP_NIAGARA3:
+			/* T3 supports 48-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xffff800000000000UL;
+			sparc64_va_hole_bottom = 0x0000800000000000UL;
+
+			max_phys_bits = 43;
+			break;
+		case SUN4V_CHIP_NIAGARA4:
+		case SUN4V_CHIP_NIAGARA5:
+		case SUN4V_CHIP_SPARC64X:
+		case SUN4V_CHIP_SPARC_M6:
+			/* T4 and later support 52-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xfff8000000000000UL;
+			sparc64_va_hole_bottom = 0x0008000000000000UL;
+			max_phys_bits = 47;
+			break;
+		case SUN4V_CHIP_SPARC_M7:
+		case SUN4V_CHIP_SPARC_SN:
+			/* M7 and later support 52-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xfff8000000000000UL;
+			sparc64_va_hole_bottom = 0x0008000000000000UL;
+			max_phys_bits = 49;
+			break;
+		case SUN4V_CHIP_SPARC_M8:
+		default:
+			/* M8 and later support 54-bit virtual addresses,
+			 * but we restrict them to 53 VA bits because a
+			 * 4-level page table cannot cover more than 53
+			 * VA bits.
+			 */
+			sparc64_va_hole_top =    0xfff0000000000000UL;
+			sparc64_va_hole_bottom = 0x0010000000000000UL;
+			max_phys_bits = 51;
+			break;
+		}
+	}
+
+	if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) {
+		prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n",
+			    max_phys_bits);
+		prom_halt();
+	}
+
+	PAGE_OFFSET = sparc64_va_hole_top;
+	VMALLOC_END = ((sparc64_va_hole_bottom >> 1) +
+		       (sparc64_va_hole_bottom >> 2));
+
+	pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+		PAGE_OFFSET, max_phys_bits);
+	pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n",
+		VMALLOC_START, VMALLOC_END);
+	pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n",
+		VMEMMAP_BASE, VMEMMAP_BASE << 1);
+}
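+
+/* Worked example (illustrative, using the Niagara-2 values above):
+ * with hole bottom 0x0000800000000000 the code yields
+ *
+ *	PAGE_OFFSET = 0xffff800000000000 (the hole top), and
+ *	VMALLOC_END = (bottom >> 1) + (bottom >> 2)
+ *		    = 0x0000400000000000 + 0x0000200000000000
+ *		    = 0x0000600000000000
+ *
+ * so the vmalloc area ends three quarters of the way up the lower
+ * half of the address space, safely below the VA hole.
+ */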
+
+static void __init tsb_phys_patch(void)
+{
+	struct tsb_ldquad_phys_patch_entry *pquad;
+	struct tsb_phys_patch_entry *p;
+
+	pquad = &__tsb_ldquad_phys_patch;
+	while (pquad < &__tsb_ldquad_phys_patch_end) {
+		unsigned long addr = pquad->addr;
+
+		if (tlb_type == hypervisor)
+			*(unsigned int *) addr = pquad->sun4v_insn;
+		else
+			*(unsigned int *) addr = pquad->sun4u_insn;
+		wmb();
+		__asm__ __volatile__("flush	%0"
+				     : /* no outputs */
+				     : "r" (addr));
+
+		pquad++;
+	}
+
+	p = &__tsb_phys_patch;
+	while (p < &__tsb_phys_patch_end) {
+		unsigned long addr = p->addr;
+
+		*(unsigned int *) addr = p->insn;
+		wmb();
+		__asm__ __volatile__("flush	%0"
+				     : /* no outputs */
+				     : "r" (addr));
+
+		p++;
+	}
+}
+
+/* Don't mark as init, we give this to the Hypervisor.  */
+#ifndef CONFIG_DEBUG_PAGEALLOC
+#define NUM_KTSB_DESCR	2
+#else
+#define NUM_KTSB_DESCR	1
+#endif
+static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
+
+/* The swapper TSBs are loaded with a base sequence of:
+ *
+ *	sethi	%uhi(SYMBOL), REG1
+ *	sethi	%hi(SYMBOL), REG2
+ *	or	REG1, %ulo(SYMBOL), REG1
+ *	or	REG2, %lo(SYMBOL), REG2
+ *	sllx	REG1, 32, REG1
+ *	or	REG1, REG2, REG1
+ *
+ * When we use physical addressing for the TSB accesses, we patch the
+ * first four instructions in the above sequence.
+ */
+
+static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
+{
+	unsigned long high_bits, low_bits;
+
+	high_bits = (pa >> 32) & 0xffffffff;
+	low_bits = (pa >> 0) & 0xffffffff;
+
+	while (start < end) {
+		unsigned int *ia = (unsigned int *)(unsigned long)*start;
+
+		ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10);
+		__asm__ __volatile__("flush	%0" : : "r" (ia));
+
+		ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10);
+		__asm__ __volatile__("flush	%0" : : "r" (ia + 1));
+
+		ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff);
+		__asm__ __volatile__("flush	%0" : : "r" (ia + 2));
+
+		ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff);
+		__asm__ __volatile__("flush	%0" : : "r" (ia + 3));
+
+		start++;
+	}
+}
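+
+/* To make the patching above concrete (an illustrative decomposition):
+ * the imm22 field of each sethi takes the top 22 bits of one half of
+ * the physical address, and the imm13 field of each or takes the
+ * remaining 10 bits of that half, i.e.
+ *
+ *	ia[0] sethi: high_bits >> 10	(%uhi, pa bits 63:42)
+ *	ia[1] sethi: low_bits >> 10	(%hi,  pa bits 31:10)
+ *	ia[2] or:    high_bits & 0x3ff	(%ulo, pa bits 41:32)
+ *	ia[3] or:    low_bits & 0x3ff	(%lo,  pa bits  9:0)
+ *
+ * The sllx/or tail of the sequence is left untouched.
+ */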
+
+static void ktsb_phys_patch(void)
+{
+	extern unsigned int __swapper_tsb_phys_patch;
+	extern unsigned int __swapper_tsb_phys_patch_end;
+	unsigned long ktsb_pa;
+
+	ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
+	patch_one_ktsb_phys(&__swapper_tsb_phys_patch,
+			    &__swapper_tsb_phys_patch_end, ktsb_pa);
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	{
+	extern unsigned int __swapper_4m_tsb_phys_patch;
+	extern unsigned int __swapper_4m_tsb_phys_patch_end;
+	ktsb_pa = (kern_base +
+		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
+	patch_one_ktsb_phys(&__swapper_4m_tsb_phys_patch,
+			    &__swapper_4m_tsb_phys_patch_end, ktsb_pa);
+	}
+#endif
+}
+
+static void __init sun4v_ktsb_init(void)
+{
+	unsigned long ktsb_pa;
+
+	/* First KTSB for PAGE_SIZE mappings.  */
+	ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
+
+	switch (PAGE_SIZE) {
+	case 8 * 1024:
+	default:
+		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_8K;
+		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_8K;
+		break;
+
+	case 64 * 1024:
+		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_64K;
+		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_64K;
+		break;
+
+	case 512 * 1024:
+		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_512K;
+		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_512K;
+		break;
+
+	case 4 * 1024 * 1024:
+		ktsb_descr[0].pgsz_idx = HV_PGSZ_IDX_4MB;
+		ktsb_descr[0].pgsz_mask = HV_PGSZ_MASK_4MB;
+		break;
+	}
+
+	ktsb_descr[0].assoc = 1;
+	ktsb_descr[0].num_ttes = KERNEL_TSB_NENTRIES;
+	ktsb_descr[0].ctx_idx = 0;
+	ktsb_descr[0].tsb_base = ktsb_pa;
+	ktsb_descr[0].resv = 0;
+
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	/* Second KTSB for 4MB/256MB/2GB/16GB mappings.  */
+	ktsb_pa = (kern_base +
+		   ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
+
+	ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
+	ktsb_descr[1].pgsz_mask = ((HV_PGSZ_MASK_4MB |
+				    HV_PGSZ_MASK_256MB |
+				    HV_PGSZ_MASK_2GB |
+				    HV_PGSZ_MASK_16GB) &
+				   cpu_pgsz_mask);
+	ktsb_descr[1].assoc = 1;
+	ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
+	ktsb_descr[1].ctx_idx = 0;
+	ktsb_descr[1].tsb_base = ktsb_pa;
+	ktsb_descr[1].resv = 0;
+#endif
+}
+
+void sun4v_ktsb_register(void)
+{
+	unsigned long pa, ret;
+
+	pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
+
+	ret = sun4v_mmu_tsb_ctx0(NUM_KTSB_DESCR, pa);
+	if (ret != 0) {
+		prom_printf("hypervisor_mmu_tsb_ctx0[%lx]: "
+			    "errors with %lx\n", pa, ret);
+		prom_halt();
+	}
+}
+
+static void __init sun4u_linear_pte_xor_finalize(void)
+{
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	/* This is where we would add Panther support for
+	 * 32MB and 256MB pages.
+	 */
+#endif
+}
+
+static void __init sun4v_linear_pte_xor_finalize(void)
+{
+	unsigned long pagecv_flag;
+
+	/* Bit 9 of TTE is no longer CV bit on M7 processor and it instead
+	 * enables MCD error. Do not set bit 9 on M7 processor.
+	 */
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
+	case SUN4V_CHIP_SPARC_SN:
+		pagecv_flag = 0x00;
+		break;
+	default:
+		pagecv_flag = _PAGE_CV_4V;
+		break;
+	}
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
+		kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
+			PAGE_OFFSET;
+		kern_linear_pte_xor[1] |= (_PAGE_CP_4V | pagecv_flag |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
+	}
+
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
+		kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
+			PAGE_OFFSET;
+		kern_linear_pte_xor[2] |= (_PAGE_CP_4V | pagecv_flag |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[2] = kern_linear_pte_xor[1];
+	}
+
+	if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
+		kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
+			PAGE_OFFSET;
+		kern_linear_pte_xor[3] |= (_PAGE_CP_4V | pagecv_flag |
+					   _PAGE_P_4V | _PAGE_W_4V);
+	} else {
+		kern_linear_pte_xor[3] = kern_linear_pte_xor[2];
+	}
+#endif
+}
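+
+/* How the xor values are consumed (a rough sketch): on a linear
+ * mapping TLB miss at vaddr, the patched handlers compute
+ *
+ *	tte = vaddr ^ kern_linear_pte_xor[i];
+ *
+ * Because PAGE_OFFSET is folded into the xor value and the flag bits
+ * never overlap an aligned physical address, the single xor both
+ * strips the PAGE_OFFSET bits and sets the valid/size/protection
+ * bits; i selects among the 4MB/256MB/2GB/16GB page sizes.
+ */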
+
+/* paging_init() sets up the page tables */
+
+static unsigned long last_valid_pfn;
+
+static void sun4u_pgprot_init(void);
+static void sun4v_pgprot_init(void);
+
+static phys_addr_t __init available_memory(void)
+{
+	phys_addr_t available = 0ULL;
+	phys_addr_t pa_start, pa_end;
+	u64 i;
+
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
+				&pa_end, NULL)
+		available = available + (pa_end  - pa_start);
+
+	return available;
+}
+
+#define _PAGE_CACHE_4U	(_PAGE_CP_4U | _PAGE_CV_4U)
+#define _PAGE_CACHE_4V	(_PAGE_CP_4V | _PAGE_CV_4V)
+#define __DIRTY_BITS_4U	 (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
+#define __DIRTY_BITS_4V	 (_PAGE_MODIFIED_4V | _PAGE_WRITE_4V | _PAGE_W_4V)
+#define __ACCESS_BITS_4U (_PAGE_ACCESSED_4U | _PAGE_READ_4U | _PAGE_R)
+#define __ACCESS_BITS_4V (_PAGE_ACCESSED_4V | _PAGE_READ_4V | _PAGE_R)
+
+/* We need to exclude reserved regions; this exclusion includes
+ * vmlinux and the initrd. To be more precise, the initrd size could be
+ * used to compute a new lower limit, because the initrd is freed later
+ * during initialization.
+ */
+static void __init reduce_memory(phys_addr_t limit_ram)
+{
+	phys_addr_t avail_ram = available_memory();
+	phys_addr_t pa_start, pa_end;
+	u64 i;
+
+	if (limit_ram >= avail_ram)
+		return;
+
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
+				&pa_end, NULL) {
+		phys_addr_t region_size = pa_end - pa_start;
+		phys_addr_t clip_start = pa_start;
+
+		avail_ram = avail_ram - region_size;
+		/* Are we consuming too much? */
+		if (avail_ram < limit_ram) {
+			phys_addr_t give_back = limit_ram - avail_ram;
+
+			region_size = region_size - give_back;
+			clip_start = clip_start + give_back;
+		}
+
+		memblock_remove(clip_start, region_size);
+
+		if (avail_ram <= limit_ram)
+			break;
+		i = 0UL;
+	}
+}
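+
+/* Worked example (illustrative): free regions of 512MB and 1536MB
+ * with mem=1G give avail_ram = 2048MB.  The 512MB region is removed
+ * whole, leaving avail_ram at 1536MB, still above the limit.  Removing
+ * the 1536MB region would overshoot by 1024MB, so give_back = 1024MB:
+ * its first 1024MB survive and only the trailing 512MB is removed,
+ * leaving exactly limit_ram available.
+ */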
+
+void __init paging_init(void)
+{
+	unsigned long end_pfn, shift, phys_base;
+	unsigned long real_end, i;
+
+	setup_page_offset();
+
+	/* These build time checks make sure that the dcache_dirty_cpu()
+	 * page->flags usage will work.
+	 *
+	 * When a page gets marked as dcache-dirty, we store the
+	 * cpu number starting at bit 32 in the page->flags.  Also,
+	 * functions like clear_dcache_dirty_cpu use the cpu mask
+	 * in 13-bit signed-immediate instruction fields.
+	 */
+
+	/*
+	 * Page flags must not reach into upper 32 bits that are used
+	 * for the cpu number
+	 */
+	BUILD_BUG_ON(NR_PAGEFLAGS > 32);
+
+	/*
+	 * The bit fields placed in the high range must not reach below
+	 * the 32 bit boundary. Otherwise we cannot place the cpu field
+	 * at the 32 bit boundary.
+	 */
+	BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
+		ilog2(roundup_pow_of_two(NR_CPUS)) > 32);
+
+	BUILD_BUG_ON(NR_CPUS > 4096);
+
+	kern_base = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
+	kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
+
+	/* Invalidate both kernel TSBs.  */
+	memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+#endif
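+
+	/* A 0x40 fill sets bit 6 of every byte, and therefore bit 46
+	 * of every 8-byte tag word, which is TSB_TAG_INVALID_BIT; it
+	 * is the same invalidation flush_all_kernel_tsbs() performs
+	 * one entry at a time.
+	 */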
+
+	/* The TTE.cv bit on sparc v9 occupies the same position as the
+	 * TTE.mcde bit on the M7 processor, a conflicting usage of the
+	 * same bit. Enabling TTE.cv on M7 would turn on Memory
+	 * Corruption Detection errors on all pages, and this will lead
+	 * to problems later. The kernel does not run with MCD enabled,
+	 * and hence the rest of the steps required to fully configure
+	 * memory corruption detection are not taken. We need to ensure
+	 * TTE.mcde is not set on the M7 processor. Compute the value
+	 * of the cacheability flag for later use taking this into
+	 * consideration.
+	 */
+	switch (sun4v_chip_type) {
+	case SUN4V_CHIP_SPARC_M7:
+	case SUN4V_CHIP_SPARC_M8:
+	case SUN4V_CHIP_SPARC_SN:
+		page_cache4v_flag = _PAGE_CP_4V;
+		break;
+	default:
+		page_cache4v_flag = _PAGE_CACHE_4V;
+		break;
+	}
+
+	if (tlb_type == hypervisor)
+		sun4v_pgprot_init();
+	else
+		sun4u_pgprot_init();
+
+	if (tlb_type == cheetah_plus ||
+	    tlb_type == hypervisor) {
+		tsb_phys_patch();
+		ktsb_phys_patch();
+	}
+
+	if (tlb_type == hypervisor)
+		sun4v_patch_tlb_handlers();
+
+	/* Find available physical memory...
+	 *
+	 * Read it twice in order to work around a bug in openfirmware.
+	 * The call to grab this table itself can cause openfirmware to
+	 * allocate memory, which in turn can take away some space from
+	 * the list of available memory.  Reading it twice makes sure
+	 * we really do get the final value.
+	 */
+	read_obp_translations();
+	read_obp_memory("reg", &pall[0], &pall_ents);
+	read_obp_memory("available", &pavail[0], &pavail_ents);
+	read_obp_memory("available", &pavail[0], &pavail_ents);
+
+	phys_base = 0xffffffffffffffffUL;
+	for (i = 0; i < pavail_ents; i++) {
+		phys_base = min(phys_base, pavail[i].phys_addr);
+		memblock_add(pavail[i].phys_addr, pavail[i].reg_size);
+	}
+
+	memblock_reserve(kern_base, kern_size);
+
+	find_ramdisk(phys_base);
+
+	if (cmdline_memory_size)
+		reduce_memory(cmdline_memory_size);
+
+	memblock_allow_resize();
+	memblock_dump_all();
+
+	set_bit(0, mmu_context_bmap);
+
+	shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
+
+	real_end = (unsigned long)_end;
+	num_kernel_image_mappings = DIV_ROUND_UP(real_end - KERNBASE, 1 << ILOG2_4MB);
+	printk("Kernel: Using %d locked TLB entries for main kernel image.\n",
+	       num_kernel_image_mappings);
+
+	/* Set kernel pgd to upper alias so physical page computations
+	 * work.
+	 */
+	init_mm.pgd += ((shift) / (sizeof(pgd_t)));
+	
+	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
+
+	inherit_prom_mappings();
+	
+	/* Ok, we can use our TLB miss and window trap handlers safely.  */
+	setup_tba();
+
+	__flush_tlb_all();
+
+	prom_build_devicetree();
+	of_populate_present_mask();
+#ifndef CONFIG_SMP
+	of_fill_in_cpu_data();
+#endif
+
+	if (tlb_type == hypervisor) {
+		sun4v_mdesc_init();
+		mdesc_populate_present_mask(cpu_all_mask);
+#ifndef CONFIG_SMP
+		mdesc_fill_in_cpu_data(cpu_all_mask);
+#endif
+		mdesc_get_page_sizes(cpu_all_mask, &cpu_pgsz_mask);
+
+		sun4v_linear_pte_xor_finalize();
+
+		sun4v_ktsb_init();
+		sun4v_ktsb_register();
+	} else {
+		unsigned long impl, ver;
+
+		cpu_pgsz_mask = (HV_PGSZ_MASK_8K | HV_PGSZ_MASK_64K |
+				 HV_PGSZ_MASK_512K | HV_PGSZ_MASK_4MB);
+
+		__asm__ __volatile__("rdpr %%ver, %0" : "=r" (ver));
+		impl = ((ver >> 32) & 0xffff);
+		if (impl == PANTHER_IMPL)
+			cpu_pgsz_mask |= (HV_PGSZ_MASK_32MB |
+					  HV_PGSZ_MASK_256MB);
+
+		sun4u_linear_pte_xor_finalize();
+	}
+
+	/* Flush the TLBs and the 4M TSB so that the updated linear
+	 * pte XOR settings are realized for all mappings.
+	 */
+	__flush_tlb_all();
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+#endif
+	__flush_tlb_all();
+
+	/* Setup bootmem... */
+	last_valid_pfn = end_pfn = bootmem_init(phys_base);
+
+	kernel_physical_mapping_init();
+
+	{
+		unsigned long max_zone_pfns[MAX_NR_ZONES];
+
+		memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+		max_zone_pfns[ZONE_NORMAL] = end_pfn;
+
+		free_area_init_nodes(max_zone_pfns);
+	}
+
+	printk("Booting Linux...\n");
+}
+
+int page_in_phys_avail(unsigned long paddr)
+{
+	int i;
+
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < pavail_ents; i++) {
+		unsigned long start, end;
+
+		start = pavail[i].phys_addr;
+		end = start + pavail[i].reg_size;
+
+		if (paddr >= start && paddr < end)
+			return 1;
+	}
+	if (paddr >= kern_base && paddr < (kern_base + kern_size))
+		return 1;
+#ifdef CONFIG_BLK_DEV_INITRD
+	if (paddr >= __pa(initrd_start) &&
+	    paddr < __pa(PAGE_ALIGN(initrd_end)))
+		return 1;
+#endif
+
+	return 0;
+}
+
+static void __init register_page_bootmem_info(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	int i;
+
+	for_each_online_node(i)
+		if (NODE_DATA(i)->node_spanned_pages)
+			register_page_bootmem_info_node(NODE_DATA(i));
+#endif
+}
+void __init mem_init(void)
+{
+	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
+
+	free_all_bootmem();
+
+	/*
+	 * Must be done after boot memory is put on freelist, because here we
+	 * might set fields in deferred struct pages that have not yet been
+	 * initialized, and free_all_bootmem() initializes all the reserved
+	 * deferred pages for us.
+	 */
+	register_page_bootmem_info();
+
+	/*
+	 * Set up the zero page, mark it reserved, so that page count
+	 * is not manipulated when freeing the page from user ptes.
+	 */
+	mem_map_zero = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
+	if (mem_map_zero == NULL) {
+		prom_printf("paging_init: Cannot alloc zero page.\n");
+		prom_halt();
+	}
+	mark_page_reserved(mem_map_zero);
+
+	mem_init_print_info(NULL);
+
+	if (tlb_type == cheetah || tlb_type == cheetah_plus)
+		cheetah_ecache_flush_init();
+}
+
+void free_initmem(void)
+{
+	unsigned long addr, initend;
+	int do_free = 1;
+
+	/* If the physical memory maps were trimmed by kernel command
+	 * line options, don't even try freeing this initmem stuff up.
+	 * The kernel image could have been in the trimmed out region
+	 * and if so the freeing below will free invalid page structs.
+	 */
+	if (cmdline_memory_size)
+		do_free = 0;
+
+	/*
+	 * The init section is aligned to 8K in vmlinux.lds. Page align
+	 * the range here for page sizes larger than 8K.
+	 */
+	addr = PAGE_ALIGN((unsigned long)(__init_begin));
+	initend = (unsigned long)(__init_end) & PAGE_MASK;
+	for (; addr < initend; addr += PAGE_SIZE) {
+		unsigned long page;
+
+		page = (addr +
+			((unsigned long) __va(kern_base)) -
+			((unsigned long) KERNBASE));
+		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+
+		if (do_free)
+			free_reserved_page(virt_to_page(page));
+	}
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
+			   "initrd");
+}
+#endif
+
+pgprot_t PAGE_KERNEL __read_mostly;
+EXPORT_SYMBOL(PAGE_KERNEL);
+
+pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
+pgprot_t PAGE_COPY __read_mostly;
+
+pgprot_t PAGE_SHARED __read_mostly;
+EXPORT_SYMBOL(PAGE_SHARED);
+
+unsigned long pg_iobits __read_mostly;
+
+unsigned long _PAGE_IE __read_mostly;
+EXPORT_SYMBOL(_PAGE_IE);
+
+unsigned long _PAGE_E __read_mostly;
+EXPORT_SYMBOL(_PAGE_E);
+
+unsigned long _PAGE_CACHE __read_mostly;
+EXPORT_SYMBOL(_PAGE_CACHE);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
+			       int node, struct vmem_altmap *altmap)
+{
+	unsigned long pte_base;
+
+	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
+		    _PAGE_CP_4U | _PAGE_CV_4U |
+		    _PAGE_P_4U | _PAGE_W_4U);
+	if (tlb_type == hypervisor)
+		pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4V |
+			    page_cache4v_flag | _PAGE_P_4V | _PAGE_W_4V);
+
+	pte_base |= _PAGE_PMD_HUGE;
+
+	vstart = vstart & PMD_MASK;
+	vend = ALIGN(vend, PMD_SIZE);
+	for (; vstart < vend; vstart += PMD_SIZE) {
+		pgd_t *pgd = vmemmap_pgd_populate(vstart, node);
+		unsigned long pte;
+		pud_t *pud;
+		pmd_t *pmd;
+
+		if (!pgd)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(pgd, vstart, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, vstart);
+		pte = pmd_val(*pmd);
+		if (!(pte & _PAGE_VALID)) {
+			void *block = vmemmap_alloc_block(PMD_SIZE, node);
+
+			if (!block)
+				return -ENOMEM;
+
+			pmd_val(*pmd) = pte_base | __pa(block);
+		}
+	}
+
+	return 0;
+}
+
+void vmemmap_free(unsigned long start, unsigned long end,
+		struct vmem_altmap *altmap)
+{
+}
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static void prot_init_common(unsigned long page_none,
+			     unsigned long page_shared,
+			     unsigned long page_copy,
+			     unsigned long page_readonly,
+			     unsigned long page_exec_bit)
+{
+	PAGE_COPY = __pgprot(page_copy);
+	PAGE_SHARED = __pgprot(page_shared);
+
+	protection_map[0x0] = __pgprot(page_none);
+	protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
+	protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
+	protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
+	protection_map[0x4] = __pgprot(page_readonly);
+	protection_map[0x5] = __pgprot(page_readonly);
+	protection_map[0x6] = __pgprot(page_copy);
+	protection_map[0x7] = __pgprot(page_copy);
+	protection_map[0x8] = __pgprot(page_none);
+	protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
+	protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
+	protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
+	protection_map[0xc] = __pgprot(page_readonly);
+	protection_map[0xd] = __pgprot(page_readonly);
+	protection_map[0xe] = __pgprot(page_shared);
+	protection_map[0xf] = __pgprot(page_shared);
+}
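+
+/* The protection_map[] index is the vma's VM_{READ,WRITE,EXEC,SHARED}
+ * bit combination: bit 0 is read, bit 1 write, bit 2 exec, bit 3
+ * shared.  For example, index 0xa (shared and writable) gets
+ * PAGE_SHARED with the execute bit cleared, while 0x3 (private
+ * read/write) gets PAGE_COPY so the first write faults and triggers
+ * copy-on-write.
+ */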
+
+static void __init sun4u_pgprot_init(void)
+{
+	unsigned long page_none, page_shared, page_copy, page_readonly;
+	unsigned long page_exec_bit;
+	int i;
+
+	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
+				_PAGE_CACHE_4U | _PAGE_P_4U |
+				__ACCESS_BITS_4U | __DIRTY_BITS_4U |
+				_PAGE_EXEC_4U);
+	PAGE_KERNEL_LOCKED = __pgprot (_PAGE_PRESENT_4U | _PAGE_VALID |
+				       _PAGE_CACHE_4U | _PAGE_P_4U |
+				       __ACCESS_BITS_4U | __DIRTY_BITS_4U |
+				       _PAGE_EXEC_4U | _PAGE_L_4U);
+
+	_PAGE_IE = _PAGE_IE_4U;
+	_PAGE_E = _PAGE_E_4U;
+	_PAGE_CACHE = _PAGE_CACHE_4U;
+
+	pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
+		     __ACCESS_BITS_4U | _PAGE_E_4U);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
+#else
+	kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
+		PAGE_OFFSET;
+#endif
+	kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
+				   _PAGE_P_4U | _PAGE_W_4U);
+
+	for (i = 1; i < 4; i++)
+		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
+
+	_PAGE_ALL_SZ_BITS =  (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
+			      _PAGE_SZ64K_4U | _PAGE_SZ8K_4U |
+			      _PAGE_SZ32MB_4U | _PAGE_SZ256MB_4U);
+
+
+	page_none = _PAGE_PRESENT_4U | _PAGE_ACCESSED_4U | _PAGE_CACHE_4U;
+	page_shared = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
+		       __ACCESS_BITS_4U | _PAGE_WRITE_4U | _PAGE_EXEC_4U);
+	page_copy   = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
+		       __ACCESS_BITS_4U | _PAGE_EXEC_4U);
+	page_readonly   = (_PAGE_VALID | _PAGE_PRESENT_4U | _PAGE_CACHE_4U |
+			   __ACCESS_BITS_4U | _PAGE_EXEC_4U);
+
+	page_exec_bit = _PAGE_EXEC_4U;
+
+	prot_init_common(page_none, page_shared, page_copy, page_readonly,
+			 page_exec_bit);
+}
+
+static void __init sun4v_pgprot_init(void)
+{
+	unsigned long page_none, page_shared, page_copy, page_readonly;
+	unsigned long page_exec_bit;
+	int i;
+
+	PAGE_KERNEL = __pgprot (_PAGE_PRESENT_4V | _PAGE_VALID |
+				page_cache4v_flag | _PAGE_P_4V |
+				__ACCESS_BITS_4V | __DIRTY_BITS_4V |
+				_PAGE_EXEC_4V);
+	PAGE_KERNEL_LOCKED = PAGE_KERNEL;
+
+	_PAGE_IE = _PAGE_IE_4V;
+	_PAGE_E = _PAGE_E_4V;
+	_PAGE_CACHE = page_cache4v_flag;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
+#else
+	kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
+		PAGE_OFFSET;
+#endif
+	kern_linear_pte_xor[0] |= (page_cache4v_flag | _PAGE_P_4V |
+				   _PAGE_W_4V);
+
+	for (i = 1; i < 4; i++)
+		kern_linear_pte_xor[i] = kern_linear_pte_xor[0];
+
+	pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
+		     __ACCESS_BITS_4V | _PAGE_E_4V);
+
+	_PAGE_ALL_SZ_BITS = (_PAGE_SZ16GB_4V | _PAGE_SZ2GB_4V |
+			     _PAGE_SZ256MB_4V | _PAGE_SZ32MB_4V |
+			     _PAGE_SZ4MB_4V | _PAGE_SZ512K_4V |
+			     _PAGE_SZ64K_4V | _PAGE_SZ8K_4V);
+
+	page_none = _PAGE_PRESENT_4V | _PAGE_ACCESSED_4V | page_cache4v_flag;
+	page_shared = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
+		       __ACCESS_BITS_4V | _PAGE_WRITE_4V | _PAGE_EXEC_4V);
+	page_copy   = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
+		       __ACCESS_BITS_4V | _PAGE_EXEC_4V);
+	page_readonly = (_PAGE_VALID | _PAGE_PRESENT_4V | page_cache4v_flag |
+			 __ACCESS_BITS_4V | _PAGE_EXEC_4V);
+
+	page_exec_bit = _PAGE_EXEC_4V;
+
+	prot_init_common(page_none, page_shared, page_copy, page_readonly,
+			 page_exec_bit);
+}
+
+unsigned long pte_sz_bits(unsigned long sz)
+{
+	if (tlb_type == hypervisor) {
+		switch (sz) {
+		case 8 * 1024:
+		default:
+			return _PAGE_SZ8K_4V;
+		case 64 * 1024:
+			return _PAGE_SZ64K_4V;
+		case 512 * 1024:
+			return _PAGE_SZ512K_4V;
+		case 4 * 1024 * 1024:
+			return _PAGE_SZ4MB_4V;
+		}
+	} else {
+		switch (sz) {
+		case 8 * 1024:
+		default:
+			return _PAGE_SZ8K_4U;
+		case 64 * 1024:
+			return _PAGE_SZ64K_4U;
+		case 512 * 1024:
+			return _PAGE_SZ512K_4U;
+		case 4 * 1024 * 1024:
+			return _PAGE_SZ4MB_4U;
+		}
+	}
+}
+
+pte_t mk_pte_io(unsigned long page, pgprot_t prot, int space, unsigned long page_size)
+{
+	pte_t pte;
+
+	pte_val(pte)  = page | pgprot_val(pgprot_noncached(prot));
+	pte_val(pte) |= (((unsigned long)space) << 32);
+	pte_val(pte) |= pte_sz_bits(page_size);
+
+	return pte;
+}
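+
+/* Illustrative use, e.g. when a bus driver maps device space (paddr,
+ * prot and bus_space here are placeholders):
+ *
+ *	pte_t pte = mk_pte_io(paddr, prot, bus_space, 8 * 1024);
+ *
+ * The bus space number lands above bit 32 of the PTE, alongside the
+ * side-effect/uncached attributes that pgprot_noncached() adds.
+ */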
+
+static unsigned long kern_large_tte(unsigned long paddr)
+{
+	unsigned long val;
+
+	val = (_PAGE_VALID | _PAGE_SZ4MB_4U |
+	       _PAGE_CP_4U | _PAGE_CV_4U | _PAGE_P_4U |
+	       _PAGE_EXEC_4U | _PAGE_L_4U | _PAGE_W_4U);
+	if (tlb_type == hypervisor)
+		val = (_PAGE_VALID | _PAGE_SZ4MB_4V |
+		       page_cache4v_flag | _PAGE_P_4V |
+		       _PAGE_EXEC_4V | _PAGE_W_4V);
+
+	return val | paddr;
+}
+
+/* If not locked, zap it. */
+void __flush_tlb_all(void)
+{
+	unsigned long pstate;
+	int i;
+
+	__asm__ __volatile__("flushw\n\t"
+			     "rdpr	%%pstate, %0\n\t"
+			     "wrpr	%0, %1, %%pstate"
+			     : "=r" (pstate)
+			     : "i" (PSTATE_IE));
+	if (tlb_type == hypervisor) {
+		sun4v_mmu_demap_all();
+	} else if (tlb_type == spitfire) {
+		for (i = 0; i < 64; i++) {
+			/* Spitfire Errata #32 workaround */
+			/* NOTE: Always runs on spitfire, so no
+			 *       cheetah+ page size encodings.
+			 */
+			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+					     "flush	%%g6"
+					     : /* No outputs */
+					     : "r" (0),
+					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+
+			if (!(spitfire_get_dtlb_data(i) & _PAGE_L_4U)) {
+				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+						     "membar #Sync"
+						     : /* no outputs */
+						     : "r" (TLB_TAG_ACCESS), "i" (ASI_DMMU));
+				spitfire_put_dtlb_data(i, 0x0UL);
+			}
+
+			/* Spitfire Errata #32 workaround */
+			/* NOTE: Always runs on spitfire, so no
+			 *       cheetah+ page size encodings.
+			 */
+			__asm__ __volatile__("stxa	%0, [%1] %2\n\t"
+					     "flush	%%g6"
+					     : /* No outputs */
+					     : "r" (0),
+					     "r" (PRIMARY_CONTEXT), "i" (ASI_DMMU));
+
+			if (!(spitfire_get_itlb_data(i) & _PAGE_L_4U)) {
+				__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
+						     "membar #Sync"
+						     : /* no outputs */
+						     : "r" (TLB_TAG_ACCESS), "i" (ASI_IMMU));
+				spitfire_put_itlb_data(i, 0x0UL);
+			}
+		}
+	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+		cheetah_flush_dtlb_all();
+		cheetah_flush_itlb_all();
+	}
+	__asm__ __volatile__("wrpr	%0, 0, %%pstate"
+			     : : "r" (pstate));
+}
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+			    unsigned long address)
+{
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	pte_t *pte = NULL;
+
+	if (page)
+		pte = (pte_t *) page_address(page);
+
+	return pte;
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm,
+			unsigned long address)
+{
+	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		free_unref_page(page);
+		return NULL;
+	}
+	return (pte_t *) page_address(page);
+}
+
+void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+	free_page((unsigned long)pte);
+}
+
+static void __pte_free(pgtable_t pte)
+{
+	struct page *page = virt_to_page(pte);
+
+	pgtable_page_dtor(page);
+	__free_page(page);
+}
+
+void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	__pte_free(pte);
+}
+
+void pgtable_free(void *table, bool is_page)
+{
+	if (is_page)
+		__pte_free(table);
+	else
+		kmem_cache_free(pgtable_cache, table);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+			  pmd_t *pmd)
+{
+	unsigned long pte, flags;
+	struct mm_struct *mm;
+	pmd_t entry = *pmd;
+
+	if (!pmd_large(entry) || !pmd_young(entry))
+		return;
+
+	pte = pmd_val(entry);
+
+	/* Don't insert a non-valid PMD into the TSB, we'll deadlock.  */
+	if (!(pte & _PAGE_VALID))
+		return;
+
+	/* We are fabricating 8MB pages using 4MB real hw pages.  */
+	pte |= (addr & (1UL << REAL_HPAGE_SHIFT));
+
+	mm = vma->vm_mm;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
+		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
+					addr, pte);
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void context_reload(void *__data)
+{
+	struct mm_struct *mm = __data;
+
+	if (mm == current->mm)
+		load_secondary_context(mm);
+}
+
+void hugetlb_setup(struct pt_regs *regs)
+{
+	struct mm_struct *mm = current->mm;
+	struct tsb_config *tp;
+
+	if (faulthandler_disabled() || !mm) {
+		const struct exception_table_entry *entry;
+
+		entry = search_exception_tables(regs->tpc);
+		if (entry) {
+			regs->tpc = entry->fixup;
+			regs->tnpc = regs->tpc + 4;
+			return;
+		}
+		pr_alert("Unexpected HugeTLB setup in atomic context.\n");
+		die_if_kernel("HugeTSB in atomic", regs);
+	}
+
+	tp = &mm->context.tsb_block[MM_TSB_HUGE];
+	if (likely(tp->tsb == NULL))
+		tsb_grow(mm, MM_TSB_HUGE, 0);
+
+	tsb_context_switch(mm);
+	smp_tsb_sync(mm);
+
+	/* On UltraSPARC-III+ and later, configure the second half of
+	 * the Data-TLB for huge pages.
+	 */
+	if (tlb_type == cheetah_plus) {
+		bool need_context_reload = false;
+		unsigned long ctx;
+
+		spin_lock_irq(&ctx_alloc_lock);
+		ctx = mm->context.sparc64_ctx_val;
+		ctx &= ~CTX_PGSZ_MASK;
+		ctx |= CTX_PGSZ_BASE << CTX_PGSZ0_SHIFT;
+		ctx |= CTX_PGSZ_HUGE << CTX_PGSZ1_SHIFT;
+
+		if (ctx != mm->context.sparc64_ctx_val) {
+			/* When changing the page size fields, we
+			 * must perform a context flush so that no
+			 * stale entries match.  This flush must
+			 * occur with the original context register
+			 * settings.
+			 */
+			do_flush_tlb_mm(mm);
+
+			/* Reload the context register of all processors
+			 * also executing in this address space.
+			 */
+			mm->context.sparc64_ctx_val = ctx;
+			need_context_reload = true;
+		}
+		spin_unlock_irq(&ctx_alloc_lock);
+
+		if (need_context_reload)
+			on_each_cpu(context_reload, mm, 0);
+	}
+}
+#endif
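+
+/* A note on the cheetah_plus path above: the context register has two
+ * page size fields.  CTX_PGSZ0 keeps the base (8K) size for normal
+ * mappings while CTX_PGSZ1 is switched to the huge size, which is
+ * what lets the second half of the D-TLB hold the huge page entries.
+ */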
+
+static struct resource code_resource = {
+	.name	= "Kernel code",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
+};
+
+static struct resource data_resource = {
+	.name	= "Kernel data",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
+};
+
+static struct resource bss_resource = {
+	.name	= "Kernel bss",
+	.flags	= IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
+};
+
+static inline resource_size_t compute_kern_paddr(void *addr)
+{
+	return (resource_size_t) (addr - KERNBASE + kern_base);
+}
+
+static void __init kernel_lds_init(void)
+{
+	code_resource.start = compute_kern_paddr(_text);
+	code_resource.end   = compute_kern_paddr(_etext - 1);
+	data_resource.start = compute_kern_paddr(_etext);
+	data_resource.end   = compute_kern_paddr(_edata - 1);
+	bss_resource.start  = compute_kern_paddr(__bss_start);
+	bss_resource.end    = compute_kern_paddr(_end - 1);
+}
+
+static int __init report_memory(void)
+{
+	int i;
+	struct resource *res;
+
+	kernel_lds_init();
+
+	for (i = 0; i < pavail_ents; i++) {
+		res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+
+		if (!res) {
+			pr_warn("Failed to allocate resource.\n");
+			break;
+		}
+
+		res->name = "System RAM";
+		res->start = pavail[i].phys_addr;
+		res->end = pavail[i].phys_addr + pavail[i].reg_size - 1;
+		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
+
+		if (insert_resource(&iomem_resource, res) < 0) {
+			pr_warn("Resource insertion failed.\n");
+			break;
+		}
+
+		insert_resource(res, &code_resource);
+		insert_resource(res, &data_resource);
+		insert_resource(res, &bss_resource);
+	}
+
+	return 0;
+}
+arch_initcall(report_memory);
+
+#ifdef CONFIG_SMP
+#define do_flush_tlb_kernel_range	smp_flush_tlb_kernel_range
+#else
+#define do_flush_tlb_kernel_range	__flush_tlb_kernel_range
+#endif
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (start < HI_OBP_ADDRESS && end > LOW_OBP_ADDRESS) {
+		if (start < LOW_OBP_ADDRESS) {
+			flush_tsb_kernel_range(start, LOW_OBP_ADDRESS);
+			do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
+		}
+		if (end > HI_OBP_ADDRESS) {
+			flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
+			do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
+		}
+	} else {
+		flush_tsb_kernel_range(start, end);
+		do_flush_tlb_kernel_range(start, end);
+	}
+}
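+
+/* The firmware (OBP) mappings that live between LOW_OBP_ADDRESS and
+ * HI_OBP_ADDRESS must never be demapped, so a range straddling that
+ * window is split: with start below it and end above it, we flush
+ * [start, LOW_OBP_ADDRESS) and [HI_OBP_ADDRESS, end) and leave the
+ * firmware window itself alone.
+ */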
+
+void copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	char *vfrom, *vto;
+
+	vfrom = kmap_atomic(from);
+	vto = kmap_atomic(to);
+	copy_user_page(vto, vfrom, vaddr, to);
+	kunmap_atomic(vto);
+	kunmap_atomic(vfrom);
+
+	/* If this page has ADI enabled, copy over any ADI tags
+	 * as well
+	 */
+	if (vma->vm_flags & VM_SPARC_ADI) {
+		unsigned long pfrom, pto, i, adi_tag;
+
+		pfrom = page_to_phys(from);
+		pto = page_to_phys(to);
+
+		for (i = pfrom; i < (pfrom + PAGE_SIZE); i += adi_blksize()) {
+			asm volatile("ldxa [%1] %2, %0\n\t"
+					: "=r" (adi_tag)
+					:  "r" (i), "i" (ASI_MCD_REAL));
+			asm volatile("stxa %0, [%1] %2\n\t"
+					:
+					: "r" (adi_tag), "r" (pto),
+					  "i" (ASI_MCD_REAL));
+			pto += adi_blksize();
+		}
+		asm volatile("membar #Sync\n\t");
+	}
+}
+EXPORT_SYMBOL(copy_user_highpage);
+
+void copy_highpage(struct page *to, struct page *from)
+{
+	char *vfrom, *vto;
+
+	vfrom = kmap_atomic(from);
+	vto = kmap_atomic(to);
+	copy_page(vto, vfrom);
+	kunmap_atomic(vto);
+	kunmap_atomic(vfrom);
+
+	/* If this platform is ADI enabled, copy any ADI tags
+	 * as well
+	 */
+	if (adi_capable()) {
+		unsigned long pfrom, pto, i, adi_tag;
+
+		pfrom = page_to_phys(from);
+		pto = page_to_phys(to);
+
+		for (i = pfrom; i < (pfrom + PAGE_SIZE); i += adi_blksize()) {
+			asm volatile("ldxa [%1] %2, %0\n\t"
+					: "=r" (adi_tag)
+					:  "r" (i), "i" (ASI_MCD_REAL));
+			asm volatile("stxa %0, [%1] %2\n\t"
+					:
+					: "r" (adi_tag), "r" (pto),
+					  "i" (ASI_MCD_REAL));
+			pto += adi_blksize();
+		}
+		asm volatile("membar #Sync\n\t");
+	}
+}
+EXPORT_SYMBOL(copy_highpage);
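+
+/* In both copy helpers above, the ldxa/stxa pair moves one ADI
+ * version tag per adi_blksize() bytes (typically 64 bytes, as
+ * reported by the machine description) through ASI_MCD_REAL, so the
+ * destination page keeps the same memory tagging metadata as the
+ * source; the trailing membar orders the tag stores before any later
+ * use of the page.
+ */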
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
new file mode 100644
index 0000000..d920a75
--- /dev/null
+++ b/arch/sparc/mm/init_64.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SPARC64_MM_INIT_H
+#define _SPARC64_MM_INIT_H
+
+#include <asm/page.h>
+
+/* Most of the symbols in this file are defined in init_64.c and
+ * marked non-static so that assembler code can get at them.
+ */
+
+#define MAX_PHYS_ADDRESS	(1UL << MAX_PHYS_ADDRESS_BITS)
+
+extern unsigned long kern_linear_pte_xor[4];
+extern unsigned int sparc64_highest_unlocked_tlb_ent;
+extern unsigned long sparc64_kern_pri_context;
+extern unsigned long sparc64_kern_pri_nuc_bits;
+extern unsigned long sparc64_kern_sec_context;
+void mmu_info(struct seq_file *m);
+
+struct linux_prom_translation {
+	unsigned long virt;
+	unsigned long size;
+	unsigned long data;
+};
+
+/* Exported for kernel TLB miss handling in ktlb.S */
+extern struct linux_prom_translation prom_trans[512];
+extern unsigned int prom_trans_ents;
+
+/* Exported for SMP bootup purposes. */
+extern unsigned long kern_locked_tte_data;
+
+void prom_world(int enter);
+
+#endif /* _SPARC64_MM_INIT_H */
diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c
new file mode 100644
index 0000000..c8cb27d
--- /dev/null
+++ b/arch/sparc/mm/io-unit.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * io-unit.c:  IO-UNIT specific routines for memory management.
+ *
+ * Copyright (C) 1997,1998 Jakub Jelinek    (jj@sunsite.mff.cuni.cz)
+ */
+ 
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>	/* pte_offset_map => kmap_atomic */
+#include <linux/bitops.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/io-unit.h>
+#include <asm/mxcc.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/dma.h>
+#include <asm/oplib.h>
+
+#include "mm_32.h"
+
+/* #define IOUNIT_DEBUG */
+#ifdef IOUNIT_DEBUG
+#define IOD(x) printk(x)
+#else
+#define IOD(x) do { } while (0)
+#endif
+
+#define IOPERM        (IOUPTE_CACHE | IOUPTE_WRITE | IOUPTE_VALID)
+#define MKIOPTE(phys) __iopte((((phys)>>4) & IOUPTE_PAGE) | IOPERM)
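+/* The IO-UNIT entry stores the physical address shifted right by 4;
+ * e.g. (illustrative) phys 0x12345000 becomes 0x01234500 | IOPERM,
+ * letting a 32-bit entry reach beyond 32 bits of physical space.
+ */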
+
+static void __init iounit_iommu_init(struct platform_device *op)
+{
+	struct iounit_struct *iounit;
+	iopte_t __iomem *xpt;
+	iopte_t __iomem *xptend;
+
+	iounit = kzalloc(sizeof(struct iounit_struct), GFP_ATOMIC);
+	if (!iounit) {
+		prom_printf("SUN4D: Cannot alloc iounit, halting.\n");
+		prom_halt();
+	}
+
+	iounit->limit[0] = IOUNIT_BMAP1_START;
+	iounit->limit[1] = IOUNIT_BMAP2_START;
+	iounit->limit[2] = IOUNIT_BMAPM_START;
+	iounit->limit[3] = IOUNIT_BMAPM_END;
+	iounit->rotor[1] = IOUNIT_BMAP2_START;
+	iounit->rotor[2] = IOUNIT_BMAPM_START;
+
+	xpt = of_ioremap(&op->resource[2], 0, PAGE_SIZE * 16, "XPT");
+	if (!xpt) {
+		prom_printf("SUN4D: Cannot map External Page Table.");
+		prom_halt();
+	}
+	
+	op->dev.archdata.iommu = iounit;
+	iounit->page_table = xpt;
+	spin_lock_init(&iounit->lock);
+
+	xptend = iounit->page_table + (16 * PAGE_SIZE) / sizeof(iopte_t);
+	for (; xpt < xptend; xpt++)
+		sbus_writel(0, xpt);
+}
+
+static int __init iounit_init(void)
+{
+	extern void sun4d_init_sbi_irq(void);
+	struct device_node *dp;
+
+	for_each_node_by_name(dp, "sbi") {
+		struct platform_device *op = of_find_device_by_node(dp);
+
+		iounit_iommu_init(op);
+		of_propagate_archdata(op);
+	}
+
+	sun4d_init_sbi_irq();
+
+	return 0;
+}
+
+subsys_initcall(iounit_init);
+
+/* One has to hold iounit->lock to call this */
+static unsigned long iounit_get_area(struct iounit_struct *iounit, unsigned long vaddr, int size)
+{
+	int i, j, k, npages;
+	unsigned long rotor, scan, limit;
+	iopte_t iopte;
+
+	npages = ((vaddr & ~PAGE_MASK) + size + (PAGE_SIZE-1)) >> PAGE_SHIFT;
+
+	/* A tiny bit of magic ingredients :) Each nibble of i names a
+	 * bitmap pool (1-3) to try, lowest nibble first; e.g. single
+	 * pages try pool 1, then 3, then 2.
+	 */
+	switch (npages) {
+	case 1: i = 0x0231; break;
+	case 2: i = 0x0132; break;
+	default: i = 0x0213; break;
+	}
+	
+	IOD(("iounit_get_area(%08lx,%d[%d])=", vaddr, size, npages));
+	
+next:	j = (i & 15);
+	rotor = iounit->rotor[j - 1];
+	limit = iounit->limit[j];
+	scan = rotor;
+nexti:	scan = find_next_zero_bit(iounit->bmap, limit, scan);
+	if (scan + npages > limit) {
+		if (limit != rotor) {
+			limit = rotor;
+			scan = iounit->limit[j - 1];
+			goto nexti;
+		}
+		i >>= 4;
+		if (!(i & 15))
+			panic("iounit_get_area: Couldn't find free iopte slots for (%08lx,%d)\n", vaddr, size);
+		goto next;
+	}
+	for (k = 1, scan++; k < npages; k++)
+		if (test_bit(scan++, iounit->bmap))
+			goto nexti;
+	iounit->rotor[j - 1] = (scan < limit) ? scan : iounit->limit[j - 1];
+	scan -= npages;
+	iopte = MKIOPTE(__pa(vaddr & PAGE_MASK));
+	vaddr = IOUNIT_DMA_BASE + (scan << PAGE_SHIFT) + (vaddr & ~PAGE_MASK);
+	for (k = 0; k < npages; k++, iopte = __iopte(iopte_val(iopte) + 0x100), scan++) {
+		set_bit(scan, iounit->bmap);
+		sbus_writel(iopte_val(iopte), &iounit->page_table[scan]);
+	}
+	IOD(("%08lx\n", vaddr));
+	return vaddr;
+}
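+
+/* The DMA address handed back is, roughly,
+ *
+ *	IOUNIT_DMA_BASE + (slot << PAGE_SHIFT) + (vaddr & ~PAGE_MASK)
+ *
+ * so the device sees a contiguous window while each slot's iopte
+ * points at the real page.  Successive pages step the iopte by 0x100,
+ * which is exactly one page in the >>4 encoding of MKIOPTE.
+ */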
+
+static __u32 iounit_get_scsi_one(struct device *dev, char *vaddr, unsigned long len)
+{
+	struct iounit_struct *iounit = dev->archdata.iommu;
+	unsigned long ret, flags;
+	
+	spin_lock_irqsave(&iounit->lock, flags);
+	ret = iounit_get_area(iounit, (unsigned long)vaddr, len);
+	spin_unlock_irqrestore(&iounit->lock, flags);
+	return ret;
+}
+
+static void iounit_get_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz)
+{
+	struct iounit_struct *iounit = dev->archdata.iommu;
+	unsigned long flags;
+
+	/* FIXME: Cache some resolved pages - often several sg entries refer to the same page */
+	spin_lock_irqsave(&iounit->lock, flags);
+	while (sz != 0) {
+		--sz;
+		sg->dma_address = iounit_get_area(iounit, (unsigned long) sg_virt(sg), sg->length);
+		sg->dma_length = sg->length;
+		sg = sg_next(sg);
+	}
+	spin_unlock_irqrestore(&iounit->lock, flags);
+}
+
+static void iounit_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len)
+{
+	struct iounit_struct *iounit = dev->archdata.iommu;
+	unsigned long flags;
+	
+	spin_lock_irqsave(&iounit->lock, flags);
+	len = ((vaddr & ~PAGE_MASK) + len + (PAGE_SIZE-1)) >> PAGE_SHIFT;
+	vaddr = (vaddr - IOUNIT_DMA_BASE) >> PAGE_SHIFT;
+	IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr));
+	for (len += vaddr; vaddr < len; vaddr++)
+		clear_bit(vaddr, iounit->bmap);
+	spin_unlock_irqrestore(&iounit->lock, flags);
+}
+
+static void iounit_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz)
+{
+	struct iounit_struct *iounit = dev->archdata.iommu;
+	unsigned long flags;
+	unsigned long vaddr, len;
+
+	spin_lock_irqsave(&iounit->lock, flags);
+	while (sz != 0) {
+		--sz;
+		len = ((sg->dma_address & ~PAGE_MASK) + sg->length + (PAGE_SIZE-1)) >> PAGE_SHIFT;
+		vaddr = (sg->dma_address - IOUNIT_DMA_BASE) >> PAGE_SHIFT;
+		IOD(("iounit_release %08lx-%08lx\n", (long)vaddr, (long)len+vaddr));
+		for (len += vaddr; vaddr < len; vaddr++)
+			clear_bit(vaddr, iounit->bmap);
+		sg = sg_next(sg);
+	}
+	spin_unlock_irqrestore(&iounit->lock, flags);
+}
+
+#ifdef CONFIG_SBUS
+static int iounit_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va, unsigned long addr, int len)
+{
+	struct iounit_struct *iounit = dev->archdata.iommu;
+	unsigned long page, end;
+	pgprot_t dvma_prot;
+	iopte_t __iomem *iopte;
+
+	*pba = addr;
+
+	dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV);
+	end = PAGE_ALIGN((addr + len));
+	while(addr < end) {
+		page = va;
+		{
+			pgd_t *pgdp;
+			pmd_t *pmdp;
+			pte_t *ptep;
+			long i;
+
+			pgdp = pgd_offset(&init_mm, addr);
+			pmdp = pmd_offset(pgdp, addr);
+			ptep = pte_offset_map(pmdp, addr);
+
+			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
+			
+			i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT);
+
+			iopte = iounit->page_table + i;
+			sbus_writel(iopte_val(MKIOPTE(__pa(page))), iopte);
+		}
+		addr += PAGE_SIZE;
+		va += PAGE_SIZE;
+	}
+	flush_cache_all();
+	flush_tlb_all();
+
+	return 0;
+}
+
+static void iounit_unmap_dma_area(struct device *dev, unsigned long addr, int len)
+{
+	/* XXX Somebody please fill this in */
+}
+#endif
+
+static const struct sparc32_dma_ops iounit_dma_ops = {
+	.get_scsi_one		= iounit_get_scsi_one,
+	.get_scsi_sgl		= iounit_get_scsi_sgl,
+	.release_scsi_one	= iounit_release_scsi_one,
+	.release_scsi_sgl	= iounit_release_scsi_sgl,
+#ifdef CONFIG_SBUS
+	.map_dma_area		= iounit_map_dma_area,
+	.unmap_dma_area		= iounit_unmap_dma_area,
+#endif
+};
+
+void __init ld_mmu_iounit(void)
+{
+	sparc32_dma_ops = &iounit_dma_ops;
+}
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
new file mode 100644
index 0000000..2c5f8a6
--- /dev/null
+++ b/arch/sparc/mm/iommu.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * iommu.c:  IOMMU specific routines for memory management.
+ *
+ * Copyright (C) 1995 David S. Miller  (davem@caip.rutgers.edu)
+ * Copyright (C) 1995,2002 Pete Zaitcev     (zaitcev@yahoo.com)
+ * Copyright (C) 1996 Eddie C. Dost    (ecd@skynet.be)
+ * Copyright (C) 1997,1998 Jakub Jelinek    (jj@sunsite.mff.cuni.cz)
+ */
+ 
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>	/* pte_offset_map => kmap_atomic */
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/io.h>
+#include <asm/mxcc.h>
+#include <asm/mbus.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/bitext.h>
+#include <asm/iommu.h>
+#include <asm/dma.h>
+
+#include "mm_32.h"
+
+/*
+ * This can be sized dynamically, but we will do this
+ * only when we have a guidance about actual I/O pressures.
+ */
+#define IOMMU_RNGE	IOMMU_RNGE_256MB
+#define IOMMU_START	0xF0000000
+#define IOMMU_WINSIZE	(256*1024*1024U)
+#define IOMMU_NPTES	(IOMMU_WINSIZE/PAGE_SIZE)	/* 64K PTEs, 256KB */
+#define IOMMU_ORDER	6				/* 4096 * (1<<6) */
+
+static int viking_flush;
+/* viking.S */
+extern void viking_flush_page(unsigned long page);
+extern void viking_mxcc_flush_page(unsigned long page);
+
+/*
+ * Values precomputed according to CPU type.
+ */
+static unsigned int ioperm_noc;		/* Consistent mapping iopte flags */
+static pgprot_t dvma_prot;		/* Consistent mapping pte flags */
+
+#define IOPERM        (IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID)
+#define MKIOPTE(pfn, perm) (((((pfn)<<8) & IOPTE_PAGE) | (perm)) & ~IOPTE_WAZ)
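+/* Here the entry holds a page frame number shifted left by 8, e.g.
+ * (illustrative) pfn 0x12345 yields 0x01234500 | perm, with the
+ * write-as-zero (IOPTE_WAZ) bits forced clear.
+ */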
+
+static void __init sbus_iommu_init(struct platform_device *op)
+{
+	struct iommu_struct *iommu;
+	unsigned int impl, vers;
+	unsigned long *bitmap;
+	unsigned long control;
+	unsigned long base;
+	unsigned long tmp;
+
+	iommu = kmalloc(sizeof(struct iommu_struct), GFP_KERNEL);
+	if (!iommu) {
+		prom_printf("Unable to allocate iommu structure\n");
+		prom_halt();
+	}
+
+	iommu->regs = of_ioremap(&op->resource[0], 0, PAGE_SIZE * 3,
+				 "iommu_regs");
+	if (!iommu->regs) {
+		prom_printf("Cannot map IOMMU registers\n");
+		prom_halt();
+	}
+
+	control = sbus_readl(&iommu->regs->control);
+	impl = (control & IOMMU_CTRL_IMPL) >> 28;
+	vers = (control & IOMMU_CTRL_VERS) >> 24;
+	control &= ~(IOMMU_CTRL_RNGE);
+	control |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB);
+	sbus_writel(control, &iommu->regs->control);
+
+	iommu_invalidate(iommu->regs);
+	iommu->start = IOMMU_START;
+	iommu->end = 0xffffffff;
+
+	/* Allocate the IOMMU page table.
+	 * Stupid alignment constraints give me a headache.
+	 * We need a 256K, 512K, 1M or 2M area aligned to its size,
+	 * and the current gfp allocator will fortunately give it to us.
+	 */
+	tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER);
+	if (!tmp) {
+		prom_printf("Unable to allocate iommu table [0x%lx]\n",
+			    IOMMU_NPTES * sizeof(iopte_t));
+		prom_halt();
+	}
+	iommu->page_table = (iopte_t *)tmp;
+
+	/* Initialize new table. */
+	memset(iommu->page_table, 0, IOMMU_NPTES*sizeof(iopte_t));
+	flush_cache_all();
+	flush_tlb_all();
+
+	base = __pa((unsigned long)iommu->page_table) >> 4;
+	sbus_writel(base, &iommu->regs->base);
+	iommu_invalidate(iommu->regs);
+
+	bitmap = kmalloc(IOMMU_NPTES>>3, GFP_KERNEL);
+	if (!bitmap) {
+		prom_printf("Unable to allocate iommu bitmap [%d]\n",
+			    (int)(IOMMU_NPTES>>3));
+		prom_halt();
+	}
+	bit_map_init(&iommu->usemap, bitmap, IOMMU_NPTES);
+	/* To be coherent on HyperSparc, the page color of DVMA
+	 * and physical addresses must match.
+	 */
+	if (srmmu_modtype == HyperSparc)
+		iommu->usemap.num_colors = vac_cache_size >> PAGE_SHIFT;
+	else
+		iommu->usemap.num_colors = 1;
+
+	printk(KERN_INFO "IOMMU: impl %d vers %d table 0x%p[%d B] map [%d b]\n",
+	       impl, vers, iommu->page_table,
+	       (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES);
+
+	op->dev.archdata.iommu = iommu;
+}
+
+static int __init iommu_init(void)
+{
+	struct device_node *dp;
+
+	for_each_node_by_name(dp, "iommu") {
+		struct platform_device *op = of_find_device_by_node(dp);
+
+		sbus_iommu_init(op);
+		of_propagate_archdata(op);
+	}
+
+	return 0;
+}
+
+subsys_initcall(iommu_init);
+
+/* Flush the iotlb entries to ram. */
+/* This could be better if we didn't have to flush whole pages. */
+static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
+{
+	unsigned long start;
+	unsigned long end;
+
+	start = (unsigned long)iopte;
+	end = PAGE_ALIGN(start + niopte*sizeof(iopte_t));
+	start &= PAGE_MASK;
+	if (viking_mxcc_present) {
+		while(start < end) {
+			viking_mxcc_flush_page(start);
+			start += PAGE_SIZE;
+		}
+	} else if (viking_flush) {
+		while(start < end) {
+			viking_flush_page(start);
+			start += PAGE_SIZE;
+		}
+	} else {
+		while(start < end) {
+			__flush_page_to_ram(start);
+			start += PAGE_SIZE;
+		}
+	}
+}
+
+static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
+{
+	struct iommu_struct *iommu = dev->archdata.iommu;
+	int ioptex;
+	iopte_t *iopte, *iopte0;
+	unsigned int busa, busa0;
+	int i;
+
+	/* page color = pfn of page */
+	ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page));
+	if (ioptex < 0)
+		panic("iommu out");
+	busa0 = iommu->start + (ioptex << PAGE_SHIFT);
+	iopte0 = &iommu->page_table[ioptex];
+
+	busa = busa0;
+	iopte = iopte0;
+	for (i = 0; i < npages; i++) {
+		iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM);
+		iommu_invalidate_page(iommu->regs, busa);
+		busa += PAGE_SIZE;
+		iopte++;
+		page++;
+	}
+
+	iommu_flush_iotlb(iopte0, npages);
+
+	return busa0;
+}
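+
+/* As the comment above notes, the third argument of
+ * bit_map_string_get() is the requested color: the allocator picks a
+ * slot whose index matches the pfn modulo the map's number of colors,
+ * which keeps DVMA and physical page colors equal on HyperSparc (see
+ * sbus_iommu_init() above).
+ */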
+
+static u32 iommu_get_scsi_one(struct device *dev, char *vaddr, unsigned int len)
+{
+	unsigned long off;
+	int npages;
+	struct page *page;
+	u32 busa;
+
+	off = (unsigned long)vaddr & ~PAGE_MASK;
+	npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
+	page = virt_to_page((unsigned long)vaddr & PAGE_MASK);
+	busa = iommu_get_one(dev, page, npages);
+	return busa + off;
+}
+
+static __u32 iommu_get_scsi_one_gflush(struct device *dev, char *vaddr, unsigned long len)
+{
+	flush_page_for_dma(0);
+	return iommu_get_scsi_one(dev, vaddr, len);
+}
+
+static __u32 iommu_get_scsi_one_pflush(struct device *dev, char *vaddr, unsigned long len)
+{
+	unsigned long page = ((unsigned long) vaddr) & PAGE_MASK;
+
+	while(page < ((unsigned long)(vaddr + len))) {
+		flush_page_for_dma(page);
+		page += PAGE_SIZE;
+	}
+	return iommu_get_scsi_one(dev, vaddr, len);
+}
+
+static void iommu_get_scsi_sgl_gflush(struct device *dev, struct scatterlist *sg, int sz)
+{
+	int n;
+
+	flush_page_for_dma(0);
+	while (sz != 0) {
+		--sz;
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+		sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
+		sg->dma_length = sg->length;
+		sg = sg_next(sg);
+	}
+}
+
+static void iommu_get_scsi_sgl_pflush(struct device *dev, struct scatterlist *sg, int sz)
+{
+	unsigned long page, oldpage = 0;
+	int n, i;
+
+	while(sz != 0) {
+		--sz;
+
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+		/*
+		 * We expect unmapped highmem pages not to be in the cache.
+		 * XXX Is this a good assumption?
+		 * XXX What if someone else unmaps it here and races us?
+		 */
+		if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
+			for (i = 0; i < n; i++) {
+				if (page != oldpage) {	/* Already flushed? */
+					flush_page_for_dma(page);
+					oldpage = page;
+				}
+				page += PAGE_SIZE;
+			}
+		}
+
+		sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
+		sg->dma_length = sg->length;
+		sg = sg_next(sg);
+	}
+}
+
+static void iommu_release_one(struct device *dev, u32 busa, int npages)
+{
+	struct iommu_struct *iommu = dev->archdata.iommu;
+	int ioptex;
+	int i;
+
+	BUG_ON(busa < iommu->start);
+	ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+	for (i = 0; i < npages; i++) {
+		iopte_val(iommu->page_table[ioptex + i]) = 0;
+		iommu_invalidate_page(iommu->regs, busa);
+		busa += PAGE_SIZE;
+	}
+	bit_map_clear(&iommu->usemap, ioptex, npages);
+}
+
+static void iommu_release_scsi_one(struct device *dev, __u32 vaddr, unsigned long len)
+{
+	unsigned long off;
+	int npages;
+
+	off = vaddr & ~PAGE_MASK;
+	npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
+	iommu_release_one(dev, vaddr & PAGE_MASK, npages);
+}
+
+static void iommu_release_scsi_sgl(struct device *dev, struct scatterlist *sg, int sz)
+{
+	int n;
+
+	while(sz != 0) {
+		--sz;
+
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+		iommu_release_one(dev, sg->dma_address & PAGE_MASK, n);
+		sg->dma_address = 0x21212121;
+		sg = sg_next(sg);
+	}
+}
+
+#ifdef CONFIG_SBUS
+static int iommu_map_dma_area(struct device *dev, dma_addr_t *pba, unsigned long va,
+			      unsigned long addr, int len)
+{
+	struct iommu_struct *iommu = dev->archdata.iommu;
+	unsigned long page, end;
+	iopte_t *iopte = iommu->page_table;
+	iopte_t *first;
+	int ioptex;
+
+	BUG_ON((va & ~PAGE_MASK) != 0);
+	BUG_ON((addr & ~PAGE_MASK) != 0);
+	BUG_ON((len & ~PAGE_MASK) != 0);
+
+	/* page color = physical address */
+	ioptex = bit_map_string_get(&iommu->usemap, len >> PAGE_SHIFT,
+		addr >> PAGE_SHIFT);
+	if (ioptex < 0)
+		panic("iommu out");
+
+	iopte += ioptex;
+	first = iopte;
+	end = addr + len;
+	while(addr < end) {
+		page = va;
+		{
+			pgd_t *pgdp;
+			pmd_t *pmdp;
+			pte_t *ptep;
+
+			if (viking_mxcc_present)
+				viking_mxcc_flush_page(page);
+			else if (viking_flush)
+				viking_flush_page(page);
+			else
+				__flush_page_to_ram(page);
+
+			pgdp = pgd_offset(&init_mm, addr);
+			pmdp = pmd_offset(pgdp, addr);
+			ptep = pte_offset_map(pmdp, addr);
+
+			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
+		}
+		iopte_val(*iopte++) =
+		    MKIOPTE(page_to_pfn(virt_to_page(page)), ioperm_noc);
+		addr += PAGE_SIZE;
+		va += PAGE_SIZE;
+	}
+	/* P3: why do we need this?
+	 *
+	 * DAVEM: Because there are several aspects, none of which
+	 *        are handled by a single interface.  Some cpus are
+	 *        completely not I/O DMA coherent, and some have
+	 *        virtually indexed caches.  The driver DMA flushing
+	 *        methods handle the former case, but here during
+	 *        IOMMU page table modifications, and usage of non-cacheable
+	 *        cpu mappings of pages potentially in the cpu caches, we have
+	 *        to handle the latter case as well.
+	 */
+	flush_cache_all();
+	iommu_flush_iotlb(first, len >> PAGE_SHIFT);
+	flush_tlb_all();
+	iommu_invalidate(iommu->regs);
+
+	*pba = iommu->start + (ioptex << PAGE_SHIFT);
+	return 0;
+}
+
+static void iommu_unmap_dma_area(struct device *dev, unsigned long busa, int len)
+{
+	struct iommu_struct *iommu = dev->archdata.iommu;
+	iopte_t *iopte = iommu->page_table;
+	unsigned long end;
+	int ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+
+	BUG_ON((busa & ~PAGE_MASK) != 0);
+	BUG_ON((len & ~PAGE_MASK) != 0);
+
+	iopte += ioptex;
+	end = busa + len;
+	while (busa < end) {
+		iopte_val(*iopte++) = 0;
+		busa += PAGE_SIZE;
+	}
+	flush_tlb_all();
+	iommu_invalidate(iommu->regs);
+	bit_map_clear(&iommu->usemap, ioptex, len >> PAGE_SHIFT);
+}
+#endif
+
+static const struct sparc32_dma_ops iommu_dma_gflush_ops = {
+	.get_scsi_one		= iommu_get_scsi_one_gflush,
+	.get_scsi_sgl		= iommu_get_scsi_sgl_gflush,
+	.release_scsi_one	= iommu_release_scsi_one,
+	.release_scsi_sgl	= iommu_release_scsi_sgl,
+#ifdef CONFIG_SBUS
+	.map_dma_area		= iommu_map_dma_area,
+	.unmap_dma_area		= iommu_unmap_dma_area,
+#endif
+};
+
+static const struct sparc32_dma_ops iommu_dma_pflush_ops = {
+	.get_scsi_one		= iommu_get_scsi_one_pflush,
+	.get_scsi_sgl		= iommu_get_scsi_sgl_pflush,
+	.release_scsi_one	= iommu_release_scsi_one,
+	.release_scsi_sgl	= iommu_release_scsi_sgl,
+#ifdef CONFIG_SBUS
+	.map_dma_area		= iommu_map_dma_area,
+	.unmap_dma_area		= iommu_unmap_dma_area,
+#endif
+};
+
+void __init ld_mmu_iommu(void)
+{
+	if (flush_page_for_dma_global) {
+		/* flush_page_for_dma flushes everything, no matter which page it is */
+		sparc32_dma_ops = &iommu_dma_gflush_ops;
+	} else {
+		sparc32_dma_ops = &iommu_dma_pflush_ops;
+	}
+
+	if (viking_mxcc_present || srmmu_modtype == HyperSparc) {
+		dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV);
+		ioperm_noc = IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID;
+	} else {
+		dvma_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV);
+		ioperm_noc = IOPTE_WRITE | IOPTE_VALID;
+	}
+}
diff --git a/arch/sparc/mm/leon_mm.c b/arch/sparc/mm/leon_mm.c
new file mode 100644
index 0000000..ec61ff1
--- /dev/null
+++ b/arch/sparc/mm/leon_mm.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/sparc/mm/leon_mm.c
+ *
+ * Copyright (C) 2004 Konrad Eisele (eiselekd@web.de, konrad@gaisler.com) Gaisler Research
+ * Copyright (C) 2009 Daniel Hellstrom (daniel@gaisler.com) Aeroflex Gaisler AB
+ * Copyright (C) 2009 Konrad Eisele (konrad@gaisler.com) Aeroflex Gaisler AB
+ *
+ * Do the SRMMU probe in software.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/asi.h>
+#include <asm/leon.h>
+#include <asm/tlbflush.h>
+
+#include "mm_32.h"
+
+int leon_flush_during_switch = 1;
+static int srmmu_swprobe_trace;
+
+static inline unsigned long leon_get_ctable_ptr(void)
+{
+	unsigned int retval;
+
+	__asm__ __volatile__("lda [%1] %2, %0\n\t" :
+			     "=r" (retval) :
+			     "r" (SRMMU_CTXTBL_PTR),
+			     "i" (ASI_LEON_MMUREGS));
+	return (retval & SRMMU_CTX_PMASK) << 4;
+}
+
+
+unsigned long leon_swprobe(unsigned long vaddr, unsigned long *paddr)
+{
+
+	unsigned int ctxtbl;
+	unsigned int pgd, pmd, ped;
+	unsigned int ptr;
+	unsigned int lvl, pte, paddrbase;
+	unsigned int ctx;
+	unsigned int paddr_calc;
+
+	paddrbase = 0;
+
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe: trace on\n");
+
+	ctxtbl = leon_get_ctable_ptr();
+	if (!(ctxtbl)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: leon_get_ctable_ptr returned 0=>0\n");
+		return 0;
+	}
+	if (!_pfn_valid(PFN(ctxtbl))) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO
+			       "swprobe: !_pfn_valid(%x)=>0\n",
+			       PFN(ctxtbl));
+		return 0;
+	}
+
+	ctx = srmmu_get_context();
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe:  --- ctx (%x) ---\n", ctx);
+
+	pgd = LEON_BYPASS_LOAD_PA(ctxtbl + (ctx * 4));
+
+	if (((pgd & SRMMU_ET_MASK) == SRMMU_ET_PTE)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: pgd is entry level 3\n");
+		lvl = 3;
+		pte = pgd;
+		paddrbase = pgd & _SRMMU_PTE_PMASK_LEON;
+		goto ready;
+	}
+	if (((pgd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: pgd is invalid => 0\n");
+		return 0;
+	}
+
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe:  --- pgd (%x) ---\n", pgd);
+
+	ptr = (pgd & SRMMU_PTD_PMASK) << 4;
+	ptr += ((((vaddr) >> LEON_PGD_SH) & LEON_PGD_M) * 4);
+	if (!_pfn_valid(PFN(ptr)))
+		return 0;
+
+	pmd = LEON_BYPASS_LOAD_PA(ptr);
+	if (((pmd & SRMMU_ET_MASK) == SRMMU_ET_PTE)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: pmd is entry level 2\n");
+		lvl = 2;
+		pte = pmd;
+		paddrbase = pmd & _SRMMU_PTE_PMASK_LEON;
+		goto ready;
+	}
+	if (((pmd & SRMMU_ET_MASK) != SRMMU_ET_PTD)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: pmd is invalid => 0\n");
+		return 0;
+	}
+
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe:  --- pmd (%x) ---\n", pmd);
+
+	ptr = (pmd & SRMMU_PTD_PMASK) << 4;
+	ptr += (((vaddr >> LEON_PMD_SH) & LEON_PMD_M) * 4);
+	if (!_pfn_valid(PFN(ptr))) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: !_pfn_valid(%x)=>0\n",
+			       PFN(ptr));
+		return 0;
+	}
+
+	ped = LEON_BYPASS_LOAD_PA(ptr);
+
+	if (((ped & SRMMU_ET_MASK) == SRMMU_ET_PTE)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: ped is entry level 1\n");
+		lvl = 1;
+		pte = ped;
+		paddrbase = ped & _SRMMU_PTE_PMASK_LEON;
+		goto ready;
+	}
+	if (((ped & SRMMU_ET_MASK) != SRMMU_ET_PTD)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: ped is invalid => 0\n");
+		return 0;
+	}
+
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe:  --- ped (%x) ---\n", ped);
+
+	ptr = (ped & SRMMU_PTD_PMASK) << 4;
+	ptr += (((vaddr >> LEON_PTE_SH) & LEON_PTE_M) * 4);
+	if (!_pfn_valid(PFN(ptr)))
+		return 0;
+
+	ptr = LEON_BYPASS_LOAD_PA(ptr);
+	if (((ptr & SRMMU_ET_MASK) == SRMMU_ET_PTE)) {
+		if (srmmu_swprobe_trace)
+			printk(KERN_INFO "swprobe: ptr is entry level 0\n");
+		lvl = 0;
+		pte = ptr;
+		paddrbase = ptr & _SRMMU_PTE_PMASK_LEON;
+		goto ready;
+	}
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe: ptr is invalid => 0\n");
+	return 0;
+
+ready:
+	switch (lvl) {
+	case 0:
+		paddr_calc =
+		    (vaddr & ~(-1 << LEON_PTE_SH)) | ((pte & ~0xff) << 4);
+		break;
+	case 1:
+		paddr_calc =
+		    (vaddr & ~(-1 << LEON_PMD_SH)) | ((pte & ~0xff) << 4);
+		break;
+	case 2:
+		paddr_calc =
+		    (vaddr & ~(-1 << LEON_PGD_SH)) | ((pte & ~0xff) << 4);
+		break;
+	default:
+	case 3:
+		paddr_calc = vaddr;
+		break;
+	}
+	if (srmmu_swprobe_trace)
+		printk(KERN_INFO "swprobe: padde %x\n", paddr_calc);
+	if (paddr)
+		*paddr = paddr_calc;
+	return pte;
+}
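+
+/*
+ * A worked example of the address assembly above, assuming 4 kB pages
+ * (LEON_PTE_SH == 12): a level-0 (page) PTE keeps its physical page
+ * number in bits 31:8, so (pte & ~0xff) << 4 is the physical page base.
+ * With pte == 0x04000102 and vaddr == 0xf0003234:
+ *
+ *	base   = (0x04000102 & ~0xff) << 4	== 0x40001000
+ *	offset = 0xf0003234 & ~(-1 << 12)	== 0x00000234
+ *	paddr  = base | offset			== 0x40001234
+ */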
+
+void leon_flush_icache_all(void)
+{
+	__asm__ __volatile__(" flush ");	/*iflush*/
+}
+
+void leon_flush_dcache_all(void)
+{
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
+			     "i"(ASI_LEON_DFLUSH) : "memory");
+}
+
+void leon_flush_pcache_all(struct vm_area_struct *vma, unsigned long page)
+{
+	if (vma->vm_flags & VM_EXEC)
+		leon_flush_icache_all();
+	leon_flush_dcache_all();
+}
+
+void leon_flush_cache_all(void)
+{
+	__asm__ __volatile__(" flush ");	/*iflush*/
+	__asm__ __volatile__("sta %%g0, [%%g0] %0\n\t" : :
+			     "i"(ASI_LEON_DFLUSH) : "memory");
+}
+
+void leon_flush_tlb_all(void)
+{
+	leon_flush_cache_all();
+	__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : : "r"(0x400),
+			     "i"(ASI_LEON_MMUFLUSH) : "memory");
+}
+
+/* get all cache regs */
+void leon3_getCacheRegs(struct leon3_cacheregs *regs)
+{
+	unsigned long ccr, iccr, dccr;
+
+	if (!regs)
+		return;
+	/* Get Cache regs from "Cache ASI" address 0x0, 0x8 and 0xC */
+	__asm__ __volatile__("lda [%%g0] %3, %0\n\t"
+			     "mov 0x08, %%g1\n\t"
+			     "lda [%%g1] %3, %1\n\t"
+			     "mov 0x0c, %%g1\n\t"
+			     "lda [%%g1] %3, %2\n\t"
+			     : "=r"(ccr), "=r"(iccr), "=r"(dccr)
+			       /* output */
+			     : "i"(ASI_LEON_CACHEREGS)	/* input */
+			     : "g1"	/* clobber list */
+	    );
+	regs->ccr = ccr;
+	regs->iccr = iccr;
+	regs->dccr = dccr;
+}
+
+/* Because the cache is virtually tagged, we need to check the cache
+ * configuration to see whether flushing can be skipped in some cases.
+ *
+ * LEON2 and LEON3 report their cache information differently.
+ */
+int __init leon_flush_needed(void)
+{
+	int flush_needed = -1;
+	unsigned int ssize, sets;
+	char *setStr[4] =
+	    { "direct mapped", "2-way associative", "3-way associative",
+		"4-way associative"
+	};
+	/* leon 3 */
+	struct leon3_cacheregs cregs;
+	leon3_getCacheRegs(&cregs);
+	sets = (cregs.dccr & LEON3_XCCR_SETS_MASK) >> 24;
+	/* (ssize=>realsize) 0=>1k, 1=>2k, 2=>4k, 3=>8k ... */
+	ssize = 1 << ((cregs.dccr & LEON3_XCCR_SSIZE_MASK) >> 20);
+
+	printk(KERN_INFO "CACHE: %s cache, set size %dk\n",
+	       sets > 3 ? "unknown" : setStr[sets], ssize);
+	if ((ssize <= (PAGE_SIZE / 1024)) && (sets == 0)) {
+		/* Set Size <= Page size  ==>
+		   no flush needed on every context switch. */
+		flush_needed = 0;
+		printk(KERN_INFO "CACHE: not flushing on every context switch\n");
+	}
+	return flush_needed;
+}
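+
+/*
+ * An illustrative decode, assuming the field layout implied by the
+ * shifts above (sets in bits 26:24, set-size exponent in bits 23:20)
+ * and 4 kB pages: dccr == 0x00200000 gives sets == 0 (direct mapped)
+ * and ssize == 1 << 2 == 4 kB.  Since 4 kB <= PAGE_SIZE and the cache
+ * is direct mapped, leon_flush_needed() returns 0 and the flush on
+ * every context switch is skipped.
+ */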
+
+void leon_switch_mm(void)
+{
+	flush_tlb_mm((void *)0);
+	if (leon_flush_during_switch)
+		leon_flush_cache_all();
+}
+
+static void leon_flush_cache_mm(struct mm_struct *mm)
+{
+	leon_flush_cache_all();
+}
+
+static void leon_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
+{
+	leon_flush_pcache_all(vma, page);
+}
+
+static void leon_flush_cache_range(struct vm_area_struct *vma,
+				   unsigned long start,
+				   unsigned long end)
+{
+	leon_flush_cache_all();
+}
+
+static void leon_flush_tlb_mm(struct mm_struct *mm)
+{
+	leon_flush_tlb_all();
+}
+
+static void leon_flush_tlb_page(struct vm_area_struct *vma,
+				unsigned long page)
+{
+	leon_flush_tlb_all();
+}
+
+static void leon_flush_tlb_range(struct vm_area_struct *vma,
+				 unsigned long start,
+				 unsigned long end)
+{
+	leon_flush_tlb_all();
+}
+
+static void leon_flush_page_to_ram(unsigned long page)
+{
+	leon_flush_cache_all();
+}
+
+static void leon_flush_sig_insns(struct mm_struct *mm, unsigned long page)
+{
+	leon_flush_cache_all();
+}
+
+static void leon_flush_page_for_dma(unsigned long page)
+{
+	leon_flush_dcache_all();
+}
+
+void __init poke_leonsparc(void)
+{
+}
+
+static const struct sparc32_cachetlb_ops leon_ops = {
+	.cache_all	= leon_flush_cache_all,
+	.cache_mm	= leon_flush_cache_mm,
+	.cache_page	= leon_flush_cache_page,
+	.cache_range	= leon_flush_cache_range,
+	.tlb_all	= leon_flush_tlb_all,
+	.tlb_mm		= leon_flush_tlb_mm,
+	.tlb_page	= leon_flush_tlb_page,
+	.tlb_range	= leon_flush_tlb_range,
+	.page_to_ram	= leon_flush_page_to_ram,
+	.sig_insns	= leon_flush_sig_insns,
+	.page_for_dma	= leon_flush_page_for_dma,
+};
+
+void __init init_leon(void)
+{
+	srmmu_name = "LEON";
+	sparc32_cachetlb_ops = &leon_ops;
+	poke_srmmu = poke_leonsparc;
+
+	leon_flush_during_switch = leon_flush_needed();
+}
diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h
new file mode 100644
index 0000000..0d0b06e
--- /dev/null
+++ b/arch/sparc/mm/mm_32.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* fault_32.c - declared here because they are called from assembler */
+asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
+                            unsigned long address);
+asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
+                               unsigned long address);
+
+void window_overflow_fault(void);
+void window_underflow_fault(unsigned long sp);
+void window_ret_fault(struct pt_regs *regs);
+
+/* srmmu.c */
+extern char *srmmu_name;
+extern int viking_mxcc_present;
+extern int flush_page_for_dma_global;
+
+extern void (*poke_srmmu)(void);
+
+void __init srmmu_paging_init(void);
+
+/* iommu.c */
+void ld_mmu_iommu(void);
+
+/* io-unit.c */
+void ld_mmu_iounit(void);
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
new file mode 100644
index 0000000..be9cb00
--- /dev/null
+++ b/arch/sparc/mm/srmmu.c
@@ -0,0 +1,1843 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * srmmu.c:  SRMMU specific routines for memory management.
+ *
+ * Copyright (C) 1995 David S. Miller  (davem@caip.rutgers.edu)
+ * Copyright (C) 1995,2002 Pete Zaitcev (zaitcev@yahoo.com)
+ * Copyright (C) 1996 Eddie C. Dost    (ecd@skynet.be)
+ * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1999,2000 Anton Blanchard (anton@samba.org)
+ */
+
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/kdebug.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/log2.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/io-unit.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/bitext.h>
+#include <asm/vaddrs.h>
+#include <asm/cache.h>
+#include <asm/traps.h>
+#include <asm/oplib.h>
+#include <asm/mbus.h>
+#include <asm/page.h>
+#include <asm/asi.h>
+#include <asm/smp.h>
+#include <asm/io.h>
+
+/* Now the cpu specific definitions. */
+#include <asm/turbosparc.h>
+#include <asm/tsunami.h>
+#include <asm/viking.h>
+#include <asm/swift.h>
+#include <asm/leon.h>
+#include <asm/mxcc.h>
+#include <asm/ross.h>
+
+#include "mm_32.h"
+
+enum mbus_module srmmu_modtype;
+static unsigned int hwbug_bitmask;
+int vac_cache_size;
+EXPORT_SYMBOL(vac_cache_size);
+int vac_line_size;
+
+extern struct resource sparc_iomap;
+
+extern unsigned long last_valid_pfn;
+
+static pgd_t *srmmu_swapper_pg_dir;
+
+const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops;
+EXPORT_SYMBOL(sparc32_cachetlb_ops);
+
+#ifdef CONFIG_SMP
+const struct sparc32_cachetlb_ops *local_ops;
+
+#define FLUSH_BEGIN(mm)
+#define FLUSH_END
+#else
+#define FLUSH_BEGIN(mm) if ((mm)->context != NO_CONTEXT) {
+#define FLUSH_END	}
+#endif
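+
+/*
+ * Illustrative expansion of the pair above on UP (the SMP variants are
+ * empty, leaving the flush body unconditional):
+ *
+ *	FLUSH_BEGIN(mm)			if ((mm)->context != NO_CONTEXT) {
+ *	srmmu_flush_whole_tlb();    =>		srmmu_flush_whole_tlb();
+ *	FLUSH_END				}
+ */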
+
+int flush_page_for_dma_global = 1;
+
+char *srmmu_name;
+
+ctxd_t *srmmu_ctx_table_phys;
+static ctxd_t *srmmu_context_table;
+
+int viking_mxcc_present;
+static DEFINE_SPINLOCK(srmmu_context_spinlock);
+
+static int is_hypersparc;
+
+static int srmmu_cache_pagetables;
+
+/* these will be initialized in srmmu_nocache_calcsize() */
+static unsigned long srmmu_nocache_size;
+static unsigned long srmmu_nocache_end;
+
+/* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */
+#define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4)
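+/* With PAGE_SHIFT == 12 the shift is 8, i.e. one bitmap bit covers
+ * 1 << 8 == 256 bytes, and 256 bytes / 4 bytes per pte_t == 64 PTEs,
+ * matching the comment above. */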
+
+/* The context table is a nocache user with the biggest alignment needs. */
+#define SRMMU_NOCACHE_ALIGN_MAX (sizeof(ctxd_t)*SRMMU_MAX_CONTEXTS)
+
+void *srmmu_nocache_pool;
+static struct bit_map srmmu_nocache_map;
+
+static inline int srmmu_pmd_none(pmd_t pmd)
+{ return !(pmd_val(pmd) & 0xFFFFFFF); }
+
+/* XXX should we hyper_flush_whole_icache here - Anton */
+static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
+{
+	pte_t pte;
+
+	pte = __pte((SRMMU_ET_PTD | (__nocache_pa(pgdp) >> 4)));
+	set_pte((pte_t *)ctxp, pte);
+}
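+
+/*
+ * For example (values illustrative): a page directory at physical
+ * 0x40002000 is installed as the page-table descriptor
+ * (0x40002000 >> 4) | SRMMU_ET_PTD == 0x04000200 | 0x1 == 0x04000201.
+ */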
+
+/*
+ * Locations of MSI Registers.
+ */
+#define MSI_MBUS_ARBEN	0xe0001008	/* MBus Arbiter Enable register */
+
+/*
+ * Useful bits in the MSI Registers.
+ */
+#define MSI_ASYNC_MODE  0x80000000	/* Operate the MSI asynchronously */
+
+static void msi_set_sync(void)
+{
+	__asm__ __volatile__ ("lda [%0] %1, %%g3\n\t"
+			      "andn %%g3, %2, %%g3\n\t"
+			      "sta %%g3, [%0] %1\n\t" : :
+			      "r" (MSI_MBUS_ARBEN),
+			      "i" (ASI_M_CTL), "r" (MSI_ASYNC_MODE) : "g3");
+}
+
+void pmd_set(pmd_t *pmdp, pte_t *ptep)
+{
+	unsigned long ptp;	/* Physical address, shifted right by 4 */
+	int i;
+
+	ptp = __nocache_pa(ptep) >> 4;
+	for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
+		set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
+		ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
+	}
+}
+
+void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
+{
+	unsigned long ptp;	/* Physical address, shifted right by 4 */
+	int i;
+
+	ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4);	/* watch for overflow */
+	for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
+		set_pte((pte_t *)&pmdp->pmdv[i], __pte(SRMMU_ET_PTD | ptp));
+		ptp += (SRMMU_REAL_PTRS_PER_PTE * sizeof(pte_t) >> 4);
+	}
+}
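+
+/*
+ * The arithmetic above, spelled out under the usual sparc32 sizes
+ * (64 hardware PTEs of 4 bytes each per real table): each soft pmd
+ * carries PTRS_PER_PTE / SRMMU_REAL_PTRS_PER_PTE == 16 hardware PTPs,
+ * and each iteration advances ptp by (64 * 4) >> 4 == 16, i.e. by one
+ * 256-byte hardware PTE table in pa>>4 units.
+ */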
+
+/* Find an entry in the third-level page table. */
+pte_t *pte_offset_kernel(pmd_t *dir, unsigned long address)
+{
+	void *pte;
+
+	pte = __nocache_va((dir->pmdv[0] & SRMMU_PTD_PMASK) << 4);
+	return (pte_t *) pte +
+	    ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+}
+
+/*
+ * size: bytes to allocate in the nocache area.
+ * align: bytes, number to align at.
+ * Returns the virtual address of the allocated area.
+ */
+static void *__srmmu_get_nocache(int size, int align)
+{
+	int offset;
+	unsigned long addr;
+
+	if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
+		printk(KERN_ERR "Size 0x%x too small for nocache request\n",
+		       size);
+		size = SRMMU_NOCACHE_BITMAP_SHIFT;
+	}
+	if (size & (SRMMU_NOCACHE_BITMAP_SHIFT - 1)) {
+		printk(KERN_ERR "Size 0x%x unaligned int nocache request\n",
+		       size);
+		size += SRMMU_NOCACHE_BITMAP_SHIFT - 1;
+	}
+	BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX);
+
+	offset = bit_map_string_get(&srmmu_nocache_map,
+				    size >> SRMMU_NOCACHE_BITMAP_SHIFT,
+				    align >> SRMMU_NOCACHE_BITMAP_SHIFT);
+	if (offset == -1) {
+		printk(KERN_ERR "srmmu: out of nocache %d: %d/%d\n",
+		       size, (int) srmmu_nocache_size,
+		       srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
+		return NULL;
+	}
+
+	addr = SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT);
+	return (void *)addr;
+}
+
+void *srmmu_get_nocache(int size, int align)
+{
+	void *tmp;
+
+	tmp = __srmmu_get_nocache(size, align);
+
+	if (tmp)
+		memset(tmp, 0, size);
+
+	return tmp;
+}
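+
+/*
+ * A minimal usage sketch (illustrative only), mirroring the callers in
+ * this file: allocate a zeroed, naturally aligned PTE table from the
+ * nocache pool and hand it back when done.
+ *
+ *	pte_t *ptep = srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+ *	if (!ptep)
+ *		return NULL;
+ *	...
+ *	srmmu_free_nocache(ptep, PTE_SIZE);
+ */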
+
+void srmmu_free_nocache(void *addr, int size)
+{
+	unsigned long vaddr;
+	int offset;
+
+	vaddr = (unsigned long)addr;
+	if (vaddr < SRMMU_NOCACHE_VADDR) {
+		printk("Vaddr %lx is smaller than nocache base 0x%lx\n",
+		    vaddr, (unsigned long)SRMMU_NOCACHE_VADDR);
+		BUG();
+	}
+	if (vaddr + size > srmmu_nocache_end) {
+		printk("Vaddr %lx is bigger than nocache end 0x%lx\n",
+		    vaddr, srmmu_nocache_end);
+		BUG();
+	}
+	if (!is_power_of_2(size)) {
+		printk("Size 0x%x is not a power of 2\n", size);
+		BUG();
+	}
+	if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
+		printk("Size 0x%x is too small\n", size);
+		BUG();
+	}
+	if (vaddr & (size - 1)) {
+		printk("Vaddr %lx is not aligned to size 0x%x\n", vaddr, size);
+		BUG();
+	}
+
+	offset = (vaddr - SRMMU_NOCACHE_VADDR) >> SRMMU_NOCACHE_BITMAP_SHIFT;
+	size = size >> SRMMU_NOCACHE_BITMAP_SHIFT;
+
+	bit_map_clear(&srmmu_nocache_map, offset, size);
+}
+
+static void srmmu_early_allocate_ptable_skeleton(unsigned long start,
+						 unsigned long end);
+
+/* Return how much physical memory we have.  */
+static unsigned long __init probe_memory(void)
+{
+	unsigned long total = 0;
+	int i;
+
+	for (i = 0; sp_banks[i].num_bytes; i++)
+		total += sp_banks[i].num_bytes;
+
+	return total;
+}
+
+/*
+ * Reserve nocache dynamically proportionally to the amount of
+ * system RAM. -- Tomas Szepe <szepe@pinerecords.com>, June 2002
+ */
+static void __init srmmu_nocache_calcsize(void)
+{
+	unsigned long sysmemavail = probe_memory() / 1024;
+	int srmmu_nocache_npages;
+
+	srmmu_nocache_npages =
+		sysmemavail / SRMMU_NOCACHE_ALCRATIO / 1024 * 256;
+
+	/* P3 XXX The 4x overuse: corroborated by /proc/meminfo. */
+	// if (srmmu_nocache_npages < 256) srmmu_nocache_npages = 256;
+	if (srmmu_nocache_npages < SRMMU_MIN_NOCACHE_PAGES)
+		srmmu_nocache_npages = SRMMU_MIN_NOCACHE_PAGES;
+
+	/* anything above 1280 blows up */
+	if (srmmu_nocache_npages > SRMMU_MAX_NOCACHE_PAGES)
+		srmmu_nocache_npages = SRMMU_MAX_NOCACHE_PAGES;
+
+	srmmu_nocache_size = srmmu_nocache_npages * PAGE_SIZE;
+	srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size;
+}
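+
+/*
+ * A worked sizing example, assuming SRMMU_NOCACHE_ALCRATIO == 64
+ * (256 pages per 64 MB of RAM): with 64 MB of memory, sysmemavail is
+ * 65536 kB, so 65536 / 64 / 1024 * 256 == 256 pages, i.e. 1 MB of
+ * nocache, subject to the MIN/MAX page clamps above.
+ */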
+
+static void __init srmmu_nocache_init(void)
+{
+	void *srmmu_nocache_bitmap;
+	unsigned int bitmap_bits;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long paddr, vaddr;
+	unsigned long pteval;
+
+	bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT;
+
+	srmmu_nocache_pool = __alloc_bootmem(srmmu_nocache_size,
+		SRMMU_NOCACHE_ALIGN_MAX, 0UL);
+	memset(srmmu_nocache_pool, 0, srmmu_nocache_size);
+
+	srmmu_nocache_bitmap =
+		__alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long),
+				SMP_CACHE_BYTES, 0UL);
+	bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits);
+
+	srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
+	memset(__nocache_fix(srmmu_swapper_pg_dir), 0, SRMMU_PGD_TABLE_SIZE);
+	init_mm.pgd = srmmu_swapper_pg_dir;
+
+	srmmu_early_allocate_ptable_skeleton(SRMMU_NOCACHE_VADDR, srmmu_nocache_end);
+
+	paddr = __pa((unsigned long)srmmu_nocache_pool);
+	vaddr = SRMMU_NOCACHE_VADDR;
+
+	while (vaddr < srmmu_nocache_end) {
+		pgd = pgd_offset_k(vaddr);
+		pmd = pmd_offset(__nocache_fix(pgd), vaddr);
+		pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);
+
+		pteval = ((paddr >> 4) | SRMMU_ET_PTE | SRMMU_PRIV);
+
+		if (srmmu_cache_pagetables)
+			pteval |= SRMMU_CACHE;
+
+		set_pte(__nocache_fix(pte), __pte(pteval));
+
+		vaddr += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+
+	flush_cache_all();
+	flush_tlb_all();
+}
+
+pgd_t *get_pgd_fast(void)
+{
+	pgd_t *pgd = NULL;
+
+	pgd = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
+	if (pgd) {
+		pgd_t *init = pgd_offset_k(0);
+		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+		memcpy(pgd + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
+						(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+	}
+
+	return pgd;
+}
+
+/*
+ * Hardware needs alignment to 256 only, but we align to whole page size
+ * to reduce fragmentation problems due to the buddy principle.
+ * XXX Provide actual fragmentation statistics in /proc.
+ *
+ * Alignments up to the page size are the same for physical and virtual
+ * addresses of the nocache area.
+ */
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	unsigned long pte;
+	struct page *page;
+
+	if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
+		return NULL;
+	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	return page;
+}
+
+void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+	unsigned long p;
+
+	pgtable_page_dtor(pte);
+	p = (unsigned long)page_address(pte);	/* Cached address (for test) */
+	if (p == 0)
+		BUG();
+	p = page_to_pfn(pte) << PAGE_SHIFT;	/* Physical address */
+
+	/* free non cached virtual address*/
+	srmmu_free_nocache(__nocache_va(p), PTE_SIZE);
+}
+
+/* context handling - a dynamically sized pool is used */
+#define NO_CONTEXT	-1
+
+struct ctx_list {
+	struct ctx_list *next;
+	struct ctx_list *prev;
+	unsigned int ctx_number;
+	struct mm_struct *ctx_mm;
+};
+
+static struct ctx_list *ctx_list_pool;
+static struct ctx_list ctx_free;
+static struct ctx_list ctx_used;
+
+/* At boot time we determine the number of contexts */
+static int num_contexts;
+
+static inline void remove_from_ctx_list(struct ctx_list *entry)
+{
+	entry->next->prev = entry->prev;
+	entry->prev->next = entry->next;
+}
+
+static inline void add_to_ctx_list(struct ctx_list *head, struct ctx_list *entry)
+{
+	entry->next = head;
+	(entry->prev = head->prev)->next = entry;
+	head->prev = entry;
+}
+#define add_to_free_ctxlist(entry) add_to_ctx_list(&ctx_free, entry)
+#define add_to_used_ctxlist(entry) add_to_ctx_list(&ctx_used, entry)
+
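+/*
+ * Grab a free context if one is available; otherwise steal the oldest
+ * entry on the used list (skipping old_mm, which the caller is about
+ * to switch away from), flush its cache and TLB state, and hand its
+ * context number to mm.  New allocations go to the tail, so ctx_used
+ * is recycled in FIFO order.
+ */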
+static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm)
+{
+	struct ctx_list *ctxp;
+
+	ctxp = ctx_free.next;
+	if (ctxp != &ctx_free) {
+		remove_from_ctx_list(ctxp);
+		add_to_used_ctxlist(ctxp);
+		mm->context = ctxp->ctx_number;
+		ctxp->ctx_mm = mm;
+		return;
+	}
+	ctxp = ctx_used.next;
+	if (ctxp->ctx_mm == old_mm)
+		ctxp = ctxp->next;
+	if (ctxp == &ctx_used)
+		panic("out of mmu contexts");
+	flush_cache_mm(ctxp->ctx_mm);
+	flush_tlb_mm(ctxp->ctx_mm);
+	remove_from_ctx_list(ctxp);
+	add_to_used_ctxlist(ctxp);
+	ctxp->ctx_mm->context = NO_CONTEXT;
+	ctxp->ctx_mm = mm;
+	mm->context = ctxp->ctx_number;
+}
+
+static inline void free_context(int context)
+{
+	struct ctx_list *ctx_old;
+
+	ctx_old = ctx_list_pool + context;
+	remove_from_ctx_list(ctx_old);
+	add_to_free_ctxlist(ctx_old);
+}
+
+static void __init sparc_context_init(int numctx)
+{
+	int ctx;
+	unsigned long size;
+
+	size = numctx * sizeof(struct ctx_list);
+	ctx_list_pool = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
+
+	for (ctx = 0; ctx < numctx; ctx++) {
+		struct ctx_list *clist;
+
+		clist = (ctx_list_pool + ctx);
+		clist->ctx_number = ctx;
+		clist->ctx_mm = NULL;
+	}
+	ctx_free.next = ctx_free.prev = &ctx_free;
+	ctx_used.next = ctx_used.prev = &ctx_used;
+	for (ctx = 0; ctx < numctx; ctx++)
+		add_to_free_ctxlist(ctx_list_pool + ctx);
+}
+
+void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
+	       struct task_struct *tsk)
+{
+	unsigned long flags;
+
+	if (mm->context == NO_CONTEXT) {
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
+		alloc_context(old_mm, mm);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
+		srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
+	}
+
+	if (sparc_cpu_model == sparc_leon)
+		leon_switch_mm();
+
+	if (is_hypersparc)
+		hyper_flush_whole_icache();
+
+	srmmu_set_context(mm->context);
+}
+
+/* Low level IO area allocation on the SRMMU. */
+static inline void srmmu_mapioaddr(unsigned long physaddr,
+				   unsigned long virt_addr, int bus_type)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	unsigned long tmp;
+
+	physaddr &= PAGE_MASK;
+	pgdp = pgd_offset_k(virt_addr);
+	pmdp = pmd_offset(pgdp, virt_addr);
+	ptep = pte_offset_kernel(pmdp, virt_addr);
+	tmp = (physaddr >> 4) | SRMMU_ET_PTE;
+
+	/* I need to test whether this is consistent over all
+	 * sun4m's.  The bus_type represents the upper 4 bits of
+	 * 36-bit physical address on the I/O space lines...
+	 */
+	tmp |= (bus_type << 28);
+	tmp |= SRMMU_PRIV;
+	__flush_page_to_ram(virt_addr);
+	set_pte(ptep, __pte(tmp));
+}
+
+void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
+		      unsigned long xva, unsigned int len)
+{
+	while (len != 0) {
+		len -= PAGE_SIZE;
+		srmmu_mapioaddr(xpa, xva, bus);
+		xva += PAGE_SIZE;
+		xpa += PAGE_SIZE;
+	}
+	flush_tlb_all();
+}
+
+static inline void srmmu_unmapioaddr(unsigned long virt_addr)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	pgdp = pgd_offset_k(virt_addr);
+	pmdp = pmd_offset(pgdp, virt_addr);
+	ptep = pte_offset_kernel(pmdp, virt_addr);
+
+	/* No need to flush uncacheable page. */
+	__pte_clear(ptep);
+}
+
+void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len)
+{
+	while (len != 0) {
+		len -= PAGE_SIZE;
+		srmmu_unmapioaddr(virt_addr);
+		virt_addr += PAGE_SIZE;
+	}
+	flush_tlb_all();
+}
+
+/* tsunami.S */
+extern void tsunami_flush_cache_all(void);
+extern void tsunami_flush_cache_mm(struct mm_struct *mm);
+extern void tsunami_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
+extern void tsunami_flush_page_to_ram(unsigned long page);
+extern void tsunami_flush_page_for_dma(unsigned long page);
+extern void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
+extern void tsunami_flush_tlb_all(void);
+extern void tsunami_flush_tlb_mm(struct mm_struct *mm);
+extern void tsunami_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void tsunami_setup_blockops(void);
+
+/* swift.S */
+extern void swift_flush_cache_all(void);
+extern void swift_flush_cache_mm(struct mm_struct *mm);
+extern void swift_flush_cache_range(struct vm_area_struct *vma,
+				    unsigned long start, unsigned long end);
+extern void swift_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
+extern void swift_flush_page_to_ram(unsigned long page);
+extern void swift_flush_page_for_dma(unsigned long page);
+extern void swift_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
+extern void swift_flush_tlb_all(void);
+extern void swift_flush_tlb_mm(struct mm_struct *mm);
+extern void swift_flush_tlb_range(struct vm_area_struct *vma,
+				  unsigned long start, unsigned long end);
+extern void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+
+#if 0  /* P3: deadwood to debug precise flushes on Swift. */
+void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	int cctx, ctx1;
+
+	page &= PAGE_MASK;
+	if ((ctx1 = vma->vm_mm->context) != -1) {
+		cctx = srmmu_get_context();
+/* Is context # ever different from current context? P3 */
+		if (cctx != ctx1) {
+			printk("flush ctx %02x curr %02x\n", ctx1, cctx);
+			srmmu_set_context(ctx1);
+			swift_flush_page(page);
+			__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
+					"r" (page), "i" (ASI_M_FLUSH_PROBE));
+			srmmu_set_context(cctx);
+		} else {
+			/* Remove protection bits from the virtual cache. */
+			/* swift_flush_cache_all(); */
+			/* swift_flush_cache_page(vma, page); */
+			swift_flush_page(page);
+
+			__asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
+				"r" (page), "i" (ASI_M_FLUSH_PROBE));
+			/* same as above: srmmu_flush_tlb_page() */
+		}
+	}
+}
+#endif
+
+/*
+ * The following are all MBUS based SRMMU modules, and therefore could
+ * be found in a multiprocessor configuration.  On the whole, these
+ * chips seem to be much more touchy about DVMA and page tables
+ * with respect to cache coherency.
+ */
+
+/* viking.S */
+extern void viking_flush_cache_all(void);
+extern void viking_flush_cache_mm(struct mm_struct *mm);
+extern void viking_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
+				     unsigned long end);
+extern void viking_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
+extern void viking_flush_page_to_ram(unsigned long page);
+extern void viking_flush_page_for_dma(unsigned long page);
+extern void viking_flush_sig_insns(struct mm_struct *mm, unsigned long addr);
+extern void viking_flush_page(unsigned long page);
+extern void viking_mxcc_flush_page(unsigned long page);
+extern void viking_flush_tlb_all(void);
+extern void viking_flush_tlb_mm(struct mm_struct *mm);
+extern void viking_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				   unsigned long end);
+extern void viking_flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long page);
+extern void sun4dsmp_flush_tlb_all(void);
+extern void sun4dsmp_flush_tlb_mm(struct mm_struct *mm);
+extern void sun4dsmp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				   unsigned long end);
+extern void sun4dsmp_flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long page);
+
+/* hypersparc.S */
+extern void hypersparc_flush_cache_all(void);
+extern void hypersparc_flush_cache_mm(struct mm_struct *mm);
+extern void hypersparc_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void hypersparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
+extern void hypersparc_flush_page_to_ram(unsigned long page);
+extern void hypersparc_flush_page_for_dma(unsigned long page);
+extern void hypersparc_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
+extern void hypersparc_flush_tlb_all(void);
+extern void hypersparc_flush_tlb_mm(struct mm_struct *mm);
+extern void hypersparc_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
+extern void hypersparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
+extern void hypersparc_setup_blockops(void);
+
+/*
+ * NOTE: All of this startup code assumes the low 16mb (approx.) of
+ *       kernel mappings are done with one single contiguous chunk of
+ *       ram.  On small ram machines (classics mainly) we only get
+ *       around 8mb mapped for us.
+ */
+
+static void __init early_pgtable_allocfail(char *type)
+{
+	prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
+	prom_halt();
+}
+
+static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
+							unsigned long end)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	while (start < end) {
+		pgdp = pgd_offset_k(start);
+		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+			pmdp = __srmmu_get_nocache(
+			    SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
+			if (pmdp == NULL)
+				early_pgtable_allocfail("pmd");
+			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
+			pgd_set(__nocache_fix(pgdp), pmdp);
+		}
+		pmdp = pmd_offset(__nocache_fix(pgdp), start);
+		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
+			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+			if (ptep == NULL)
+				early_pgtable_allocfail("pte");
+			memset(__nocache_fix(ptep), 0, PTE_SIZE);
+			pmd_set(__nocache_fix(pmdp), ptep);
+		}
+		if (start > (0xffffffffUL - PMD_SIZE))
+			break;
+		start = (start + PMD_SIZE) & PMD_MASK;
+	}
+}
+
+static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
+						  unsigned long end)
+{
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+
+	while (start < end) {
+		pgdp = pgd_offset_k(start);
+		if (pgd_none(*pgdp)) {
+			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
+			if (pmdp == NULL)
+				early_pgtable_allocfail("pmd");
+			memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
+			pgd_set(pgdp, pmdp);
+		}
+		pmdp = pmd_offset(pgdp, start);
+		if (srmmu_pmd_none(*pmdp)) {
+			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+			if (ptep == NULL)
+				early_pgtable_allocfail("pte");
+			memset(ptep, 0, PTE_SIZE);
+			pmd_set(pmdp, ptep);
+		}
+		if (start > (0xffffffffUL - PMD_SIZE))
+			break;
+		start = (start + PMD_SIZE) & PMD_MASK;
+	}
+}
+
+/* These flush types are not available on all chips... */
+static inline unsigned long srmmu_probe(unsigned long vaddr)
+{
+	unsigned long retval;
+
+	if (sparc_cpu_model != sparc_leon) {
+
+		vaddr &= PAGE_MASK;
+		__asm__ __volatile__("lda [%1] %2, %0\n\t" :
+				     "=r" (retval) :
+				     "r" (vaddr | 0x400), "i" (ASI_M_FLUSH_PROBE));
+	} else {
+		retval = leon_swprobe(vaddr, NULL);
+	}
+	return retval;
+}
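+
+/*
+ * In the SRMMU probe encoding, VA bits 11:8 select the probe type;
+ * the 0x400 used above selects an "entire" probe, which returns the
+ * matching PTE from whatever level it is found at (or 0 on a miss),
+ * exactly what leon_swprobe() emulates in software.
+ */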
+
+/*
+ * This is much cleaner than poking around physical address space
+ * looking at the prom's page table directly, which is what most
+ * other OSes do.  Yuck... this is much better.
+ */
+static void __init srmmu_inherit_prom_mappings(unsigned long start,
+					       unsigned long end)
+{
+	unsigned long probed;
+	unsigned long addr;
+	pgd_t *pgdp;
+	pmd_t *pmdp;
+	pte_t *ptep;
+	int what; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */
+
+	while (start <= end) {
+		if (start == 0)
+			break; /* probably wrap around */
+		if (start == 0xfef00000)
+			start = KADB_DEBUGGER_BEGVM;
+		probed = srmmu_probe(start);
+		if (!probed) {
+			/* continue probing until we find an entry */
+			start += PAGE_SIZE;
+			continue;
+		}
+
+		/* A red snapper, see what it really is. */
+		what = 0;
+		addr = start - PAGE_SIZE;
+
+		if (!(start & ~(SRMMU_REAL_PMD_MASK))) {
+			if (srmmu_probe(addr + SRMMU_REAL_PMD_SIZE) == probed)
+				what = 1;
+		}
+
+		if (!(start & ~(SRMMU_PGDIR_MASK))) {
+			if (srmmu_probe(addr + SRMMU_PGDIR_SIZE) == probed)
+				what = 2;
+		}
+
+		pgdp = pgd_offset_k(start);
+		if (what == 2) {
+			*(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
+			start += SRMMU_PGDIR_SIZE;
+			continue;
+		}
+		if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
+			pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
+						   SRMMU_PMD_TABLE_SIZE);
+			if (pmdp == NULL)
+				early_pgtable_allocfail("pmd");
+			memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
+			pgd_set(__nocache_fix(pgdp), pmdp);
+		}
+		pmdp = pmd_offset(__nocache_fix(pgdp), start);
+		if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
+			ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
+			if (ptep == NULL)
+				early_pgtable_allocfail("pte");
+			memset(__nocache_fix(ptep), 0, PTE_SIZE);
+			pmd_set(__nocache_fix(pmdp), ptep);
+		}
+		if (what == 1) {
+			/* We bend the rule where all 16 PTPs in a pmd_t point
+			 * inside the same PTE page, and we leak a perfectly
+			 * good hardware PTE piece. Alternatives seem worse.
+			 */
+			unsigned int x;	/* Index of HW PMD in soft cluster */
+			unsigned long *val;
+			x = (start >> PMD_SHIFT) & 15;
+			val = &pmdp->pmdv[x];
+			*(unsigned long *)__nocache_fix(val) = probed;
+			start += SRMMU_REAL_PMD_SIZE;
+			continue;
+		}
+		ptep = pte_offset_kernel(__nocache_fix(pmdp), start);
+		*(pte_t *)__nocache_fix(ptep) = __pte(probed);
+		start += PAGE_SIZE;
+	}
+}
+
+#define KERNEL_PTE(page_shifted) ((page_shifted)|SRMMU_CACHE|SRMMU_PRIV|SRMMU_VALID)
+
+/* Create a third-level SRMMU 16MB page mapping. */
+static void __init do_large_mapping(unsigned long vaddr, unsigned long phys_base)
+{
+	pgd_t *pgdp = pgd_offset_k(vaddr);
+	unsigned long big_pte;
+
+	big_pte = KERNEL_PTE(phys_base >> 4);
+	*(pgd_t *)__nocache_fix(pgdp) = __pgd(big_pte);
+}
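+
+/*
+ * In other words (values illustrative): one pgd-level PTE covers a
+ * full SRMMU_PGDIR_SIZE (16 MB) region with no pmd or pte tables at
+ * all.  For phys_base == 0x50000000 the entry is
+ * KERNEL_PTE(0x50000000 >> 4), i.e. 0x05000000 plus the cache, priv
+ * and valid bits.
+ */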
+
+/* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */
+static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
+{
+	unsigned long pstart = (sp_banks[sp_entry].base_addr & SRMMU_PGDIR_MASK);
+	unsigned long vstart = (vbase & SRMMU_PGDIR_MASK);
+	unsigned long vend = SRMMU_PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
+	/* Map "low" memory only */
+	const unsigned long min_vaddr = PAGE_OFFSET;
+	const unsigned long max_vaddr = PAGE_OFFSET + SRMMU_MAXMEM;
+
+	if (vstart < min_vaddr || vstart >= max_vaddr)
+		return vstart;
+
+	if (vend > max_vaddr || vend < min_vaddr)
+		vend = max_vaddr;
+
+	while (vstart < vend) {
+		do_large_mapping(vstart, pstart);
+		vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE;
+	}
+	return vstart;
+}
+
+static void __init map_kernel(void)
+{
+	int i;
+
+	if (phys_base > 0) {
+		do_large_mapping(PAGE_OFFSET, phys_base);
+	}
+
+	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
+		map_spbank((unsigned long)__va(sp_banks[i].base_addr), i);
+	}
+}
+
+void (*poke_srmmu)(void) = NULL;
+
+void __init srmmu_paging_init(void)
+{
+	int i;
+	phandle cpunode;
+	char node_str[128];
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long pages_avail;
+
+	init_mm.context = (unsigned long) NO_CONTEXT;
+	sparc_iomap.start = SUN4M_IOBASE_VADDR;	/* 16MB of IOSPACE on all sun4m's. */
+
+	if (sparc_cpu_model == sun4d)
+		num_contexts = 65536; /* We know it is Viking */
+	else {
+		/* Find the number of contexts on the srmmu. */
+		cpunode = prom_getchild(prom_root_node);
+		num_contexts = 0;
+		while (cpunode != 0) {
+			prom_getstring(cpunode, "device_type", node_str, sizeof(node_str));
+			if (!strcmp(node_str, "cpu")) {
+				num_contexts = prom_getintdefault(cpunode, "mmu-nctx", 0x8);
+				break;
+			}
+			cpunode = prom_getsibling(cpunode);
+		}
+	}
+
+	if (!num_contexts) {
+		prom_printf("Something wrong, can't find cpu node in paging_init.\n");
+		prom_halt();
+	}
+
+	pages_avail = 0;
+	last_valid_pfn = bootmem_init(&pages_avail);
+
+	srmmu_nocache_calcsize();
+	srmmu_nocache_init();
+	srmmu_inherit_prom_mappings(0xfe400000, (LINUX_OPPROM_ENDVM - PAGE_SIZE));
+	map_kernel();
+
+	/* ctx table has to be physically aligned to its size */
+	srmmu_context_table = __srmmu_get_nocache(num_contexts * sizeof(ctxd_t), num_contexts * sizeof(ctxd_t));
+	srmmu_ctx_table_phys = (ctxd_t *)__nocache_pa(srmmu_context_table);
+
+	for (i = 0; i < num_contexts; i++)
+		srmmu_ctxd_set((ctxd_t *)__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir);
+
+	flush_cache_all();
+	srmmu_set_ctable_ptr((unsigned long)srmmu_ctx_table_phys);
+#ifdef CONFIG_SMP
+	/* Stop from hanging here... */
+	local_ops->tlb_all();
+#else
+	flush_tlb_all();
+#endif
+	poke_srmmu();
+
+	srmmu_allocate_ptable_skeleton(sparc_iomap.start, IOBASE_END);
+	srmmu_allocate_ptable_skeleton(DVMA_VADDR, DVMA_END);
+
+	srmmu_allocate_ptable_skeleton(
+		__fix_to_virt(__end_of_fixed_addresses - 1), FIXADDR_TOP);
+	srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);
+
+	pgd = pgd_offset_k(PKMAP_BASE);
+	pmd = pmd_offset(pgd, PKMAP_BASE);
+	pte = pte_offset_kernel(pmd, PKMAP_BASE);
+	pkmap_page_table = pte;
+
+	flush_cache_all();
+	flush_tlb_all();
+
+	sparc_context_init(num_contexts);
+
+	kmap_init();
+
+	{
+		unsigned long zones_size[MAX_NR_ZONES];
+		unsigned long zholes_size[MAX_NR_ZONES];
+		unsigned long npages;
+		int znum;
+
+		for (znum = 0; znum < MAX_NR_ZONES; znum++)
+			zones_size[znum] = zholes_size[znum] = 0;
+
+		npages = max_low_pfn - pfn_base;
+
+		zones_size[ZONE_DMA] = npages;
+		zholes_size[ZONE_DMA] = npages - pages_avail;
+
+		npages = highend_pfn - max_low_pfn;
+		zones_size[ZONE_HIGHMEM] = npages;
+		zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
+
+		free_area_init_node(0, zones_size, pfn_base, zholes_size);
+	}
+}
+
+void mmu_info(struct seq_file *m)
+{
+	seq_printf(m,
+		   "MMU type\t: %s\n"
+		   "contexts\t: %d\n"
+		   "nocache total\t: %ld\n"
+		   "nocache used\t: %d\n",
+		   srmmu_name,
+		   num_contexts,
+		   srmmu_nocache_size,
+		   srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
+}
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	mm->context = NO_CONTEXT;
+	return 0;
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+	unsigned long flags;
+
+	if (mm->context != NO_CONTEXT) {
+		flush_cache_mm(mm);
+		srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
+		flush_tlb_mm(mm);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
+		free_context(mm->context);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
+		mm->context = NO_CONTEXT;
+	}
+}
+
+/* Init various srmmu chip types. */
+static void __init srmmu_is_bad(void)
+{
+	prom_printf("Could not determine SRMMU chip type.\n");
+	prom_halt();
+}
+
+static void __init init_vac_layout(void)
+{
+	phandle nd;
+	int cache_lines;
+	char node_str[128];
+#ifdef CONFIG_SMP
+	int cpu = 0;
+	unsigned long max_size = 0;
+	unsigned long min_line_size = 0x10000000;
+#endif
+
+	nd = prom_getchild(prom_root_node);
+	while ((nd = prom_getsibling(nd)) != 0) {
+		prom_getstring(nd, "device_type", node_str, sizeof(node_str));
+		if (!strcmp(node_str, "cpu")) {
+			vac_line_size = prom_getint(nd, "cache-line-size");
+			if (vac_line_size == -1) {
+				prom_printf("can't determine cache-line-size, halting.\n");
+				prom_halt();
+			}
+			cache_lines = prom_getint(nd, "cache-nlines");
+			if (cache_lines == -1) {
+				prom_printf("can't determine cache-nlines, halting.\n");
+				prom_halt();
+			}
+
+			vac_cache_size = cache_lines * vac_line_size;
+#ifdef CONFIG_SMP
+			if (vac_cache_size > max_size)
+				max_size = vac_cache_size;
+			if (vac_line_size < min_line_size)
+				min_line_size = vac_line_size;
+			//FIXME: cpus not contiguous!!
+			cpu++;
+			if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+				break;
+#else
+			break;
+#endif
+		}
+	}
+	if (nd == 0) {
+		prom_printf("No CPU nodes found, halting.\n");
+		prom_halt();
+	}
+#ifdef CONFIG_SMP
+	vac_cache_size = max_size;
+	vac_line_size = min_line_size;
+#endif
+	printk("SRMMU: Using VAC size of %d bytes, line size %d bytes.\n",
+	       (int)vac_cache_size, (int)vac_line_size);
+}
+
+static void poke_hypersparc(void)
+{
+	volatile unsigned long clear;
+	unsigned long mreg = srmmu_get_mmureg();
+
+	hyper_flush_unconditional_combined();
+
+	mreg &= ~(HYPERSPARC_CWENABLE);
+	mreg |= (HYPERSPARC_CENABLE | HYPERSPARC_WBENABLE);
+	mreg |= (HYPERSPARC_CMODE);
+
+	srmmu_set_mmureg(mreg);
+
+#if 0 /* XXX I think this is bad news... -DaveM */
+	hyper_clear_all_tags();
+#endif
+
+	put_ross_icr(HYPERSPARC_ICCR_FTD | HYPERSPARC_ICCR_ICE);
+	hyper_flush_whole_icache();
+	clear = srmmu_get_faddr();
+	clear = srmmu_get_fstatus();
+}
+
+static const struct sparc32_cachetlb_ops hypersparc_ops = {
+	.cache_all	= hypersparc_flush_cache_all,
+	.cache_mm	= hypersparc_flush_cache_mm,
+	.cache_page	= hypersparc_flush_cache_page,
+	.cache_range	= hypersparc_flush_cache_range,
+	.tlb_all	= hypersparc_flush_tlb_all,
+	.tlb_mm		= hypersparc_flush_tlb_mm,
+	.tlb_page	= hypersparc_flush_tlb_page,
+	.tlb_range	= hypersparc_flush_tlb_range,
+	.page_to_ram	= hypersparc_flush_page_to_ram,
+	.sig_insns	= hypersparc_flush_sig_insns,
+	.page_for_dma	= hypersparc_flush_page_for_dma,
+};
+
+static void __init init_hypersparc(void)
+{
+	srmmu_name = "ROSS HyperSparc";
+	srmmu_modtype = HyperSparc;
+
+	init_vac_layout();
+
+	is_hypersparc = 1;
+	sparc32_cachetlb_ops = &hypersparc_ops;
+
+	poke_srmmu = poke_hypersparc;
+
+	hypersparc_setup_blockops();
+}
+
+static void poke_swift(void)
+{
+	unsigned long mreg;
+
+	/* Clear any crap from the cache or else... */
+	swift_flush_cache_all();
+
+	/* Enable I & D caches */
+	mreg = srmmu_get_mmureg();
+	mreg |= (SWIFT_IE | SWIFT_DE);
+	/*
+	 * The Swift branch folding logic is completely broken.  At
+	 * trap time, if things are just right, it can mistakenly
+	 * think that a trap is coming from kernel mode when in fact
+	 * it is coming from user mode (it mis-executes the branch in
+	 * the trap code).  So you see things like crashme completely
+	 * hosing your machine which is completely unacceptable.  Turn
+	 * this shit off... nice job Fujitsu.
+	 */
+	mreg &= ~(SWIFT_BF);
+	srmmu_set_mmureg(mreg);
+}
+
+static const struct sparc32_cachetlb_ops swift_ops = {
+	.cache_all	= swift_flush_cache_all,
+	.cache_mm	= swift_flush_cache_mm,
+	.cache_page	= swift_flush_cache_page,
+	.cache_range	= swift_flush_cache_range,
+	.tlb_all	= swift_flush_tlb_all,
+	.tlb_mm		= swift_flush_tlb_mm,
+	.tlb_page	= swift_flush_tlb_page,
+	.tlb_range	= swift_flush_tlb_range,
+	.page_to_ram	= swift_flush_page_to_ram,
+	.sig_insns	= swift_flush_sig_insns,
+	.page_for_dma	= swift_flush_page_for_dma,
+};
+
+#define SWIFT_MASKID_ADDR  0x10003018
+static void __init init_swift(void)
+{
+	unsigned long swift_rev;
+
+	__asm__ __volatile__("lda [%1] %2, %0\n\t"
+			     "srl %0, 0x18, %0\n\t" :
+			     "=r" (swift_rev) :
+			     "r" (SWIFT_MASKID_ADDR), "i" (ASI_M_BYPASS));
+	srmmu_name = "Fujitsu Swift";
+	switch (swift_rev) {
+	case 0x11:
+	case 0x20:
+	case 0x23:
+	case 0x30:
+		srmmu_modtype = Swift_lots_o_bugs;
+		hwbug_bitmask |= (HWBUG_KERN_ACCBROKEN | HWBUG_KERN_CBITBROKEN);
+		/*
+		 * Gee george, I wonder why Sun is so hush hush about
+		 * this hardware bug... really braindamaged stuff going
+		 * on here.  However I think we can find a way to avoid
+		 * all of the workaround overhead under Linux.  Basically,
+		 * any page fault can cause kernel pages to become user
+		 * accessible (the mmu gets confused and clears some of
+		 * the ACC bits in kernel ptes).  Aha, sounds pretty
+		 * horrible eh?  But wait, after extensive testing it appears
+		 * that if you use pgd_t level large kernel pte's (like the
+		 * 4MB pages on the Pentium) the bug does not get tripped
+		 * at all.  This avoids almost all of the major overhead.
+		 * Welcome to a world where your vendor tells you,
+		 * "apply this kernel patch" instead of "sorry for the
+		 * broken hardware, send it back and we'll give you
+		 * properly functioning parts".
+		 */
+		break;
+	case 0x25:
+	case 0x31:
+		srmmu_modtype = Swift_bad_c;
+		hwbug_bitmask |= HWBUG_KERN_CBITBROKEN;
+		/*
+		 * You see Sun allude to this hardware bug but never
+		 * admit things directly; they'll say things like,
+		 * "the Swift chip cache problems" or similar.
+		 */
+		break;
+	default:
+		srmmu_modtype = Swift_ok;
+		break;
+	}
+
+	sparc32_cachetlb_ops = &swift_ops;
+	flush_page_for_dma_global = 0;
+
+	/*
+	 * Are you now convinced that the Swift is one of the
+	 * biggest VLSI abortions of all time?  Bravo Fujitsu!
+	 * Fujitsu, the !#?!%$'d up processor people.  I bet if
+	 * you examined the microcode of the Swift you'd find
+	 * XXX's all over the place.
+	 */
+	poke_srmmu = poke_swift;
+}
+
+static void turbosparc_flush_cache_all(void)
+{
+	flush_user_windows();
+	turbosparc_idflash_clear();
+}
+
+static void turbosparc_flush_cache_mm(struct mm_struct *mm)
+{
+	FLUSH_BEGIN(mm)
+	flush_user_windows();
+	turbosparc_idflash_clear();
+	FLUSH_END
+}
+
+static void turbosparc_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	FLUSH_BEGIN(vma->vm_mm)
+	flush_user_windows();
+	turbosparc_idflash_clear();
+	FLUSH_END
+}
+
+static void turbosparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
+{
+	FLUSH_BEGIN(vma->vm_mm)
+	flush_user_windows();
+	if (vma->vm_flags & VM_EXEC)
+		turbosparc_flush_icache();
+	turbosparc_flush_dcache();
+	FLUSH_END
+}
+
+/* TurboSparc is copy-back if we turn it on, but this does not work. */
+static void turbosparc_flush_page_to_ram(unsigned long page)
+{
+#ifdef TURBOSPARC_WRITEBACK
+	volatile unsigned long clear;
+
+	if (srmmu_probe(page))
+		turbosparc_flush_page_cache(page);
+	clear = srmmu_get_fstatus();
+#endif
+}
+
+static void turbosparc_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
+{
+}
+
+static void turbosparc_flush_page_for_dma(unsigned long page)
+{
+	turbosparc_flush_dcache();
+}
+
+static void turbosparc_flush_tlb_all(void)
+{
+	srmmu_flush_whole_tlb();
+}
+
+static void turbosparc_flush_tlb_mm(struct mm_struct *mm)
+{
+	FLUSH_BEGIN(mm)
+	srmmu_flush_whole_tlb();
+	FLUSH_END
+}
+
+static void turbosparc_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	FLUSH_BEGIN(vma->vm_mm)
+	srmmu_flush_whole_tlb();
+	FLUSH_END
+}
+
+static void turbosparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	FLUSH_BEGIN(vma->vm_mm)
+	srmmu_flush_whole_tlb();
+	FLUSH_END
+}
+
+static void poke_turbosparc(void)
+{
+	unsigned long mreg = srmmu_get_mmureg();
+	unsigned long ccreg;
+
+	/* Clear any crap from the cache or else... */
+	turbosparc_flush_cache_all();
+	/* Temporarily disable I & D caches */
+	mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE);
+	mreg &= ~(TURBOSPARC_PCENABLE);		/* Don't check parity */
+	srmmu_set_mmureg(mreg);
+
+	ccreg = turbosparc_get_ccreg();
+
+#ifdef TURBOSPARC_WRITEBACK
+	ccreg |= (TURBOSPARC_SNENABLE);		/* Do DVMA snooping in Dcache */
+	ccreg &= ~(TURBOSPARC_uS2 | TURBOSPARC_WTENABLE);
+			/* Write-back D-cache, emulate VLSI
+			 * abortion number three, not number one */
+#else
+	/* For now let's play safe, optimize later */
+	ccreg |= (TURBOSPARC_SNENABLE | TURBOSPARC_WTENABLE);
+			/* Do DVMA snooping in Dcache, Write-thru D-cache */
+	ccreg &= ~(TURBOSPARC_uS2);
+			/* Emulate VLSI abortion number three, not number one */
+#endif
+
+	switch (ccreg & 7) {
+	case 0: /* No SE cache */
+	case 7: /* Test mode */
+		break;
+	default:
+		ccreg |= (TURBOSPARC_SCENABLE);
+	}
+	turbosparc_set_ccreg(ccreg);
+
+	mreg |= (TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* I & D caches on */
+	mreg |= (TURBOSPARC_ICSNOOP);		/* Icache snooping on */
+	srmmu_set_mmureg(mreg);
+}
+
+static const struct sparc32_cachetlb_ops turbosparc_ops = {
+	.cache_all	= turbosparc_flush_cache_all,
+	.cache_mm	= turbosparc_flush_cache_mm,
+	.cache_page	= turbosparc_flush_cache_page,
+	.cache_range	= turbosparc_flush_cache_range,
+	.tlb_all	= turbosparc_flush_tlb_all,
+	.tlb_mm		= turbosparc_flush_tlb_mm,
+	.tlb_page	= turbosparc_flush_tlb_page,
+	.tlb_range	= turbosparc_flush_tlb_range,
+	.page_to_ram	= turbosparc_flush_page_to_ram,
+	.sig_insns	= turbosparc_flush_sig_insns,
+	.page_for_dma	= turbosparc_flush_page_for_dma,
+};
+
+static void __init init_turbosparc(void)
+{
+	srmmu_name = "Fujitsu TurboSparc";
+	srmmu_modtype = TurboSparc;
+	sparc32_cachetlb_ops = &turbosparc_ops;
+	poke_srmmu = poke_turbosparc;
+}
+
+static void poke_tsunami(void)
+{
+	unsigned long mreg = srmmu_get_mmureg();
+
+	tsunami_flush_icache();
+	tsunami_flush_dcache();
+	mreg &= ~TSUNAMI_ITD;
+	mreg |= (TSUNAMI_IENAB | TSUNAMI_DENAB);
+	srmmu_set_mmureg(mreg);
+}
+
+static const struct sparc32_cachetlb_ops tsunami_ops = {
+	.cache_all	= tsunami_flush_cache_all,
+	.cache_mm	= tsunami_flush_cache_mm,
+	.cache_page	= tsunami_flush_cache_page,
+	.cache_range	= tsunami_flush_cache_range,
+	.tlb_all	= tsunami_flush_tlb_all,
+	.tlb_mm		= tsunami_flush_tlb_mm,
+	.tlb_page	= tsunami_flush_tlb_page,
+	.tlb_range	= tsunami_flush_tlb_range,
+	.page_to_ram	= tsunami_flush_page_to_ram,
+	.sig_insns	= tsunami_flush_sig_insns,
+	.page_for_dma	= tsunami_flush_page_for_dma,
+};
+
+static void __init init_tsunami(void)
+{
+	/*
+	 * Tsunami's pretty sane, Sun and TI actually got it
+	 * somewhat right this time.  Fujitsu should have
+	 * taken some lessons from them.
+	 */
+
+	srmmu_name = "TI Tsunami";
+	srmmu_modtype = Tsunami;
+	sparc32_cachetlb_ops = &tsunami_ops;
+	poke_srmmu = poke_tsunami;
+
+	tsunami_setup_blockops();
+}
+
+static void poke_viking(void)
+{
+	unsigned long mreg = srmmu_get_mmureg();
+	static int smp_catch;
+
+	if (viking_mxcc_present) {
+		unsigned long mxcc_control = mxcc_get_creg();
+
+		mxcc_control |= (MXCC_CTL_ECE | MXCC_CTL_PRE | MXCC_CTL_MCE);
+		mxcc_control &= ~(MXCC_CTL_RRC);
+		mxcc_set_creg(mxcc_control);
+
+		/*
+		 * We don't need memory parity checks.
+		 * XXX This is a mess, have to dig out later. ecd.
+		viking_mxcc_turn_off_parity(&mreg, &mxcc_control);
+		 */
+
+		/* We do cache ptables on MXCC. */
+		mreg |= VIKING_TCENABLE;
+	} else {
+		unsigned long bpreg;
+
+		mreg &= ~(VIKING_TCENABLE);
+		if (smp_catch++) {
+			/* Must disable mixed-cmd mode here for other cpu's. */
+			bpreg = viking_get_bpreg();
+			bpreg &= ~(VIKING_ACTION_MIX);
+			viking_set_bpreg(bpreg);
+
+			/* Just in case PROM does something funny. */
+			msi_set_sync();
+		}
+	}
+
+	mreg |= VIKING_SPENABLE;
+	mreg |= (VIKING_ICENABLE | VIKING_DCENABLE);
+	mreg |= VIKING_SBENABLE;
+	mreg &= ~(VIKING_ACENABLE);
+	srmmu_set_mmureg(mreg);
+}
+
+static struct sparc32_cachetlb_ops viking_ops __ro_after_init = {
+	.cache_all	= viking_flush_cache_all,
+	.cache_mm	= viking_flush_cache_mm,
+	.cache_page	= viking_flush_cache_page,
+	.cache_range	= viking_flush_cache_range,
+	.tlb_all	= viking_flush_tlb_all,
+	.tlb_mm		= viking_flush_tlb_mm,
+	.tlb_page	= viking_flush_tlb_page,
+	.tlb_range	= viking_flush_tlb_range,
+	.page_to_ram	= viking_flush_page_to_ram,
+	.sig_insns	= viking_flush_sig_insns,
+	.page_for_dma	= viking_flush_page_for_dma,
+};
+
+#ifdef CONFIG_SMP
+/* On sun4d the cpu broadcasts local TLB flushes, so we can just
+ * perform the local TLB flush and all the other cpus will see it.
+ * But, unfortunately, there is a bug in the sun4d XBUS backplane
+ * that requires that we add some synchronization to these flushes.
+ *
+ * The bug is that the fifo which keeps track of all the pending TLB
+ * broadcasts in the system is an entry or two too small, so if we
+ * have too many going at once we'll overflow that fifo and lose a TLB
+ * flush resulting in corruption.
+ *
+ * Our workaround is to take a global spinlock around the TLB flushes,
+ * which guarantees we won't ever have too many pending.  It's a big
+ * hammer, but a semaphore-like system to make sure we only have N TLB
+ * flushes going at once will require SMP locking anyway, so there's
+ * no real value in trying any harder than this.
+ */
+static struct sparc32_cachetlb_ops viking_sun4d_smp_ops __ro_after_init = {
+	.cache_all	= viking_flush_cache_all,
+	.cache_mm	= viking_flush_cache_mm,
+	.cache_page	= viking_flush_cache_page,
+	.cache_range	= viking_flush_cache_range,
+	.tlb_all	= sun4dsmp_flush_tlb_all,
+	.tlb_mm		= sun4dsmp_flush_tlb_mm,
+	.tlb_page	= sun4dsmp_flush_tlb_page,
+	.tlb_range	= sun4dsmp_flush_tlb_range,
+	.page_to_ram	= viking_flush_page_to_ram,
+	.sig_insns	= viking_flush_sig_insns,
+	.page_for_dma	= viking_flush_page_for_dma,
+};
+#endif
+
+static void __init init_viking(void)
+{
+	unsigned long mreg = srmmu_get_mmureg();
+
+	/* Ahhh, the viking.  SRMMU VLSI abortion number two... */
+	if (mreg & VIKING_MMODE) {
+		srmmu_name = "TI Viking";
+		viking_mxcc_present = 0;
+		msi_set_sync();
+
+		/*
+		 * We need this to make sure old viking takes no hits
+		 * on its cache for dma snoops to work around the
+		 * "load from non-cacheable memory" interrupt bug.
+		 * This is only necessary because of the new way in
+		 * which we use the IOMMU.
+		 */
+		viking_ops.page_for_dma = viking_flush_page;
+#ifdef CONFIG_SMP
+		viking_sun4d_smp_ops.page_for_dma = viking_flush_page;
+#endif
+		flush_page_for_dma_global = 0;
+	} else {
+		srmmu_name = "TI Viking/MXCC";
+		viking_mxcc_present = 1;
+		srmmu_cache_pagetables = 1;
+	}
+
+	sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
+		&viking_ops;
+#ifdef CONFIG_SMP
+	if (sparc_cpu_model == sun4d)
+		sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
+			&viking_sun4d_smp_ops;
+#endif
+
+	poke_srmmu = poke_viking;
+}
+
+/* Probe for the srmmu chip version. */
+static void __init get_srmmu_type(void)
+{
+	unsigned long mreg, psr;
+	unsigned long mod_typ, mod_rev, psr_typ, psr_vers;
+
+	srmmu_modtype = SRMMU_INVAL_MOD;
+	hwbug_bitmask = 0;
+
+	mreg = srmmu_get_mmureg(); psr = get_psr();
+	mod_typ = (mreg & 0xf0000000) >> 28;
+	mod_rev = (mreg & 0x0f000000) >> 24;
+	psr_typ = (psr >> 28) & 0xf;
+	psr_vers = (psr >> 24) & 0xf;
+
+	/* First, check for sparc-leon. */
+	if (sparc_cpu_model == sparc_leon) {
+		init_leon();
+		return;
+	}
+
+	/* Second, check for HyperSparc or Cypress. */
+	if (mod_typ == 1) {
+		switch (mod_rev) {
+		case 7:
+			/* UP or MP Hypersparc */
+			init_hypersparc();
+			break;
+		case 0:
+		case 2:
+		case 10:
+		case 11:
+		case 12:
+		case 13:
+		case 14:
+		case 15:
+		default:
+			prom_printf("Sparc-Linux Cypress support does not longer exit.\n");
+			prom_halt();
+			break;
+		}
+		return;
+	}
+
+	/* Now Fujitsu TurboSparc. It might happen that it is
+	 * in Swift emulation mode, so we will check later...
+	 */
+	if (psr_typ == 0 && psr_vers == 5) {
+		init_turbosparc();
+		return;
+	}
+
+	/* Next check for Fujitsu Swift. */
+	if (psr_typ == 0 && psr_vers == 4) {
+		phandle cpunode;
+		char node_str[128];
+
+		/* Check whether it is really a TurboSparc emulating Swift... */
+		cpunode = prom_getchild(prom_root_node);
+		while ((cpunode = prom_getsibling(cpunode)) != 0) {
+			prom_getstring(cpunode, "device_type", node_str, sizeof(node_str));
+			if (!strcmp(node_str, "cpu")) {
+				if (!prom_getintdefault(cpunode, "psr-implementation", 1) &&
+				    prom_getintdefault(cpunode, "psr-version", 1) == 5) {
+					init_turbosparc();
+					return;
+				}
+				break;
+			}
+		}
+
+		init_swift();
+		return;
+	}
+
+	/* Now the Viking family of srmmu. */
+	if (psr_typ == 4 &&
+	   ((psr_vers == 0) ||
+	    ((psr_vers == 1) && (mod_typ == 0) && (mod_rev == 0)))) {
+		init_viking();
+		return;
+	}
+
+	/* Finally the Tsunami. */
+	if (psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) {
+		init_tsunami();
+		return;
+	}
+
+	/* Oh well */
+	srmmu_is_bad();
+}
+
+#ifdef CONFIG_SMP
+/* Local cross-calls. */
+static void smp_flush_page_for_dma(unsigned long page)
+{
+	xc1((smpfunc_t) local_ops->page_for_dma, page);
+	local_ops->page_for_dma(page);
+}
+
+static void smp_flush_cache_all(void)
+{
+	xc0((smpfunc_t) local_ops->cache_all);
+	local_ops->cache_all();
+}
+
+static void smp_flush_tlb_all(void)
+{
+	xc0((smpfunc_t) local_ops->tlb_all);
+	local_ops->tlb_all();
+}
+
+static void smp_flush_cache_mm(struct mm_struct *mm)
+{
+	if (mm->context != NO_CONTEXT) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
+			xc1((smpfunc_t) local_ops->cache_mm, (unsigned long) mm);
+		local_ops->cache_mm(mm);
+	}
+}
+
+static void smp_flush_tlb_mm(struct mm_struct *mm)
+{
+	if (mm->context != NO_CONTEXT) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask)) {
+			xc1((smpfunc_t) local_ops->tlb_mm, (unsigned long) mm);
+			if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
+				cpumask_copy(mm_cpumask(mm),
+					     cpumask_of(smp_processor_id()));
+		}
+		local_ops->tlb_mm(mm);
+	}
+}
+
+static void smp_flush_cache_range(struct vm_area_struct *vma,
+				  unsigned long start,
+				  unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context != NO_CONTEXT) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
+			xc3((smpfunc_t) local_ops->cache_range,
+			    (unsigned long) vma, start, end);
+		local_ops->cache_range(vma, start, end);
+	}
+}
+
+static void smp_flush_tlb_range(struct vm_area_struct *vma,
+				unsigned long start,
+				unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context != NO_CONTEXT) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
+			xc3((smpfunc_t) local_ops->tlb_range,
+			    (unsigned long) vma, start, end);
+		local_ops->tlb_range(vma, start, end);
+	}
+}
+
+static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context != NO_CONTEXT) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
+			xc2((smpfunc_t) local_ops->cache_page,
+			    (unsigned long) vma, page);
+		local_ops->cache_page(vma, page);
+	}
+}
+
+static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm->context != NO_CONTEXT) {
+		cpumask_t cpu_mask;
+		cpumask_copy(&cpu_mask, mm_cpumask(mm));
+		cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+		if (!cpumask_empty(&cpu_mask))
+			xc2((smpfunc_t) local_ops->tlb_page,
+			    (unsigned long) vma, page);
+		local_ops->tlb_page(vma, page);
+	}
+}
+
+static void smp_flush_page_to_ram(unsigned long page)
+{
+	/* Current theory is that those who call this are the ones
+	 * who have just dirtied their cache with the page's contents
+	 * in kernel space, therefore we only run this on the local cpu.
+	 *
+	 * XXX This experiment failed, research further... -DaveM
+	 */
+#if 1
+	xc1((smpfunc_t) local_ops->page_to_ram, page);
+#endif
+	local_ops->page_to_ram(page);
+}
+
+static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
+{
+	cpumask_t cpu_mask;
+	cpumask_copy(&cpu_mask, mm_cpumask(mm));
+	cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+	if (!cpumask_empty(&cpu_mask))
+		xc2((smpfunc_t) local_ops->sig_insns,
+		    (unsigned long) mm, insn_addr);
+	local_ops->sig_insns(mm, insn_addr);
+}
+
+static struct sparc32_cachetlb_ops smp_cachetlb_ops __ro_after_init = {
+	.cache_all	= smp_flush_cache_all,
+	.cache_mm	= smp_flush_cache_mm,
+	.cache_page	= smp_flush_cache_page,
+	.cache_range	= smp_flush_cache_range,
+	.tlb_all	= smp_flush_tlb_all,
+	.tlb_mm		= smp_flush_tlb_mm,
+	.tlb_page	= smp_flush_tlb_page,
+	.tlb_range	= smp_flush_tlb_range,
+	.page_to_ram	= smp_flush_page_to_ram,
+	.sig_insns	= smp_flush_sig_insns,
+	.page_for_dma	= smp_flush_page_for_dma,
+};
+#endif
+
+/* Load up routines and constants for sun4m and sun4d mmu */
+void __init load_mmu(void)
+{
+	/* Functions */
+	get_srmmu_type();
+
+#ifdef CONFIG_SMP
+	/* El switcheroo... */
+	local_ops = sparc32_cachetlb_ops;
+
+	if (sparc_cpu_model == sun4d || sparc_cpu_model == sparc_leon) {
+		smp_cachetlb_ops.tlb_all = local_ops->tlb_all;
+		smp_cachetlb_ops.tlb_mm = local_ops->tlb_mm;
+		smp_cachetlb_ops.tlb_range = local_ops->tlb_range;
+		smp_cachetlb_ops.tlb_page = local_ops->tlb_page;
+	}
+
+	if (poke_srmmu == poke_viking) {
+		/* Avoid unnecessary cross calls. */
+		smp_cachetlb_ops.cache_all = local_ops->cache_all;
+		smp_cachetlb_ops.cache_mm = local_ops->cache_mm;
+		smp_cachetlb_ops.cache_range = local_ops->cache_range;
+		smp_cachetlb_ops.cache_page = local_ops->cache_page;
+
+		smp_cachetlb_ops.page_to_ram = local_ops->page_to_ram;
+		smp_cachetlb_ops.sig_insns = local_ops->sig_insns;
+		smp_cachetlb_ops.page_for_dma = local_ops->page_for_dma;
+	}
+
+	/* It really is const after this point. */
+	sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
+		&smp_cachetlb_ops;
+#endif
+
+	if (sparc_cpu_model == sun4d)
+		ld_mmu_iounit();
+	else
+		ld_mmu_iommu();
+#ifdef CONFIG_SMP
+	if (sparc_cpu_model == sun4d)
+		sun4d_init_smp();
+	else if (sparc_cpu_model == sparc_leon)
+		leon_init_smp();
+	else
+		sun4m_init_smp();
+#endif
+}
diff --git a/arch/sparc/mm/srmmu_access.S b/arch/sparc/mm/srmmu_access.S
new file mode 100644
index 0000000..d8d2e64
--- /dev/null
+++ b/arch/sparc/mm/srmmu_access.S
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Assembler variants of srmmu access functions.
+ * Implemented in assembler to allow run-time patching.
+ * LEON uses a different ASI for MMUREGS than SUN.
+ *
+ * The leon_1insn_patch infrastructure is used
+ * for the run-time patching.
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asmmacro.h>
+#include <asm/pgtsrmmu.h>
+#include <asm/asi.h>
+
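+/* Each register access below is written twice, once per ASI; the
+ * leon_1insn_patch table lets the boot code keep whichever variant
+ * matches the hardware.
+ */
+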
+/* unsigned int srmmu_get_mmureg(void) */
+ENTRY(srmmu_get_mmureg)
+LEON_PI(lda	[%g0] ASI_LEON_MMUREGS, %o0)
+SUN_PI_(lda	[%g0] ASI_M_MMUREGS, %o0)
+	retl
+	 nop
+ENDPROC(srmmu_get_mmureg)
+
+/* void srmmu_set_mmureg(unsigned long regval) */
+ENTRY(srmmu_set_mmureg)
+LEON_PI(sta	%o0, [%g0] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%o0, [%g0] ASI_M_MMUREGS)
+	retl
+	 nop
+ENDPROC(srmmu_set_mmureg)
+
+/* void srmmu_set_ctable_ptr(unsigned long paddr) */
+ENTRY(srmmu_set_ctable_ptr)
+	/* paddr = ((paddr >> 4) & SRMMU_CTX_PMASK); */
+	srl	%o0, 4, %g1
+	and	%g1, SRMMU_CTX_PMASK, %g1
+
+	mov	SRMMU_CTXTBL_PTR, %g2
+LEON_PI(sta	%g1, [%g2] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%g1, [%g2] ASI_M_MMUREGS)
+	retl
+	 nop
+ENDPROC(srmmu_set_ctable_ptr)
+
+
+/* void srmmu_set_context(int context) */
+ENTRY(srmmu_set_context)
+	mov	SRMMU_CTX_REG, %g1
+LEON_PI(sta	%o0, [%g1] ASI_LEON_MMUREGS)
+SUN_PI_(sta	%o0, [%g1] ASI_M_MMUREGS)
+	retl
+	 nop
+ENDPROC(srmmu_set_context)
+
+
+/* int srmmu_get_context(void) */
+ENTRY(srmmu_get_context)
+	mov	SRMMU_CTX_REG, %o0
+LEON_PI(lda     [%o0] ASI_LEON_MMUREGS, %o0)
+SUN_PI_(lda	[%o0] ASI_M_MMUREGS, %o0)
+	retl
+	 nop
+ENDPROC(srmmu_get_context)
+
+
+/* unsigned int srmmu_get_fstatus(void) */
+ENTRY(srmmu_get_fstatus)
+	mov	SRMMU_FAULT_STATUS, %o0
+LEON_PI(lda     [%o0] ASI_LEON_MMUREGS, %o0)
+SUN_PI_(lda	[%o0] ASI_M_MMUREGS, %o0)
+	retl
+	 nop
+ENDPROC(srmmu_get_fstatus)
+
+
+/* unsigned int srmmu_get_faddr(void) */
+ENTRY(srmmu_get_faddr)
+	mov	SRMMU_FAULT_ADDR, %o0
+LEON_PI(lda     [%o0] ASI_LEON_MMUREGS, %o0)
+SUN_PI_(lda	[%o0] ASI_M_MMUREGS, %o0)
+	retl
+	 nop
+ENDPROC(srmmu_get_faddr)
diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S
new file mode 100644
index 0000000..f414bfd
--- /dev/null
+++ b/arch/sparc/mm/swift.S
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * swift.S: MicroSparc-II mmu/cache operations.
+ *
+ * Copyright (C) 1999 David S. Miller (davem@redhat.com)
+ */
+
+#include <asm/psr.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+#include <asm/asm-offsets.h>
+
+	.text
+	.align	4
+
+#if 1	/* XXX screw this, I can't get the VAC flushes working
+	 * XXX reliably... -DaveM
+	 */
+	.globl	swift_flush_cache_all, swift_flush_cache_mm
+	.globl	swift_flush_cache_range, swift_flush_cache_page
+	.globl	swift_flush_page_for_dma
+	.globl	swift_flush_page_to_ram
+
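+	/* With the VAC flushes disabled above, every cache-flush entry
+	 * point collapses into one routine that simply walks the tag
+	 * space and clears the D-cache and I-cache tags.
+	 */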
+swift_flush_cache_all:
+swift_flush_cache_mm:
+swift_flush_cache_range:
+swift_flush_cache_page:
+swift_flush_page_for_dma:
+swift_flush_page_to_ram:
+	sethi	%hi(0x2000), %o0
+1:	subcc	%o0, 0x10, %o0
+	add	%o0, %o0, %o1
+	sta	%g0, [%o0] ASI_M_DATAC_TAG
+	bne	1b
+	 sta	%g0, [%o1] ASI_M_TXTC_TAG
+	retl
+	 nop
+#else
+
+	.globl	swift_flush_cache_all
+swift_flush_cache_all:
+	WINDOW_FLUSH(%g4, %g5)
+
+	/* Just clear out all the tags. */
+	sethi	%hi(16 * 1024), %o0
+1:	subcc	%o0, 16, %o0
+	sta	%g0, [%o0] ASI_M_TXTC_TAG
+	bne	1b
+	 sta	%g0, [%o0] ASI_M_DATAC_TAG
+	retl
+	 nop
+
+	.globl	swift_flush_cache_mm
+swift_flush_cache_mm:
+	ld	[%o0 + AOFF_mm_context], %g2
+	cmp	%g2, -1
+	be	swift_flush_cache_mm_out
+	WINDOW_FLUSH(%g4, %g5)
+	rd	%psr, %g1
+	andn	%g1, PSR_ET, %g3
+	wr	%g3, 0x0, %psr
+	nop
+	nop
+	mov	SRMMU_CTX_REG, %g7
+	lda	[%g7] ASI_M_MMUREGS, %g5
+	sta	%g2, [%g7] ASI_M_MMUREGS
+
+#if 1
+	sethi	%hi(0x2000), %o0
+1:	subcc	%o0, 0x10, %o0
+	sta	%g0, [%o0] ASI_M_FLUSH_CTX
+	bne	1b
+	 nop
+#else
+	clr	%o0
+	or	%g0, 2048, %g7
+	or	%g0, 2048, %o1
+	add	%o1, 2048, %o2
+	add	%o2, 2048, %o3
+	mov	16, %o4
+	add	%o4, 2048, %o5
+	add	%o5, 2048, %g2
+	add	%g2, 2048, %g3
+1:	sta	%g0, [%o0      ] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %o1] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %o2] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %o3] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %o4] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %o5] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %g2] ASI_M_FLUSH_CTX
+	sta	%g0, [%o0 + %g3] ASI_M_FLUSH_CTX
+	subcc	%g7, 32, %g7
+	bne	1b
+	 add	%o0, 32, %o0
+#endif
+
+	mov	SRMMU_CTX_REG, %g7
+	sta	%g5, [%g7] ASI_M_MMUREGS
+	wr	%g1, 0x0, %psr
+	nop
+	nop
+swift_flush_cache_mm_out:
+	retl
+	 nop
+
+	.globl	swift_flush_cache_range
+swift_flush_cache_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+	sub	%o2, %o1, %o2
+	sethi	%hi(4096), %o3
+	cmp	%o2, %o3
+	bgu	swift_flush_cache_mm
+	 nop
+	b	70f
+	 nop
+
+	.globl	swift_flush_cache_page
+swift_flush_cache_page:
+	ld	[%o0 + VMA_VM_MM], %o0
+70:
+	ld	[%o0 + AOFF_mm_context], %g2
+	cmp	%g2, -1
+	be	swift_flush_cache_page_out
+	WINDOW_FLUSH(%g4, %g5)
+	rd	%psr, %g1
+	andn	%g1, PSR_ET, %g3
+	wr	%g3, 0x0, %psr
+	nop
+	nop
+	mov	SRMMU_CTX_REG, %g7
+	lda	[%g7] ASI_M_MMUREGS, %g5
+	sta	%g2, [%g7] ASI_M_MMUREGS
+
+	andn	%o1, (PAGE_SIZE - 1), %o1
+#if 1
+	sethi	%hi(0x1000), %o0
+1:	subcc	%o0, 0x10, %o0
+	sta	%g0, [%o1 + %o0] ASI_M_FLUSH_PAGE
+	bne	1b
+	 nop
+#else
+	or	%g0, 512, %g7
+	or	%g0, 512, %o0
+	add	%o0, 512, %o2
+	add	%o2, 512, %o3
+	add	%o3, 512, %o4
+	add	%o4, 512, %o5
+	add	%o5, 512, %g3
+	add	%g3, 512, %g4
+1:	sta	%g0, [%o1      ] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o3] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
+	subcc	%g7, 16, %g7
+	bne	1b
+	 add	%o1, 16, %o1
+#endif
+
+	mov	SRMMU_CTX_REG, %g7
+	sta	%g5, [%g7] ASI_M_MMUREGS
+	wr	%g1, 0x0, %psr
+	nop
+	nop
+swift_flush_cache_page_out:
+	retl
+	 nop
+
+	/* Swift is write-through, but it is neither I/O nor
+	 * TLB-walk coherent, and its caches are virtually
+	 * indexed and tagged.
+	 */
+	.globl	swift_flush_page_for_dma
+	.globl	swift_flush_page_to_ram
+swift_flush_page_for_dma:
+swift_flush_page_to_ram:
+	andn	%o0, (PAGE_SIZE - 1), %o1
+#if 1
+	sethi	%hi(0x1000), %o0
+1:	subcc	%o0, 0x10, %o0
+	sta	%g0, [%o1 + %o0] ASI_M_FLUSH_PAGE
+	bne	1b
+	 nop
+#else
+	or	%g0, 512, %g7
+	or	%g0, 512, %o0
+	add	%o0, 512, %o2
+	add	%o2, 512, %o3
+	add	%o3, 512, %o4
+	add	%o4, 512, %o5
+	add	%o5, 512, %g3
+	add	%g3, 512, %g4
+1:	sta	%g0, [%o1      ] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o0] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o2] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o3] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o4] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %o5] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g3] ASI_M_FLUSH_PAGE
+	sta	%g0, [%o1 + %g4] ASI_M_FLUSH_PAGE
+	subcc	%g7, 16, %g7
+	bne	1b
+	 add	%o1, 16, %o1
+#endif
+	retl
+	 nop
+#endif
+
+	.globl	swift_flush_sig_insns
+swift_flush_sig_insns:
+	flush	%o1
+	retl
+	 flush	%o1 + 4
+
+	.globl	swift_flush_tlb_mm
+	.globl	swift_flush_tlb_range
+	.globl	swift_flush_tlb_all
+swift_flush_tlb_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+swift_flush_tlb_mm:
+	ld	[%o0 + AOFF_mm_context], %g2
+	cmp	%g2, -1
+	be	swift_flush_tlb_all_out
+swift_flush_tlb_all:
+	mov	0x400, %o1
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+swift_flush_tlb_all_out:
+	retl
+	 nop
+
+	.globl	swift_flush_tlb_page
+swift_flush_tlb_page:
+	ld	[%o0 + VMA_VM_MM], %o0
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3
+	andn	%o1, (PAGE_SIZE - 1), %o1
+	cmp	%o3, -1
+	be	swift_flush_tlb_page_out
+	 nop
+#if 1
+	mov	0x400, %o1
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+#else
+	lda	[%g1] ASI_M_MMUREGS, %g5
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%o1] ASI_M_FLUSH_PAGE	/* rem. virt. cache. prot. */
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	sta	%g5, [%g1] ASI_M_MMUREGS
+#endif
+swift_flush_tlb_page_out:
+	retl
+	 nop
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
new file mode 100644
index 0000000..3d72d2d
--- /dev/null
+++ b/arch/sparc/mm/tlb.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/* arch/sparc64/mm/tlb.c
+ *
+ * Copyright (C) 2004 David S. Miller <davem@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/preempt.h>
+
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+
+/* Heavily inspired by the ppc64 code.  */
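+/* TLB flushes are batched per cpu while a task is inside lazy mmu
+ * mode: tlb_batch_add_one() queues up to TLB_BATCH_NR virtual
+ * addresses and flush_tlb_pending() issues them in one go.
+ */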
+
+static DEFINE_PER_CPU(struct tlb_batch, tlb_batch);
+
+void flush_tlb_pending(void)
+{
+	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
+	struct mm_struct *mm = tb->mm;
+
+	if (!tb->tlb_nr)
+		goto out;
+
+	flush_tsb_user(tb);
+
+	if (CTX_VALID(mm->context)) {
+		if (tb->tlb_nr == 1) {
+			global_flush_tlb_page(mm, tb->vaddrs[0]);
+		} else {
+#ifdef CONFIG_SMP
+			smp_flush_tlb_pending(tb->mm, tb->tlb_nr,
+					      &tb->vaddrs[0]);
+#else
+			__flush_tlb_pending(CTX_HWBITS(tb->mm->context),
+					    tb->tlb_nr, &tb->vaddrs[0]);
+#endif
+		}
+	}
+
+	tb->tlb_nr = 0;
+
+out:
+	put_cpu_var(tlb_batch);
+}
+
+void arch_enter_lazy_mmu_mode(void)
+{
+	struct tlb_batch *tb = this_cpu_ptr(&tlb_batch);
+
+	tb->active = 1;
+}
+
+void arch_leave_lazy_mmu_mode(void)
+{
+	struct tlb_batch *tb = this_cpu_ptr(&tlb_batch);
+
+	if (tb->tlb_nr)
+		flush_tlb_pending();
+	tb->active = 0;
+}
+
+static void tlb_batch_add_one(struct mm_struct *mm, unsigned long vaddr,
+			      bool exec, unsigned int hugepage_shift)
+{
+	struct tlb_batch *tb = &get_cpu_var(tlb_batch);
+	unsigned long nr;
+
+	vaddr &= PAGE_MASK;
+	if (exec)
+		vaddr |= 0x1UL;
+
+	nr = tb->tlb_nr;
+
+	if (unlikely(nr != 0 && mm != tb->mm)) {
+		flush_tlb_pending();
+		nr = 0;
+	}
+
+	if (!tb->active) {
+		flush_tsb_user_page(mm, vaddr, hugepage_shift);
+		global_flush_tlb_page(mm, vaddr);
+		goto out;
+	}
+
+	if (nr == 0) {
+		tb->mm = mm;
+		tb->hugepage_shift = hugepage_shift;
+	}
+
+	if (tb->hugepage_shift != hugepage_shift) {
+		flush_tlb_pending();
+		tb->hugepage_shift = hugepage_shift;
+		nr = 0;
+	}
+
+	tb->vaddrs[nr] = vaddr;
+	tb->tlb_nr = ++nr;
+	if (nr >= TLB_BATCH_NR)
+		flush_tlb_pending();
+
+out:
+	put_cpu_var(tlb_batch);
+}
+
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+		   pte_t *ptep, pte_t orig, int fullmm,
+		   unsigned int hugepage_shift)
+{
+	if (tlb_type != hypervisor &&
+	    pte_dirty(orig)) {
+		unsigned long paddr, pfn = pte_pfn(orig);
+		struct address_space *mapping;
+		struct page *page;
+
+		if (!pfn_valid(pfn))
+			goto no_cache_flush;
+
+		page = pfn_to_page(pfn);
+		if (PageReserved(page))
+			goto no_cache_flush;
+
+		/* A real file page? */
+		mapping = page_mapping_file(page);
+		if (!mapping)
+			goto no_cache_flush;
+
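+		/* Only flush when the kernel and user mappings differ
+		 * in D-cache color (virtual address bit 13).
+		 */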
+		paddr = (unsigned long) page_address(page);
+		if ((paddr ^ vaddr) & (1 << 13))
+			flush_dcache_page_all(mm, page);
+	}
+
+no_cache_flush:
+	if (!fullmm)
+		tlb_batch_add_one(mm, vaddr, pte_exec(orig), hugepage_shift);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
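+/* Flush the small-page mappings under a regular (non-huge) pmd: walk
+ * the ptes covering the HPAGE_SIZE region and queue a flush for every
+ * valid entry.
+ */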
+static void tlb_batch_pmd_scan(struct mm_struct *mm, unsigned long vaddr,
+			       pmd_t pmd)
+{
+	unsigned long end;
+	pte_t *pte;
+
+	pte = pte_offset_map(&pmd, vaddr);
+	end = vaddr + HPAGE_SIZE;
+	while (vaddr < end) {
+		if (pte_val(*pte) & _PAGE_VALID) {
+			bool exec = pte_exec(*pte);
+
+			tlb_batch_add_one(mm, vaddr, exec, PAGE_SHIFT);
+		}
+		pte++;
+		vaddr += PAGE_SIZE;
+	}
+	pte_unmap(pte);
+}
+
+
+static void __set_pmd_acct(struct mm_struct *mm, unsigned long addr,
+			   pmd_t orig, pmd_t pmd)
+{
+	if (mm == &init_mm)
+		return;
+
+	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
+		/*
+		 * Note that this routine only sets pmds for THP pages.
+		 * Hugetlb pages are handled elsewhere.  We need to check
+		 * for the huge zero page.  Huge zero pages are like hugetlb
+		 * pages in that there is no RSS, but TSB entries are
+		 * still needed.  So, huge zero page counts go into
+		 * hugetlb_pte_count.
+		 */
+		if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+			if (is_huge_zero_page(pmd_page(pmd)))
+				mm->context.hugetlb_pte_count++;
+			else
+				mm->context.thp_pte_count++;
+		} else {
+			if (is_huge_zero_page(pmd_page(orig)))
+				mm->context.hugetlb_pte_count--;
+			else
+				mm->context.thp_pte_count--;
+		}
+
+		/* Do not try to allocate the TSB hash table if we
+		 * don't have one already.  We have various locks held
+		 * and thus we'll end up doing a GFP_KERNEL allocation
+		 * in an atomic context.
+		 *
+		 * Instead, we let the first TLB miss on a hugepage
+		 * take care of this.
+		 */
+	}
+
+	if (!pmd_none(orig)) {
+		addr &= HPAGE_MASK;
+		if (pmd_trans_huge(orig)) {
+			pte_t orig_pte = __pte(pmd_val(orig));
+			bool exec = pte_exec(orig_pte);
+
+			tlb_batch_add_one(mm, addr, exec, REAL_HPAGE_SHIFT);
+			tlb_batch_add_one(mm, addr + REAL_HPAGE_SIZE, exec,
+					  REAL_HPAGE_SHIFT);
+		} else {
+			tlb_batch_pmd_scan(mm, addr, orig);
+		}
+	}
+}
+
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		pmd_t *pmdp, pmd_t pmd)
+{
+	pmd_t orig = *pmdp;
+
+	*pmdp = pmd;
+	__set_pmd_acct(mm, addr, orig, pmd);
+}
+
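+/* Atomically install the new pmd while capturing the old one: the
+ * cmpxchg64() loop retries whenever a racing update changes *pmdp
+ * between the load and the exchange.
+ */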
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+		unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+	pmd_t old;
+
+	do {
+		old = *pmdp;
+	} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
+	__set_pmd_acct(vma->vm_mm, address, old, pmd);
+
+	return old;
+}
+
+/*
+ * This routine is only called when splitting a THP
+ */
+pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+		     pmd_t *pmdp)
+{
+	pmd_t old, entry;
+
+	entry = __pmd(pmd_val(*pmdp) & ~_PAGE_VALID);
+	old = pmdp_establish(vma, address, pmdp, entry);
+	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	/*
+	 * set_pmd_at() will not be called in a way to decrement
+	 * thp_pte_count when splitting a THP, so do it now.
+	 * Sanity check pmd before doing the actual decrement.
+	 */
+	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+	    !is_huge_zero_page(pmd_page(entry)))
+		(vma->vm_mm)->context.thp_pte_count--;
+
+	return old;
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pgtable)
+{
+	struct list_head *lh = (struct list_head *) pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	if (!pmd_huge_pte(mm, pmdp))
+		INIT_LIST_HEAD(lh);
+	else
+		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+	pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+	struct list_head *lh;
+	pgtable_t pgtable;
+
+	assert_spin_locked(&mm->page_table_lock);
+
+	/* FIFO */
+	pgtable = pmd_huge_pte(mm, pmdp);
+	lh = (struct list_head *) pgtable;
+	if (list_empty(lh))
+		pmd_huge_pte(mm, pmdp) = NULL;
+	else {
+		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+		list_del(lh);
+	}
+	pte_val(pgtable[0]) = 0;
+	pte_val(pgtable[1]) = 0;
+
+	return pgtable;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
new file mode 100644
index 0000000..f5edc28
--- /dev/null
+++ b/arch/sparc/mm/tsb.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0
+/* arch/sparc64/mm/tsb.c
+ *
+ * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
+ */
+
+#include <linux/kernel.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/mm_types.h>
+
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/tsb.h>
+#include <asm/tlb.h>
+#include <asm/oplib.h>
+
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
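+/* The TSB is a direct-mapped, power-of-two sized table: drop the
+ * page-offset bits and mask by the entry count.  E.g. with
+ * hash_shift == 13 (8K pages) and nentries == 512, vaddr 0x2000
+ * hashes to entry 1.
+ */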
+static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
+{
+	vaddr >>= hash_shift;
+	return vaddr & (nentries - 1);
+}
+
+static inline int tag_compare(unsigned long tag, unsigned long vaddr)
+{
+	return (tag == (vaddr >> 22));
+}
+
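+/* Reconstruct the virtual address each TSB entry would match (the
+ * index supplies the address bits just above the page offset, the
+ * tag the bits from 22 up) and invalidate entries inside [start, end).
+ */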
+static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
+		struct tsb *ent = &swapper_tsb[idx];
+		unsigned long match = idx << 13;
+
+		match |= (ent->tag << 22);
+		if (match >= start && match < end)
+			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+}
+
+/* TSB flushes need only occur on the processor initiating the address
+ * space modification, not on each cpu the address space has run on.
+ * Only the TLB flush needs that treatment.
+ */
+
+void flush_tsb_kernel_range(unsigned long start, unsigned long end)
+{
+	unsigned long v;
+
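+	/* For a range spanning at least twice the TSB entry count it
+	 * is cheaper to scan the whole table than to probe per page.
+	 */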
+	if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
+		return flush_tsb_kernel_range_scan(start, end);
+
+	for (v = start; v < end; v += PAGE_SIZE) {
+		unsigned long hash = tsb_hash(v, PAGE_SHIFT,
+					      KERNEL_TSB_NENTRIES);
+		struct tsb *ent = &swapper_tsb[hash];
+
+		if (tag_compare(ent->tag, v))
+			ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+}
+
+static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
+				  unsigned long hash_shift,
+				  unsigned long nentries)
+{
+	unsigned long tag, ent, hash;
+
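+	/* Bit 0 of the address is the "executable" marker set by
+	 * tlb_batch_add_one(); strip it before hashing.
+	 */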
+	v &= ~0x1UL;
+	hash = tsb_hash(v, hash_shift, nentries);
+	ent = tsb + (hash * sizeof(struct tsb));
+	tag = (v >> 22UL);
+
+	tsb_flush(ent, tag);
+}
+
+static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+			    unsigned long tsb, unsigned long nentries)
+{
+	unsigned long i;
+
+	for (i = 0; i < tb->tlb_nr; i++)
+		__flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
+}
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+static void __flush_huge_tsb_one_entry(unsigned long tsb, unsigned long v,
+				       unsigned long hash_shift,
+				       unsigned long nentries,
+				       unsigned int hugepage_shift)
+{
+	unsigned int hpage_entries;
+	unsigned int i;
+
+	hpage_entries = 1 << (hugepage_shift - hash_shift);
+	for (i = 0; i < hpage_entries; i++)
+		__flush_tsb_one_entry(tsb, v + (i << hash_shift), hash_shift,
+				      nentries);
+}
+
+static void __flush_huge_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
+				 unsigned long tsb, unsigned long nentries,
+				 unsigned int hugepage_shift)
+{
+	unsigned long i;
+
+	for (i = 0; i < tb->tlb_nr; i++)
+		__flush_huge_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift,
+					   nentries, hugepage_shift);
+}
+#endif
+
+void flush_tsb_user(struct tlb_batch *tb)
+{
+	struct mm_struct *mm = tb->mm;
+	unsigned long nentries, base, flags;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	if (tb->hugepage_shift < REAL_HPAGE_SHIFT) {
+		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+			base = __pa(base);
+		if (tb->hugepage_shift == PAGE_SHIFT)
+			__flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
+#if defined(CONFIG_HUGETLB_PAGE)
+		else
+			__flush_huge_tsb_one(tb, PAGE_SHIFT, base, nentries,
+					     tb->hugepage_shift);
+#endif
+	}
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+			base = __pa(base);
+		__flush_huge_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries,
+				     tb->hugepage_shift);
+	}
+#endif
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+
+void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr,
+			 unsigned int hugepage_shift)
+{
+	unsigned long nentries, base, flags;
+
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	if (hugepage_shift < REAL_HPAGE_SHIFT) {
+		base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
+		nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+			base = __pa(base);
+		if (hugepage_shift == PAGE_SHIFT)
+			__flush_tsb_one_entry(base, vaddr, PAGE_SHIFT,
+					      nentries);
+#if defined(CONFIG_HUGETLB_PAGE)
+		else
+			__flush_huge_tsb_one_entry(base, vaddr, PAGE_SHIFT,
+						   nentries, hugepage_shift);
+#endif
+	}
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	else if (mm->context.tsb_block[MM_TSB_HUGE].tsb) {
+		base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
+		nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor)
+			base = __pa(base);
+		__flush_huge_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT,
+					   nentries, hugepage_shift);
+	}
+#endif
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+}
+
+#define HV_PGSZ_IDX_BASE	HV_PGSZ_IDX_8K
+#define HV_PGSZ_MASK_BASE	HV_PGSZ_MASK_8K
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define HV_PGSZ_IDX_HUGE	HV_PGSZ_IDX_4MB
+#define HV_PGSZ_MASK_HUGE	HV_PGSZ_MASK_4MB
+#endif
+
+static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
+{
+	unsigned long tsb_reg, base, tsb_paddr;
+	unsigned long page_sz, tte;
+
+	mm->context.tsb_block[tsb_idx].tsb_nentries =
+		tsb_bytes / sizeof(struct tsb);
+
+	switch (tsb_idx) {
+	case MM_TSB_BASE:
+		base = TSBMAP_8K_BASE;
+		break;
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	case MM_TSB_HUGE:
+		base = TSBMAP_4M_BASE;
+		break;
+#endif
+	default:
+		BUG();
+	}
+
+	tte = pgprot_val(PAGE_KERNEL_LOCKED);
+	tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
+	BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
+
+	/* Use the smallest page size that can map the whole TSB
+	 * in one TLB entry.
+	 */
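+	/* The low three bits of tsb_reg encode the size as 8K << n;
+	 * the same value later serves as the tsb_caches[] index (see
+	 * tsb_destroy_one()).
+	 */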
+	switch (tsb_bytes) {
+	case 8192 << 0:
+		tsb_reg = 0x0UL;
+#ifdef DCACHE_ALIASING_POSSIBLE
+		base += (tsb_paddr & 8192);
+#endif
+		page_sz = 8192;
+		break;
+
+	case 8192 << 1:
+		tsb_reg = 0x1UL;
+		page_sz = 64 * 1024;
+		break;
+
+	case 8192 << 2:
+		tsb_reg = 0x2UL;
+		page_sz = 64 * 1024;
+		break;
+
+	case 8192 << 3:
+		tsb_reg = 0x3UL;
+		page_sz = 64 * 1024;
+		break;
+
+	case 8192 << 4:
+		tsb_reg = 0x4UL;
+		page_sz = 512 * 1024;
+		break;
+
+	case 8192 << 5:
+		tsb_reg = 0x5UL;
+		page_sz = 512 * 1024;
+		break;
+
+	case 8192 << 6:
+		tsb_reg = 0x6UL;
+		page_sz = 512 * 1024;
+		break;
+
+	case 8192 << 7:
+		tsb_reg = 0x7UL;
+		page_sz = 4 * 1024 * 1024;
+		break;
+
+	default:
+		printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
+		       current->comm, current->pid, tsb_bytes);
+		do_exit(SIGSEGV);
+	}
+	tte |= pte_sz_bits(page_sz);
+
+	if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
+		/* Physical mapping, no locked TLB entry for TSB.  */
+		tsb_reg |= tsb_paddr;
+
+		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
+		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
+		mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
+	} else {
+		tsb_reg |= base;
+		tsb_reg |= (tsb_paddr & (page_sz - 1UL));
+		tte |= (tsb_paddr & ~(page_sz - 1UL));
+
+		mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
+		mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
+		mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
+	}
+
+	/* Setup the Hypervisor TSB descriptor.  */
+	if (tlb_type == hypervisor) {
+		struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];
+
+		switch (tsb_idx) {
+		case MM_TSB_BASE:
+			hp->pgsz_idx = HV_PGSZ_IDX_BASE;
+			break;
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+		case MM_TSB_HUGE:
+			hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
+			break;
+#endif
+		default:
+			BUG();
+		}
+		hp->assoc = 1;
+		hp->num_ttes = tsb_bytes / 16;
+		hp->ctx_idx = 0;
+		switch (tsb_idx) {
+		case MM_TSB_BASE:
+			hp->pgsz_mask = HV_PGSZ_MASK_BASE;
+			break;
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+		case MM_TSB_HUGE:
+			hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
+			break;
+#endif
+		default:
+			BUG();
+		}
+		hp->tsb_base = tsb_paddr;
+		hp->resv = 0;
+	}
+}
+
+struct kmem_cache *pgtable_cache __read_mostly;
+
+static struct kmem_cache *tsb_caches[8] __read_mostly;
+
+static const char *tsb_cache_names[8] = {
+	"tsb_8KB",
+	"tsb_16KB",
+	"tsb_32KB",
+	"tsb_64KB",
+	"tsb_128KB",
+	"tsb_256KB",
+	"tsb_512KB",
+	"tsb_1MB",
+};
+
+void __init pgtable_cache_init(void)
+{
+	unsigned long i;
+
+	pgtable_cache = kmem_cache_create("pgtable_cache",
+					  PAGE_SIZE, PAGE_SIZE,
+					  0,
+					  _clear_page);
+	if (!pgtable_cache) {
+		prom_printf("pgtable_cache_init(): Could not create!\n");
+		prom_halt();
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
+		unsigned long size = 8192 << i;
+		const char *name = tsb_cache_names[i];
+
+		tsb_caches[i] = kmem_cache_create(name,
+						  size, size,
+						  0, NULL);
+		if (!tsb_caches[i]) {
+			prom_printf("Could not create %s cache\n", name);
+			prom_halt();
+		}
+	}
+}
+
+int sysctl_tsb_ratio = -2;
+
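+/* Convert a TSB size into the RSS at which we grow it.  With the
+ * default ratio of -2 this is num_ents - num_ents/4, i.e. growth
+ * triggers at 3/4 of capacity; positive ratios let the RSS exceed
+ * the entry count before growing.
+ */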
+static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
+{
+	unsigned long num_ents = (new_size / sizeof(struct tsb));
+
+	if (sysctl_tsb_ratio < 0)
+		return num_ents - (num_ents >> -sysctl_tsb_ratio);
+	else
+		return num_ents + (num_ents >> sysctl_tsb_ratio);
+}
+
+/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
+ * do_sparc64_fault() invokes this routine to try and grow it.
+ *
+ * When we reach the maximum TSB size supported, we stick ~0UL into
+ * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
+ * will not trigger any longer.
+ *
+ * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
+ * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
+ * must be 512K aligned.  It also must be physically contiguous, so we
+ * cannot use vmalloc().
+ *
+ * The idea here is to grow the TSB when the RSS of the process approaches
+ * the number of entries that the current TSB can hold at once.  Currently,
+ * we trigger when the RSS hits 3/4 of the TSB capacity.
+ */
+void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
+{
+	unsigned long max_tsb_size = 1 * 1024 * 1024;
+	unsigned long new_size, old_size, flags;
+	struct tsb *old_tsb, *new_tsb;
+	unsigned long new_cache_index, old_cache_index;
+	unsigned long new_rss_limit;
+	gfp_t gfp_flags;
+
+	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
+		max_tsb_size = (PAGE_SIZE << MAX_ORDER);
+
+	new_cache_index = 0;
+	for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
+		new_rss_limit = tsb_size_to_rss_limit(new_size);
+		if (new_rss_limit > rss)
+			break;
+		new_cache_index++;
+	}
+
+	if (new_size == max_tsb_size)
+		new_rss_limit = ~0UL;
+
+retry_tsb_alloc:
+	gfp_flags = GFP_KERNEL;
+	if (new_size > (PAGE_SIZE * 2))
+		gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;
+
+	new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
+					gfp_flags, numa_node_id());
+	if (unlikely(!new_tsb)) {
+		/* Not being able to fork due to a high-order TSB
+		 * allocation failure is very bad behavior.  Just back
+		 * down to a 0-order allocation and force no TSB
+		 * growing for this address space.
+		 */
+		if (mm->context.tsb_block[tsb_index].tsb == NULL &&
+		    new_cache_index > 0) {
+			new_cache_index = 0;
+			new_size = 8192;
+			new_rss_limit = ~0UL;
+			goto retry_tsb_alloc;
+		}
+
+		/* If we failed on a TSB grow, we are under serious
+		 * memory pressure so don't try to grow any more.
+		 */
+		if (mm->context.tsb_block[tsb_index].tsb != NULL)
+			mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
+		return;
+	}
+
+	/* Mark all tags as invalid.  */
+	tsb_init(new_tsb, new_size);
+
+	/* Ok, we are about to commit the changes.  If we are
+	 * growing an existing TSB the locking is very tricky,
+	 * so WATCH OUT!
+	 *
+	 * We have to hold mm->context.lock while committing to the
+	 * new TSB, this synchronizes us with processors in
+	 * flush_tsb_user() and switch_mm() for this address space.
+	 *
+	 * But even with that lock held, processors run asynchronously
+	 * accessing the old TSB via TLB miss handling.  This is OK
+	 * because those actions are just propagating state from the
+	 * Linux page tables into the TSB, page table mappings are not
+	 * being changed.  If a real fault occurs, the processor will
+	 * synchronize with us when it hits flush_tsb_user(), this is
+	 * also true for the case where vmscan is modifying the page
+	 * tables.  The only thing we need to be careful with is to
+	 * skip any locked TSB entries during copy_tsb().
+	 *
+	 * When we finish committing to the new TSB, we have to drop
+	 * the lock and ask all other cpus running this address space
+	 * to run tsb_context_switch() to see the new TSB table.
+	 */
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	old_tsb = mm->context.tsb_block[tsb_index].tsb;
+	old_cache_index =
+		(mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
+	old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
+		    sizeof(struct tsb));
+
+	/* Handle multiple threads trying to grow the TSB at the same time.
+	 * One will get in here first, and bump the size and the RSS limit.
+	 * The others will get in here next and hit this check.
+	 */
+	if (unlikely(old_tsb &&
+		     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
+		spin_unlock_irqrestore(&mm->context.lock, flags);
+
+		kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
+		return;
+	}
+
+	mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;
+
+	if (old_tsb) {
+		extern void copy_tsb(unsigned long old_tsb_base,
+				     unsigned long old_tsb_size,
+				     unsigned long new_tsb_base,
+				     unsigned long new_tsb_size,
+				     unsigned long page_size_shift);
+		unsigned long old_tsb_base = (unsigned long) old_tsb;
+		unsigned long new_tsb_base = (unsigned long) new_tsb;
+
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
+			old_tsb_base = __pa(old_tsb_base);
+			new_tsb_base = __pa(new_tsb_base);
+		}
+		copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
+			tsb_index == MM_TSB_BASE ?
+			PAGE_SHIFT : REAL_HPAGE_SHIFT);
+	}
+
+	mm->context.tsb_block[tsb_index].tsb = new_tsb;
+	setup_tsb_params(mm, tsb_index, new_size);
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+
+	/* If old_tsb is NULL, we're being invoked for the first time
+	 * from init_new_context().
+	 */
+	if (old_tsb) {
+		/* Reload it on the local cpu.  */
+		tsb_context_switch(mm);
+
+		/* Now force other processors to do the same.  */
+		preempt_disable();
+		smp_tsb_sync(mm);
+		preempt_enable();
+
+		/* Now it is safe to free the old tsb.  */
+		kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
+	}
+}
+
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	unsigned long mm_rss = get_mm_rss(mm);
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	unsigned long saved_hugetlb_pte_count;
+	unsigned long saved_thp_pte_count;
+#endif
+	unsigned int i;
+
+	spin_lock_init(&mm->context.lock);
+
+	mm->context.sparc64_ctx_val = 0UL;
+
+	mm->context.tag_store = NULL;
+	spin_lock_init(&mm->context.tag_lock);
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	/* We reset them to zero because the fork() page copying
+	 * will re-increment the counters as the parent PTEs are
+	 * copied into the child address space.
+	 */
+	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+	saved_thp_pte_count = mm->context.thp_pte_count;
+	mm->context.hugetlb_pte_count = 0;
+	mm->context.thp_pte_count = 0;
+
+	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
+#endif
+
+	/* copy_mm() copies over the parent's mm_struct before calling
+	 * us, so we need to zero out the TSB pointer or else tsb_grow()
+	 * will be confused and think there is an older TSB to free up.
+	 */
+	for (i = 0; i < MM_NUM_TSBS; i++)
+		mm->context.tsb_block[i].tsb = NULL;
+
+	/* If this is fork, inherit the parent's TSB size.  We would
+	 * grow it to that size on the first page fault anyways.
+	 */
+	tsb_grow(mm, MM_TSB_BASE, mm_rss);
+
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+		tsb_grow(mm, MM_TSB_HUGE,
+			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
+			 REAL_HPAGE_PER_HPAGE);
+#endif
+
+	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void tsb_destroy_one(struct tsb_config *tp)
+{
+	unsigned long cache_index;
+
+	if (!tp->tsb)
+		return;
+	cache_index = tp->tsb_reg_val & 0x7UL;
+	kmem_cache_free(tsb_caches[cache_index], tp->tsb);
+	tp->tsb = NULL;
+	tp->tsb_reg_val = 0UL;
+}
+
+void destroy_context(struct mm_struct *mm)
+{
+	unsigned long flags, i;
+
+	for (i = 0; i < MM_NUM_TSBS; i++)
+		tsb_destroy_one(&mm->context.tsb_block[i]);
+
+	spin_lock_irqsave(&ctx_alloc_lock, flags);
+
+	if (CTX_VALID(mm->context)) {
+		unsigned long nr = CTX_NRBITS(mm->context);
+		mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
+	}
+
+	spin_unlock_irqrestore(&ctx_alloc_lock, flags);
+
+	/* If ADI tag storage was allocated for this task, free it */
+	if (mm->context.tag_store) {
+		tag_storage_desc_t *tag_desc;
+		unsigned long max_desc;
+		unsigned char *tags;
+
+		tag_desc = mm->context.tag_store;
+		max_desc = PAGE_SIZE/sizeof(tag_storage_desc_t);
+		for (i = 0; i < max_desc; i++) {
+			tags = tag_desc->tags;
+			tag_desc->tags = NULL;
+			kfree(tags);
+			tag_desc++;
+		}
+		kfree(mm->context.tag_store);
+		mm->context.tag_store = NULL;
+	}
+}
diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S
new file mode 100644
index 0000000..62b742d
--- /dev/null
+++ b/arch/sparc/mm/tsunami.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tsunami.S: High speed MicroSparc-I mmu/cache operations.
+ *
+ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+#include <asm/psr.h>
+#include <asm/asi.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+
+	.text
+	.align	4
+
+	.globl	tsunami_flush_cache_all, tsunami_flush_cache_mm
+	.globl	tsunami_flush_cache_range, tsunami_flush_cache_page
+	.globl	tsunami_flush_page_to_ram, tsunami_flush_page_for_dma
+	.globl	tsunami_flush_sig_insns
+	.globl	tsunami_flush_tlb_all, tsunami_flush_tlb_mm
+	.globl	tsunami_flush_tlb_range, tsunami_flush_tlb_page
+
+	/* Sliiick... */
+tsunami_flush_cache_page:
+tsunami_flush_cache_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+tsunami_flush_cache_mm:
+	ld	[%o0 + AOFF_mm_context], %g2
+	cmp	%g2, -1
+	be	tsunami_flush_cache_out
+tsunami_flush_cache_all:
+	WINDOW_FLUSH(%g4, %g5)
+tsunami_flush_page_for_dma:
+	sta	%g0, [%g0] ASI_M_IC_FLCLEAR
+	sta	%g0, [%g0] ASI_M_DC_FLCLEAR
+tsunami_flush_cache_out:
+tsunami_flush_page_to_ram:
+	retl
+	 nop
+
+tsunami_flush_sig_insns:
+	flush	%o1
+	retl
+	 flush	%o1 + 4
+
+	/* More slick stuff... */
+tsunami_flush_tlb_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+tsunami_flush_tlb_mm:
+	ld	[%o0 + AOFF_mm_context], %g2
+	cmp	%g2, -1
+	be	tsunami_flush_tlb_out
+tsunami_flush_tlb_all:
+	 mov	0x400, %o1
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	nop
+	nop
+	nop
+	nop
+	nop
+tsunami_flush_tlb_out:
+	retl
+	 nop
+
+	/* This one can be done in a fine-grained manner... */
+tsunami_flush_tlb_page:
+	ld	[%o0 + VMA_VM_MM], %o0
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3
+	andn	%o1, (PAGE_SIZE - 1), %o1
+	cmp	%o3, -1
+	be	tsunami_flush_tlb_page_out
+	 lda	[%g1] ASI_M_MMUREGS, %g5
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	nop
+	nop
+	nop
+	nop
+	nop
+tsunami_flush_tlb_page_out:
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+
+#define MIRROR_BLOCK(dst, src, offset, t0, t1, t2, t3) \
+	ldd	[src + offset + 0x18], t0; \
+	std	t0, [dst + offset + 0x18]; \
+	ldd	[src + offset + 0x10], t2; \
+	std	t2, [dst + offset + 0x10]; \
+	ldd	[src + offset + 0x08], t0; \
+	std	t0, [dst + offset + 0x08]; \
+	ldd	[src + offset + 0x00], t2; \
+	std	t2, [dst + offset + 0x00];
+
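+	/* Copy one page, 0x100 bytes per loop iteration: each
+	 * MIRROR_BLOCK moves 32 bytes with four ldd/std pairs.
+	 */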
+tsunami_copy_1page:
+/* NOTE: This routine has to be shorter than 70 insns --jj */
+	or	%g0, (PAGE_SIZE >> 8), %g1
+1:
+	MIRROR_BLOCK(%o0, %o1, 0x00, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0x20, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0x40, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0x60, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0x80, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0xa0, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0xc0, %o2, %o3, %o4, %o5)
+	MIRROR_BLOCK(%o0, %o1, 0xe0, %o2, %o3, %o4, %o5)
+	subcc	%g1, 1, %g1
+	add	%o0, 0x100, %o0
+	bne	1b
+	 add	%o1, 0x100, %o1
+
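+	/* Runtime patch: copy the body of tsunami_copy_1page over
+	 * __copy_1page, then clear the I- and D-caches so the patched
+	 * code is what gets executed.
+	 */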
+	.globl	tsunami_setup_blockops
+tsunami_setup_blockops:
+	sethi	%hi(__copy_1page), %o0
+	or	%o0, %lo(__copy_1page), %o0
+	sethi	%hi(tsunami_copy_1page), %o1
+	or	%o1, %lo(tsunami_copy_1page), %o1
+	sethi	%hi(tsunami_setup_blockops), %o2
+	or	%o2, %lo(tsunami_setup_blockops), %o2
+	ld	[%o1], %o4
+1:	add	%o1, 4, %o1
+	st	%o4, [%o0]
+	add	%o0, 4, %o0
+	cmp	%o1, %o2
+	bne	1b
+	ld	[%o1], %o4
+	sta	%g0, [%g0] ASI_M_IC_FLCLEAR
+	sta	%g0, [%g0] ASI_M_DC_FLCLEAR
+	retl
+	 nop
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
new file mode 100644
index 0000000..d245f89
--- /dev/null
+++ b/arch/sparc/mm/ultra.S
@@ -0,0 +1,1102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ultra.S: Don't expand these all over the place...
+ *
+ * Copyright (C) 1997, 2000, 2008 David S. Miller (davem@davemloft.net)
+ */
+
+#include <asm/asi.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/spitfire.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/pil.h>
+#include <asm/head.h>
+#include <asm/thread_info.h>
+#include <asm/cacheflush.h>
+#include <asm/hypervisor.h>
+#include <asm/cpudata.h>
+
+	/* Basically, most of the Spitfire vs. Cheetah madness
+	 * has to do with the fact that Cheetah does not support
+	 * IMMU flushes out of the secondary context.  Someone needs
+	 * to throw a south lake birthday party for the folks
+	 * in Microelectronics who refused to fix this shit.
+	 */
+
+	/* This file is meant to be read efficiently by the CPU, not humans.
+	 * Try not to screw this up for anyone...
+	 */
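+
+	/* Many routines below are annotated with an instruction count
+	 * ("NN insns"): the cheetah and hypervisor variants replace the
+	 * generic entry points at boot (see tlb_patch_one() below), so
+	 * each replacement must fit in the padded slot.  The trailing
+	 * nops are that padding.
+	 */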
+	.text
+	.align		32
+	.globl		__flush_tlb_mm
+__flush_tlb_mm:		/* 19 insns */
+	/* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
+	ldxa		[%o1] ASI_DMMU, %g2
+	cmp		%g2, %o0
+	bne,pn		%icc, __spitfire_flush_tlb_mm_slow
+	 mov		0x50, %g3
+	stxa		%g0, [%g3] ASI_DMMU_DEMAP
+	stxa		%g0, [%g3] ASI_IMMU_DEMAP
+	sethi		%hi(KERNBASE), %g3
+	flush		%g3
+	retl
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	.align		32
+	.globl		__flush_tlb_page
+__flush_tlb_page:	/* 22 insns */
+	/* %o0 = context, %o1 = vaddr */
+	rdpr		%pstate, %g7
+	andn		%g7, PSTATE_IE, %g2
+	wrpr		%g2, %pstate
+	mov		SECONDARY_CONTEXT, %o4
+	ldxa		[%o4] ASI_DMMU, %g2
+	stxa		%o0, [%o4] ASI_DMMU
+	andcc		%o1, 1, %g0
+	andn		%o1, 1, %o3
+	be,pn		%icc, 1f
+	 or		%o3, 0x10, %o3
+	stxa		%g0, [%o3] ASI_IMMU_DEMAP
+1:	stxa		%g0, [%o3] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g2, [%o4] ASI_DMMU
+	sethi		%hi(KERNBASE), %o4
+	flush		%o4
+	retl
+	 wrpr		%g7, 0x0, %pstate
+	nop
+	nop
+	nop
+	nop
+
+	.align		32
+	.globl		__flush_tlb_pending
+__flush_tlb_pending:	/* 27 insns */
+	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+	rdpr		%pstate, %g7
+	sllx		%o1, 3, %o1
+	andn		%g7, PSTATE_IE, %g2
+	wrpr		%g2, %pstate
+	mov		SECONDARY_CONTEXT, %o4
+	ldxa		[%o4] ASI_DMMU, %g2
+	stxa		%o0, [%o4] ASI_DMMU
+1:	sub		%o1, (1 << 3), %o1
+	ldx		[%o2 + %o1], %o3
+	andcc		%o3, 1, %g0
+	andn		%o3, 1, %o3
+	be,pn		%icc, 2f
+	 or		%o3, 0x10, %o3
+	stxa		%g0, [%o3] ASI_IMMU_DEMAP
+2:	stxa		%g0, [%o3] ASI_DMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%o1, 1b
+	 nop
+	stxa		%g2, [%o4] ASI_DMMU
+	sethi		%hi(KERNBASE), %o4
+	flush		%o4
+	retl
+	 wrpr		%g7, 0x0, %pstate
+	nop
+	nop
+	nop
+	nop
+
+	.align		32
+	.globl		__flush_tlb_kernel_range
+__flush_tlb_kernel_range:	/* 31 insns */
+	/* %o0=start, %o1=end */
+	cmp		%o0, %o1
+	be,pn		%xcc, 2f
+	 sub		%o1, %o0, %o3
+	srlx		%o3, 18, %o4
+	brnz,pn		%o4, __spitfire_flush_tlb_kernel_range_slow
+	 sethi		%hi(PAGE_SIZE), %o4
+	sub		%o3, %o4, %o3
+	or		%o0, 0x20, %o0		! Nucleus
+1:	stxa		%g0, [%o0 + %o3] ASI_DMMU_DEMAP
+	stxa		%g0, [%o0 + %o3] ASI_IMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%o3, 1b
+	 sub		%o3, %o4, %o3
+2:	sethi		%hi(KERNBASE), %o3
+	flush		%o3
+	retl
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+__spitfire_flush_tlb_kernel_range_slow:
+	mov		63 * 8, %o4
+1:	ldxa		[%o4] ASI_ITLB_DATA_ACCESS, %o3
+	andcc		%o3, 0x40, %g0			/* _PAGE_L_4U */
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %o3
+	stxa		%g0, [%o3] ASI_IMMU
+	stxa		%g0, [%o4] ASI_ITLB_DATA_ACCESS
+	membar		#Sync
+2:	ldxa		[%o4] ASI_DTLB_DATA_ACCESS, %o3
+	andcc		%o3, 0x40, %g0
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %o3
+	stxa		%g0, [%o3] ASI_DMMU
+	stxa		%g0, [%o4] ASI_DTLB_DATA_ACCESS
+	membar		#Sync
+2:	sub		%o4, 8, %o4
+	brgez,pt	%o4, 1b
+	 nop
+	retl
+	 nop
+
+__spitfire_flush_tlb_mm_slow:
+	rdpr		%pstate, %g1
+	wrpr		%g1, PSTATE_IE, %pstate
+	stxa		%o0, [%o1] ASI_DMMU
+	stxa		%g0, [%g3] ASI_DMMU_DEMAP
+	stxa		%g0, [%g3] ASI_IMMU_DEMAP
+	flush		%g6
+	stxa		%g2, [%o1] ASI_DMMU
+	sethi		%hi(KERNBASE), %o1
+	flush		%o1
+	retl
+	 wrpr		%g1, 0, %pstate
+
+/*
+ * The following code flushes one page_size worth.
+ */
+	.section .kprobes.text, "ax"
+	.align		32
+	.globl		__flush_icache_page
+__flush_icache_page:	/* %o0 = phys_page */
+	srlx		%o0, PAGE_SHIFT, %o0
+	sethi		%hi(PAGE_OFFSET), %g1
+	sllx		%o0, PAGE_SHIFT, %o0
+	sethi		%hi(PAGE_SIZE), %g2
+	ldx		[%g1 + %lo(PAGE_OFFSET)], %g1
+	add		%o0, %g1, %o0
+1:	subcc		%g2, 32, %g2
+	bne,pt		%icc, 1b
+	 flush		%o0 + %g2
+	retl
+	 nop
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+
+#if (PAGE_SHIFT != 13)
+#error only page shift of 13 is supported by dcache flush
+#endif
+
+#define DTAG_MASK 0x3
+
+	/* This routine is Spitfire specific so the hardcoded
+	 * D-cache size and line-size are OK.
+	 */
+	.align		64
+	.globl		__flush_dcache_page
+__flush_dcache_page:	/* %o0=kaddr, %o1=flush_icache */
+	sethi		%hi(PAGE_OFFSET), %g1
+	ldx		[%g1 + %lo(PAGE_OFFSET)], %g1
+	sub		%o0, %g1, %o0			! physical address
+	srlx		%o0, 11, %o0			! make D-cache TAG
+	sethi		%hi(1 << 14), %o2		! D-cache size
+	sub		%o2, (1 << 5), %o2		! D-cache line size
+1:	ldxa		[%o2] ASI_DCACHE_TAG, %o3	! load D-cache TAG
+	andcc		%o3, DTAG_MASK, %g0		! Valid?
+	be,pn		%xcc, 2f			! Nope, branch
+	 andn		%o3, DTAG_MASK, %o3		! Clear valid bits
+	cmp		%o3, %o0			! TAG match?
+	bne,pt		%xcc, 2f			! Nope, branch
+	 nop
+	stxa		%g0, [%o2] ASI_DCACHE_TAG	! Invalidate TAG
+	membar		#Sync
+2:	brnz,pt		%o2, 1b
+	 sub		%o2, (1 << 5), %o2		! D-cache line size
+
+	/* The I-cache does not snoop local stores, so we had
+	 * better flush it too when necessary.
+	 */
+	brnz,pt		%o1, __flush_icache_page
+	 sllx		%o0, 11, %o0
+	retl
+	 nop
+
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+	.previous
+
+	/* Cheetah specific versions, patched at boot time. */
+__cheetah_flush_tlb_mm: /* 19 insns */
+	rdpr		%pstate, %g7
+	andn		%g7, PSTATE_IE, %g2
+	wrpr		%g2, 0x0, %pstate
+	wrpr		%g0, 1, %tl
+	mov		PRIMARY_CONTEXT, %o2
+	mov		0x40, %g3
+	ldxa		[%o2] ASI_DMMU, %g2
+	srlx		%g2, CTX_PGSZ1_NUC_SHIFT, %o1
+	sllx		%o1, CTX_PGSZ1_NUC_SHIFT, %o1
+	or		%o0, %o1, %o0	/* Preserve nucleus page size fields */
+	stxa		%o0, [%o2] ASI_DMMU
+	stxa		%g0, [%g3] ASI_DMMU_DEMAP
+	stxa		%g0, [%g3] ASI_IMMU_DEMAP
+	stxa		%g2, [%o2] ASI_DMMU
+	sethi		%hi(KERNBASE), %o2
+	flush		%o2
+	wrpr		%g0, 0, %tl
+	retl
+	 wrpr		%g7, 0x0, %pstate
+
+__cheetah_flush_tlb_page:	/* 22 insns */
+	/* %o0 = context, %o1 = vaddr */
+	rdpr		%pstate, %g7
+	andn		%g7, PSTATE_IE, %g2
+	wrpr		%g2, 0x0, %pstate
+	wrpr		%g0, 1, %tl
+	mov		PRIMARY_CONTEXT, %o4
+	ldxa		[%o4] ASI_DMMU, %g2
+	srlx		%g2, CTX_PGSZ1_NUC_SHIFT, %o3
+	sllx		%o3, CTX_PGSZ1_NUC_SHIFT, %o3
+	or		%o0, %o3, %o0	/* Preserve nucleus page size fields */
+	stxa		%o0, [%o4] ASI_DMMU
+	andcc		%o1, 1, %g0
+	be,pn		%icc, 1f
+	 andn		%o1, 1, %o3
+	stxa		%g0, [%o3] ASI_IMMU_DEMAP
+1:	stxa		%g0, [%o3] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g2, [%o4] ASI_DMMU
+	sethi		%hi(KERNBASE), %o4
+	flush		%o4
+	wrpr		%g0, 0, %tl
+	retl
+	 wrpr		%g7, 0x0, %pstate
+
+__cheetah_flush_tlb_pending:	/* 27 insns */
+	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+	rdpr		%pstate, %g7
+	sllx		%o1, 3, %o1
+	andn		%g7, PSTATE_IE, %g2
+	wrpr		%g2, 0x0, %pstate
+	wrpr		%g0, 1, %tl
+	mov		PRIMARY_CONTEXT, %o4
+	ldxa		[%o4] ASI_DMMU, %g2
+	srlx		%g2, CTX_PGSZ1_NUC_SHIFT, %o3
+	sllx		%o3, CTX_PGSZ1_NUC_SHIFT, %o3
+	or		%o0, %o3, %o0	/* Preserve nucleus page size fields */
+	stxa		%o0, [%o4] ASI_DMMU
+1:	sub		%o1, (1 << 3), %o1
+	ldx		[%o2 + %o1], %o3
+	andcc		%o3, 1, %g0
+	be,pn		%icc, 2f
+	 andn		%o3, 1, %o3
+	stxa		%g0, [%o3] ASI_IMMU_DEMAP
+2:	stxa		%g0, [%o3] ASI_DMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%o1, 1b
+	 nop
+	stxa		%g2, [%o4] ASI_DMMU
+	sethi		%hi(KERNBASE), %o4
+	flush		%o4
+	wrpr		%g0, 0, %tl
+	retl
+	 wrpr		%g7, 0x0, %pstate
+
+__cheetah_flush_tlb_kernel_range:	/* 31 insns */
+	/* %o0=start, %o1=end */
+	cmp		%o0, %o1
+	be,pn		%xcc, 2f
+	 sub		%o1, %o0, %o3
+	srlx		%o3, 18, %o4
+	brnz,pn		%o4, 3f
+	 sethi		%hi(PAGE_SIZE), %o4
+	sub		%o3, %o4, %o3
+	or		%o0, 0x20, %o0		! Nucleus
+1:	stxa		%g0, [%o0 + %o3] ASI_DMMU_DEMAP
+	stxa		%g0, [%o0 + %o3] ASI_IMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%o3, 1b
+	 sub		%o3, %o4, %o3
+2:	sethi		%hi(KERNBASE), %o3
+	flush		%o3
+	retl
+	 nop
+3:	mov		0x80, %o4
+	stxa		%g0, [%o4] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g0, [%o4] ASI_IMMU_DEMAP
+	membar		#Sync
+	retl
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+__cheetah_flush_dcache_page: /* 11 insns */
+	sethi		%hi(PAGE_OFFSET), %g1
+	ldx		[%g1 + %lo(PAGE_OFFSET)], %g1
+	sub		%o0, %g1, %o0
+	sethi		%hi(PAGE_SIZE), %o4
+1:	subcc		%o4, (1 << 5), %o4
+	stxa		%g0, [%o0 + %o4] ASI_DCACHE_INVALIDATE
+	membar		#Sync
+	bne,pt		%icc, 1b
+	 nop
+	retl		/* I-cache flush never needed on Cheetah, see callers. */
+	 nop
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+	/* Hypervisor specific versions, patched at boot time.  */
+__hypervisor_tlb_tl0_error:
+	save		%sp, -192, %sp
+	mov		%i0, %o0
+	call		hypervisor_tlbop_error
+	 mov		%i1, %o1
+	ret
+	 restore
+
+__hypervisor_flush_tlb_mm: /* 19 insns */
+	mov		%o0, %o2	/* ARG2: mmu context */
+	mov		0, %o0		/* ARG0: CPU lists unimplemented */
+	mov		0, %o1		/* ARG1: CPU lists unimplemented */
+	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
+	mov		HV_FAST_MMU_DEMAP_CTX, %o5
+	ta		HV_FAST_TRAP
+	brnz,pn		%o0, 1f
+	 mov		HV_FAST_MMU_DEMAP_CTX, %o1
+	retl
+	 nop
+1:	sethi		%hi(__hypervisor_tlb_tl0_error), %o5
+	jmpl		%o5 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+__hypervisor_flush_tlb_page: /* 22 insns */
+	/* %o0 = context, %o1 = vaddr */
+	mov		%o0, %g2
+	mov		%o1, %o0              /* ARG0: vaddr + IMMU-bit */
+	mov		%g2, %o1	      /* ARG1: mmu context */
+	mov		HV_MMU_ALL, %o2	      /* ARG2: flags */
+	srlx		%o0, PAGE_SHIFT, %o0
+	sllx		%o0, PAGE_SHIFT, %o0
+	ta		HV_MMU_UNMAP_ADDR_TRAP
+	brnz,pn		%o0, 1f
+	 mov		HV_MMU_UNMAP_ADDR_TRAP, %o1
+	retl
+	 nop
+1:	sethi		%hi(__hypervisor_tlb_tl0_error), %o2
+	jmpl		%o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+__hypervisor_flush_tlb_pending: /* 27 insns */
+	/* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
+	sllx		%o1, 3, %g1
+	mov		%o2, %g2
+	mov		%o0, %g3
+1:	sub		%g1, (1 << 3), %g1
+	ldx		[%g2 + %g1], %o0      /* ARG0: vaddr + IMMU-bit */
+	mov		%g3, %o1	      /* ARG1: mmu context */
+	mov		HV_MMU_ALL, %o2	      /* ARG2: flags */
+	srlx		%o0, PAGE_SHIFT, %o0
+	sllx		%o0, PAGE_SHIFT, %o0
+	ta		HV_MMU_UNMAP_ADDR_TRAP
+	brnz,pn		%o0, 1f
+	 mov		HV_MMU_UNMAP_ADDR_TRAP, %o1
+	brnz,pt		%g1, 1b
+	 nop
+	retl
+	 nop
+1:	sethi		%hi(__hypervisor_tlb_tl0_error), %o2
+	jmpl		%o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+__hypervisor_flush_tlb_kernel_range: /* 31 insns */
+	/* %o0=start, %o1=end */
+	cmp		%o0, %o1
+	be,pn		%xcc, 2f
+	 sub		%o1, %o0, %g2
+	srlx		%g2, 18, %g3
+	brnz,pn		%g3, 4f
+	 mov		%o0, %g1
+	sethi		%hi(PAGE_SIZE), %g3
+	sub		%g2, %g3, %g2
+1:	add		%g1, %g2, %o0	/* ARG0: virtual address */
+	mov		0, %o1		/* ARG1: mmu context */
+	mov		HV_MMU_ALL, %o2	/* ARG2: flags */
+	ta		HV_MMU_UNMAP_ADDR_TRAP
+	brnz,pn		%o0, 3f
+	 mov		HV_MMU_UNMAP_ADDR_TRAP, %o1
+	brnz,pt		%g2, 1b
+	 sub		%g2, %g3, %g2
+2:	retl
+	 nop
+3:	sethi		%hi(__hypervisor_tlb_tl0_error), %o2
+	jmpl		%o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+	 nop
+4:	mov		0, %o0		/* ARG0: CPU lists unimplemented */
+	mov		0, %o1		/* ARG1: CPU lists unimplemented */
+	mov		0, %o2		/* ARG2: mmu context == nucleus */
+	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
+	mov		HV_FAST_MMU_DEMAP_CTX, %o5
+	ta		HV_FAST_TRAP
+	brnz,pn		%o0, 3b
+	 mov		HV_FAST_MMU_DEMAP_CTX, %o1
+	retl
+	 nop
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	/* XXX Niagara and friends have an 8K cache, so no aliasing is
+	 * XXX possible, but nothing explicit in the Hypervisor API
+	 * XXX guarantees this.
+	 */
+__hypervisor_flush_dcache_page:	/* 2 insns */
+	retl
+	 nop
+#endif
+
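+	/* tlb_patch_one(dst=%o0, src=%o1, count=%o2): copy %o2
+	 * instruction words from src to dst, flushing the I-cache
+	 * after each patched word.
+	 */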
+tlb_patch_one:
+1:	lduw		[%o1], %g1
+	stw		%g1, [%o0]
+	flush		%o0
+	subcc		%o2, 1, %o2
+	add		%o1, 4, %o1
+	bne,pt		%icc, 1b
+	 add		%o0, 4, %o0
+	retl
+	 nop
+
+#ifdef CONFIG_SMP
+	/* These are all called by the slaves of a cross call, at
+	 * trap level 1, with interrupts fully disabled.
+	 *
+	 * Register usage:
+	 *   %g5	mm->context	(all tlb flushes)
+	 *   %g1	address arg 1	(tlb page and range flushes)
+	 *   %g7	address arg 2	(tlb range flush only)
+	 *
+	 *   %g6	scratch 1
+	 *   %g2	scratch 2
+	 *   %g3	scratch 3
+	 *   %g4	scratch 4
+	 */
+	.align		32
+	.globl		xcall_flush_tlb_mm
+xcall_flush_tlb_mm:	/* 24 insns */
+	mov		PRIMARY_CONTEXT, %g2
+	ldxa		[%g2] ASI_DMMU, %g3
+	srlx		%g3, CTX_PGSZ1_NUC_SHIFT, %g4
+	sllx		%g4, CTX_PGSZ1_NUC_SHIFT, %g4
+	or		%g5, %g4, %g5	/* Preserve nucleus page size fields */
+	stxa		%g5, [%g2] ASI_DMMU
+	mov		0x40, %g4
+	stxa		%g0, [%g4] ASI_DMMU_DEMAP
+	stxa		%g0, [%g4] ASI_IMMU_DEMAP
+	stxa		%g3, [%g2] ASI_DMMU
+	retry
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	.globl		xcall_flush_tlb_page
+xcall_flush_tlb_page:	/* 20 insns */
+	/* %g5=context, %g1=vaddr */
+	mov		PRIMARY_CONTEXT, %g4
+	ldxa		[%g4] ASI_DMMU, %g2
+	srlx		%g2, CTX_PGSZ1_NUC_SHIFT, %g4
+	sllx		%g4, CTX_PGSZ1_NUC_SHIFT, %g4
+	or		%g5, %g4, %g5
+	mov		PRIMARY_CONTEXT, %g4
+	stxa		%g5, [%g4] ASI_DMMU
+	andcc		%g1, 0x1, %g0
+	be,pn		%icc, 2f
+	 andn		%g1, 0x1, %g5
+	stxa		%g0, [%g5] ASI_IMMU_DEMAP
+2:	stxa		%g0, [%g5] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g2, [%g4] ASI_DMMU
+	retry
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	.globl		xcall_flush_tlb_kernel_range
+xcall_flush_tlb_kernel_range:	/* 44 insns */
+	sethi		%hi(PAGE_SIZE - 1), %g2
+	or		%g2, %lo(PAGE_SIZE - 1), %g2
+	andn		%g1, %g2, %g1
+	andn		%g7, %g2, %g7
+	sub		%g7, %g1, %g3
+	srlx		%g3, 18, %g2
+	brnz,pn		%g2, 2f
+	 add		%g2, 1, %g2
+	sub		%g3, %g2, %g3
+	or		%g1, 0x20, %g1		! Nucleus
+1:	stxa		%g0, [%g1 + %g3] ASI_DMMU_DEMAP
+	stxa		%g0, [%g1 + %g3] ASI_IMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%g3, 1b
+	 sub		%g3, %g2, %g3
+	retry
+2:	mov		63 * 8, %g1
+1:	ldxa		[%g1] ASI_ITLB_DATA_ACCESS, %g2
+	andcc		%g2, 0x40, %g0			/* _PAGE_L_4U */
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %g2
+	stxa		%g0, [%g2] ASI_IMMU
+	stxa		%g0, [%g1] ASI_ITLB_DATA_ACCESS
+	membar		#Sync
+2:	ldxa		[%g1] ASI_DTLB_DATA_ACCESS, %g2
+	andcc		%g2, 0x40, %g0
+	bne,pn		%xcc, 2f
+	 mov		TLB_TAG_ACCESS, %g2
+	stxa		%g0, [%g2] ASI_DMMU
+	stxa		%g0, [%g1] ASI_DTLB_DATA_ACCESS
+	membar		#Sync
+2:	sub		%g1, 8, %g1
+	brgez,pt	%g1, 1b
+	 nop
+	retry
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/* This runs in a very controlled environment, so we do
+	 * not need to worry about BH races etc.
+	 */
+	.globl		xcall_sync_tick
+xcall_sync_tick:
+
+661:	rdpr		%pstate, %g2
+	wrpr		%g2, PSTATE_IG | PSTATE_AG, %pstate
+	.section	.sun4v_2insn_patch, "ax"
+	.word		661b
+	nop
+	nop
+	.previous
+
+	rdpr		%pil, %g2
+	wrpr		%g0, PIL_NORMAL_MAX, %pil
+	sethi		%hi(109f), %g7
+	b,pt		%xcc, etrap_irq
+109:	 or		%g7, %lo(109b), %g7
+#ifdef CONFIG_TRACE_IRQFLAGS
+	call		trace_hardirqs_off
+	 nop
+#endif
+	call		smp_synchronize_tick_client
+	 nop
+	b		rtrap_xcall
+	 ldx		[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
+
+	.globl		xcall_fetch_glob_regs
+xcall_fetch_glob_regs:
+	sethi		%hi(global_cpu_snapshot), %g1
+	or		%g1, %lo(global_cpu_snapshot), %g1
+	__GET_CPUID(%g2)
+	sllx		%g2, 6, %g3
+	add		%g1, %g3, %g1
+	rdpr		%tstate, %g7
+	stx		%g7, [%g1 + GR_SNAP_TSTATE]
+	rdpr		%tpc, %g7
+	stx		%g7, [%g1 + GR_SNAP_TPC]
+	rdpr		%tnpc, %g7
+	stx		%g7, [%g1 + GR_SNAP_TNPC]
+	stx		%o7, [%g1 + GR_SNAP_O7]
+	stx		%i7, [%g1 + GR_SNAP_I7]
+	/* Don't try this at home kids... */
+	rdpr		%cwp, %g3
+	sub		%g3, 1, %g7
+	wrpr		%g7, %cwp
+	mov		%i7, %g7
+	wrpr		%g3, %cwp
+	stx		%g7, [%g1 + GR_SNAP_RPC]
+	sethi		%hi(trap_block), %g7
+	or		%g7, %lo(trap_block), %g7
+	sllx		%g2, TRAP_BLOCK_SZ_SHIFT, %g2
+	add		%g7, %g2, %g7
+	ldx		[%g7 + TRAP_PER_CPU_THREAD], %g3
+	stx		%g3, [%g1 + GR_SNAP_THREAD]
+	retry
+
+	.globl		xcall_fetch_glob_pmu
+xcall_fetch_glob_pmu:
+	sethi		%hi(global_cpu_snapshot), %g1
+	or		%g1, %lo(global_cpu_snapshot), %g1
+	__GET_CPUID(%g2)
+	sllx		%g2, 6, %g3
+	add		%g1, %g3, %g1
+	rd		%pic, %g7
+	stx		%g7, [%g1 + (4 * 8)]
+	rd		%pcr, %g7
+	stx		%g7, [%g1 + (0 * 8)]
+	retry
+
+	.globl		xcall_fetch_glob_pmu_n4
+xcall_fetch_glob_pmu_n4:
+	sethi		%hi(global_cpu_snapshot), %g1
+	or		%g1, %lo(global_cpu_snapshot), %g1
+	__GET_CPUID(%g2)
+	sllx		%g2, 6, %g3
+	add		%g1, %g3, %g1
+
+	ldxa		[%g0] ASI_PIC, %g7
+	stx		%g7, [%g1 + (4 * 8)]
+	mov		0x08, %g3
+	ldxa		[%g3] ASI_PIC, %g7
+	stx		%g7, [%g1 + (5 * 8)]
+	mov		0x10, %g3
+	ldxa		[%g3] ASI_PIC, %g7
+	stx		%g7, [%g1 + (6 * 8)]
+	mov		0x18, %g3
+	ldxa		[%g3] ASI_PIC, %g7
+	stx		%g7, [%g1 + (7 * 8)]
+
+	mov		%o0, %g2
+	mov		%o1, %g3
+	mov		%o5, %g7
+
+	mov		HV_FAST_VT_GET_PERFREG, %o5
+	mov		3, %o0
+	ta		HV_FAST_TRAP
+	stx		%o1, [%g1 + (3 * 8)]
+	mov		HV_FAST_VT_GET_PERFREG, %o5
+	mov		2, %o0
+	ta		HV_FAST_TRAP
+	stx		%o1, [%g1 + (2 * 8)]
+	mov		HV_FAST_VT_GET_PERFREG, %o5
+	mov		1, %o0
+	ta		HV_FAST_TRAP
+	stx		%o1, [%g1 + (1 * 8)]
+	mov		HV_FAST_VT_GET_PERFREG, %o5
+	mov		0, %o0
+	ta		HV_FAST_TRAP
+	stx		%o1, [%g1 + (0 * 8)]
+
+	mov		%g2, %o0
+	mov		%g3, %o1
+	mov		%g7, %o5
+
+	retry
+
+__cheetah_xcall_flush_tlb_kernel_range:	/* 44 insns */
+	sethi		%hi(PAGE_SIZE - 1), %g2
+	or		%g2, %lo(PAGE_SIZE - 1), %g2
+	andn		%g1, %g2, %g1
+	andn		%g7, %g2, %g7
+	sub		%g7, %g1, %g3
+	srlx		%g3, 18, %g2
+	brnz,pn		%g2, 2f
+	 sethi		%hi(PAGE_SIZE), %g2
+	sub		%g3, %g2, %g3
+	or		%g1, 0x20, %g1		! Nucleus
+1:	stxa		%g0, [%g1 + %g3] ASI_DMMU_DEMAP
+	stxa		%g0, [%g1 + %g3] ASI_IMMU_DEMAP
+	membar		#Sync
+	brnz,pt		%g3, 1b
+	 sub		%g3, %g2, %g3
+	retry
+2:	mov		0x80, %g2
+	stxa		%g0, [%g2] ASI_DMMU_DEMAP
+	membar		#Sync
+	stxa		%g0, [%g2] ASI_IMMU_DEMAP
+	membar		#Sync
+	retry
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	.align		32
+	.globl		xcall_flush_dcache_page_cheetah
+xcall_flush_dcache_page_cheetah: /* %g1 == physical page address */
+	sethi		%hi(PAGE_SIZE), %g3
+1:	subcc		%g3, (1 << 5), %g3
+	stxa		%g0, [%g1 + %g3] ASI_DCACHE_INVALIDATE
+	membar		#Sync
+	bne,pt		%icc, 1b
+	 nop
+	retry
+	nop
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+	.globl		xcall_flush_dcache_page_spitfire
+xcall_flush_dcache_page_spitfire: /* %g1 == physical page address
+				     %g7 == kernel page virtual address
+				     %g5 == (page->mapping != NULL)  */
+#ifdef DCACHE_ALIASING_POSSIBLE
+	srlx		%g1, (13 - 2), %g1	! Form tag comparator
+	sethi		%hi(L1DCACHE_SIZE), %g3	! D$ size == 16K
+	sub		%g3, (1 << 5), %g3	! D$ linesize == 32
+1:	ldxa		[%g3] ASI_DCACHE_TAG, %g2
+	andcc		%g2, 0x3, %g0
+	be,pn		%xcc, 2f
+	 andn		%g2, 0x3, %g2
+	cmp		%g2, %g1
+
+	bne,pt		%xcc, 2f
+	 nop
+	stxa		%g0, [%g3] ASI_DCACHE_TAG
+	membar		#Sync
+2:	cmp		%g3, 0
+	bne,pt		%xcc, 1b
+	 sub		%g3, (1 << 5), %g3
+
+	brz,pn		%g5, 2f
+#endif /* DCACHE_ALIASING_POSSIBLE */
+	 sethi		%hi(PAGE_SIZE), %g3
+
+1:	flush		%g7
+	subcc		%g3, (1 << 5), %g3
+	bne,pt		%icc, 1b
+	 add		%g7, (1 << 5), %g7
+
+2:	retry
+	nop
+	nop
+
+	/* %g5:	error
+	 * %g6:	tlb op
+	 */
+__hypervisor_tlb_xcall_error:
+	mov	%g5, %g4
+	mov	%g6, %g5
+	ba,pt	%xcc, etrap
+	 rd	%pc, %g7
+	mov	%l4, %o0
+	call	hypervisor_tlbop_error_xcall
+	 mov	%l5, %o1
+	ba,a,pt	%xcc, rtrap
+
+	.globl		__hypervisor_xcall_flush_tlb_mm
+__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
+	/* %g5=ctx, %g1,%g2,%g3,%g4,%g7=scratch, %g6=unusable */
+	mov		%o0, %g2
+	mov		%o1, %g3
+	mov		%o2, %g4
+	mov		%o3, %g1
+	mov		%o5, %g7
+	clr		%o0		/* ARG0: CPU lists unimplemented */
+	clr		%o1		/* ARG1: CPU lists unimplemented */
+	mov		%g5, %o2	/* ARG2: mmu context */
+	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
+	mov		HV_FAST_MMU_DEMAP_CTX, %o5
+	ta		HV_FAST_TRAP
+	mov		HV_FAST_MMU_DEMAP_CTX, %g6
+	brnz,pn		%o0, 1f
+	 mov		%o0, %g5
+	mov		%g2, %o0
+	mov		%g3, %o1
+	mov		%g4, %o2
+	mov		%g1, %o3
+	mov		%g7, %o5
+	membar		#Sync
+	retry
+1:	sethi		%hi(__hypervisor_tlb_xcall_error), %g4
+	jmpl		%g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+	 nop
+
+	.globl		__hypervisor_xcall_flush_tlb_page
+__hypervisor_xcall_flush_tlb_page: /* 20 insns */
+	/* %g5=ctx, %g1=vaddr */
+	mov		%o0, %g2
+	mov		%o1, %g3
+	mov		%o2, %g4
+	mov		%g1, %o0	        /* ARG0: virtual address */
+	mov		%g5, %o1		/* ARG1: mmu context */
+	mov		HV_MMU_ALL, %o2		/* ARG2: flags */
+	srlx		%o0, PAGE_SHIFT, %o0
+	sllx		%o0, PAGE_SHIFT, %o0
+	ta		HV_MMU_UNMAP_ADDR_TRAP
+	mov		HV_MMU_UNMAP_ADDR_TRAP, %g6
+	brnz,a,pn	%o0, 1f
+	 mov		%o0, %g5
+	mov		%g2, %o0
+	mov		%g3, %o1
+	mov		%g4, %o2
+	membar		#Sync
+	retry
+1:	sethi		%hi(__hypervisor_tlb_xcall_error), %g4
+	jmpl		%g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+	 nop
+
+	.globl		__hypervisor_xcall_flush_tlb_kernel_range
+__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
+	/* %g1=start, %g7=end, %g2,%g3,%g4,%g5,%g6=scratch */
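+	/* Ranges up to 256KB are torn down page by page with
+	 * HV_MMU_UNMAP_ADDR_TRAP; larger ranges branch to label 2 and
+	 * demap the entire nucleus context with HV_FAST_MMU_DEMAP_CTX.
+	 */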
+	sethi		%hi(PAGE_SIZE - 1), %g2
+	or		%g2, %lo(PAGE_SIZE - 1), %g2
+	andn		%g1, %g2, %g1
+	andn		%g7, %g2, %g7
+	sub		%g7, %g1, %g3
+	srlx		%g3, 18, %g7
+	add		%g2, 1, %g2
+	sub		%g3, %g2, %g3
+	mov		%o0, %g2
+	mov		%o1, %g4
+	brnz,pn		%g7, 2f
+	 mov		%o2, %g7
+1:	add		%g1, %g3, %o0	/* ARG0: virtual address */
+	mov		0, %o1		/* ARG1: mmu context */
+	mov		HV_MMU_ALL, %o2	/* ARG2: flags */
+	ta		HV_MMU_UNMAP_ADDR_TRAP
+	mov		HV_MMU_UNMAP_ADDR_TRAP, %g6
+	brnz,pn		%o0, 1f
+	 mov		%o0, %g5
+	sethi		%hi(PAGE_SIZE), %o2
+	brnz,pt		%g3, 1b
+	 sub		%g3, %o2, %g3
+5:	mov		%g2, %o0
+	mov		%g4, %o1
+	mov		%g7, %o2
+	membar		#Sync
+	retry
+1:	sethi		%hi(__hypervisor_tlb_xcall_error), %g4
+	jmpl		%g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+	 nop
+2:	mov		%o3, %g1
+	mov		%o5, %g3
+	mov		0, %o0		/* ARG0: CPU lists unimplemented */
+	mov		0, %o1		/* ARG1: CPU lists unimplemented */
+	mov		0, %o2		/* ARG2: mmu context == nucleus */
+	mov		HV_MMU_ALL, %o3	/* ARG3: flags */
+	mov		HV_FAST_MMU_DEMAP_CTX, %o5
+	ta		HV_FAST_TRAP
+	mov		%g1, %o3
+	brz,pt		%o0, 5b
+	 mov		%g3, %o5
+	mov		HV_FAST_MMU_DEMAP_CTX, %g6
+	ba,pt		%xcc, 1b
+	 clr		%g5
+
+	/* These just get rescheduled to PIL vectors. */
+	.globl		xcall_call_function
+xcall_call_function:
+	wr		%g0, (1 << PIL_SMP_CALL_FUNC), %set_softint
+	retry
+
+	.globl		xcall_call_function_single
+xcall_call_function_single:
+	wr		%g0, (1 << PIL_SMP_CALL_FUNC_SNGL), %set_softint
+	retry
+
+	.globl		xcall_receive_signal
+xcall_receive_signal:
+	wr		%g0, (1 << PIL_SMP_RECEIVE_SIGNAL), %set_softint
+	retry
+
+	.globl		xcall_capture
+xcall_capture:
+	wr		%g0, (1 << PIL_SMP_CAPTURE), %set_softint
+	retry
+
+#ifdef CONFIG_KGDB
+	.globl		xcall_kgdb_capture
+xcall_kgdb_capture:
+	wr		%g0, (1 << PIL_KGDB_CAPTURE), %set_softint
+	retry
+#endif
+
+#endif /* CONFIG_SMP */
+
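+	/* Boot-time patching: each call below overwrites a generic
+	 * cache/TLB entry point with its Cheetah-specific variant.
+	 * tlb_patch_one (a C helper defined elsewhere in this tree)
+	 * copies %o2 instruction words from the routine at %o1 over
+	 * the routine at %o0 and flushes the I-cache as it goes.
+	 */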
+	.globl		cheetah_patch_cachetlbops
+cheetah_patch_cachetlbops:
+	save		%sp, -128, %sp
+
+	sethi		%hi(__flush_tlb_mm), %o0
+	or		%o0, %lo(__flush_tlb_mm), %o0
+	sethi		%hi(__cheetah_flush_tlb_mm), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_mm), %o1
+	call		tlb_patch_one
+	 mov		19, %o2
+
+	sethi		%hi(__flush_tlb_page), %o0
+	or		%o0, %lo(__flush_tlb_page), %o0
+	sethi		%hi(__cheetah_flush_tlb_page), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_page), %o1
+	call		tlb_patch_one
+	 mov		22, %o2
+
+	sethi		%hi(__flush_tlb_pending), %o0
+	or		%o0, %lo(__flush_tlb_pending), %o0
+	sethi		%hi(__cheetah_flush_tlb_pending), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_pending), %o1
+	call		tlb_patch_one
+	 mov		27, %o2
+
+	sethi		%hi(__flush_tlb_kernel_range), %o0
+	or		%o0, %lo(__flush_tlb_kernel_range), %o0
+	sethi		%hi(__cheetah_flush_tlb_kernel_range), %o1
+	or		%o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
+	call		tlb_patch_one
+	 mov		31, %o2
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	sethi		%hi(__flush_dcache_page), %o0
+	or		%o0, %lo(__flush_dcache_page), %o0
+	sethi		%hi(__cheetah_flush_dcache_page), %o1
+	or		%o1, %lo(__cheetah_flush_dcache_page), %o1
+	call		tlb_patch_one
+	 mov		11, %o2
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+#ifdef CONFIG_SMP
+	sethi		%hi(xcall_flush_tlb_kernel_range), %o0
+	or		%o0, %lo(xcall_flush_tlb_kernel_range), %o0
+	sethi		%hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
+	or		%o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
+	call		tlb_patch_one
+	 mov		44, %o2
+#endif /* CONFIG_SMP */
+
+	ret
+	 restore
+
+	.globl		hypervisor_patch_cachetlbops
+hypervisor_patch_cachetlbops:
+	save		%sp, -128, %sp
+
+	sethi		%hi(__flush_tlb_mm), %o0
+	or		%o0, %lo(__flush_tlb_mm), %o0
+	sethi		%hi(__hypervisor_flush_tlb_mm), %o1
+	or		%o1, %lo(__hypervisor_flush_tlb_mm), %o1
+	call		tlb_patch_one
+	 mov		19, %o2
+
+	sethi		%hi(__flush_tlb_page), %o0
+	or		%o0, %lo(__flush_tlb_page), %o0
+	sethi		%hi(__hypervisor_flush_tlb_page), %o1
+	or		%o1, %lo(__hypervisor_flush_tlb_page), %o1
+	call		tlb_patch_one
+	 mov		22, %o2
+
+	sethi		%hi(__flush_tlb_pending), %o0
+	or		%o0, %lo(__flush_tlb_pending), %o0
+	sethi		%hi(__hypervisor_flush_tlb_pending), %o1
+	or		%o1, %lo(__hypervisor_flush_tlb_pending), %o1
+	call		tlb_patch_one
+	 mov		27, %o2
+
+	sethi		%hi(__flush_tlb_kernel_range), %o0
+	or		%o0, %lo(__flush_tlb_kernel_range), %o0
+	sethi		%hi(__hypervisor_flush_tlb_kernel_range), %o1
+	or		%o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
+	call		tlb_patch_one
+	 mov		31, %o2
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+	sethi		%hi(__flush_dcache_page), %o0
+	or		%o0, %lo(__flush_dcache_page), %o0
+	sethi		%hi(__hypervisor_flush_dcache_page), %o1
+	or		%o1, %lo(__hypervisor_flush_dcache_page), %o1
+	call		tlb_patch_one
+	 mov		2, %o2
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+#ifdef CONFIG_SMP
+	sethi		%hi(xcall_flush_tlb_mm), %o0
+	or		%o0, %lo(xcall_flush_tlb_mm), %o0
+	sethi		%hi(__hypervisor_xcall_flush_tlb_mm), %o1
+	or		%o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
+	call		tlb_patch_one
+	 mov		24, %o2
+
+	sethi		%hi(xcall_flush_tlb_page), %o0
+	or		%o0, %lo(xcall_flush_tlb_page), %o0
+	sethi		%hi(__hypervisor_xcall_flush_tlb_page), %o1
+	or		%o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
+	call		tlb_patch_one
+	 mov		20, %o2
+
+	sethi		%hi(xcall_flush_tlb_kernel_range), %o0
+	or		%o0, %lo(xcall_flush_tlb_kernel_range), %o0
+	sethi		%hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+	or		%o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
+	call		tlb_patch_one
+	 mov		44, %o2
+#endif /* CONFIG_SMP */
+
+	ret
+	 restore
diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S
new file mode 100644
index 0000000..adaef6e
--- /dev/null
+++ b/arch/sparc/mm/viking.S
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * viking.S: High speed Viking cache/mmu operations
+ *
+ * Copyright (C) 1997  Eddie C. Dost  (ecd@skynet.be)
+ * Copyright (C) 1997,1998,1999  Jakub Jelinek  (jj@ultra.linux.cz)
+ * Copyright (C) 1999  Pavel Semerad  (semerad@ss1000.ms.mff.cuni.cz)
+ */
+
+#include <asm/ptrace.h>
+#include <asm/psr.h>
+#include <asm/asm-offsets.h>
+#include <asm/asi.h>
+#include <asm/mxcc.h>
+#include <asm/page.h>
+#include <asm/pgtsrmmu.h>
+#include <asm/viking.h>
+
+#ifdef CONFIG_SMP
+	.data
+	.align	4
+sun4dsmp_flush_tlb_spin:
+	.word	0
+#endif
+
+	.text
+	.align	4
+
+	.globl	viking_flush_cache_all, viking_flush_cache_mm
+	.globl	viking_flush_cache_range, viking_flush_cache_page
+	.globl	viking_flush_page, viking_mxcc_flush_page
+	.globl	viking_flush_page_for_dma, viking_flush_page_to_ram
+	.globl	viking_flush_sig_insns
+	.globl	viking_flush_tlb_all, viking_flush_tlb_mm
+	.globl	viking_flush_tlb_range, viking_flush_tlb_page
+
+viking_flush_page:
+	sethi	%hi(PAGE_OFFSET), %g2
+	sub	%o0, %g2, %g3
+	srl	%g3, 12, %g1		! ppage >> 12
+
+	clr	%o1			! set counter, 0 - 127
+	sethi	%hi(PAGE_OFFSET + PAGE_SIZE - 0x80000000), %o3
+	sethi	%hi(0x80000000), %o4
+	sethi	%hi(VIKING_PTAG_VALID), %o5
+	sethi	%hi(2*PAGE_SIZE), %o0
+	sethi	%hi(PAGE_SIZE), %g7
+	clr	%o2			! block counter, 0 - 3
+5:
+	sll	%o1, 5, %g4
+	or	%g4, %o4, %g4		! 0x80000000 | (set << 5)
+
+	sll	%o2, 26, %g5		! block << 26
+6:
+	or	%g5, %g4, %g5
+	ldda	[%g5] ASI_M_DATAC_TAG, %g2
+	cmp	%g3, %g1		! ptag == ppage?
+	bne	7f
+	 inc	%o2
+
+	andcc	%g2, %o5, %g0		! ptag VALID?
+	be	7f
+	 add	%g4, %o3, %g2		! (PAGE_OFFSET + PAGE_SIZE) | (set << 5)
+	ld	[%g2], %g3
+	ld	[%g2 + %g7], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	ld	[%g2 + %g7], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	ld	[%g2 + %g7], %g3
+	add	%g2, %o0, %g2
+	ld	[%g2], %g3
+	b	8f
+	 ld	[%g2 + %g7], %g3
+
+7:
+	cmp	%o2, 3
+	ble	6b
+	 sll	%o2, 26, %g5			! block << 26
+
+8:	inc	%o1
+	cmp	%o1, 0x7f
+	ble	5b
+	 clr	%o2
+
+9:	retl
+	 nop
+
+viking_mxcc_flush_page:
+	sethi	%hi(PAGE_OFFSET), %g2
+	sub	%o0, %g2, %g3
+	sub	%g3, -PAGE_SIZE, %g3		! ppage + PAGE_SIZE
+	sethi	%hi(MXCC_SRCSTREAM), %o3	! assume %hi(MXCC_SRCSTREAM) == %hi(MXCC_DESSTREAM)
+	mov	0x10, %g2			! set cacheable bit
+	or	%o3, %lo(MXCC_SRCSTREAM), %o2
+	or	%o3, %lo(MXCC_DESSTREAM), %o3
+	sub	%g3, MXCC_STREAM_SIZE, %g3
+6:
+	stda	%g2, [%o2] ASI_M_MXCC
+	stda	%g2, [%o3] ASI_M_MXCC
+	andncc	%g3, PAGE_MASK, %g0
+	bne	6b
+	 sub	%g3, MXCC_STREAM_SIZE, %g3
+
+9:	retl
+	 nop
+
+viking_flush_cache_page:
+viking_flush_cache_range:
+#ifndef CONFIG_SMP
+	ld	[%o0 + VMA_VM_MM], %o0
+#endif
+viking_flush_cache_mm:
+#ifndef CONFIG_SMP
+	ld	[%o0 + AOFF_mm_context], %g1
+	cmp	%g1, -1
+	bne	viking_flush_cache_all
+	 nop
+	b,a	viking_flush_cache_out
+#endif
+viking_flush_cache_all:
+	WINDOW_FLUSH(%g4, %g5)
+viking_flush_cache_out:
+	retl
+	 nop
+
+viking_flush_tlb_all:
+	mov	0x400, %g1
+	retl
+	 sta	%g0, [%g1] ASI_M_FLUSH_PROBE
+
+viking_flush_tlb_mm:
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o1
+	lda	[%g1] ASI_M_MMUREGS, %g5
+#ifndef CONFIG_SMP
+	cmp	%o1, -1
+	be	1f
+#endif
+	mov	0x300, %g2
+	sta	%o1, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%g2] ASI_M_FLUSH_PROBE
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+#ifndef CONFIG_SMP
+1:	retl
+	 nop
+#endif
+
+viking_flush_tlb_range:
+	ld	[%o0 + VMA_VM_MM], %o0
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3
+	lda	[%g1] ASI_M_MMUREGS, %g5
+#ifndef CONFIG_SMP
+	cmp	%o3, -1
+	be	2f
+#endif
+	sethi	%hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	and	%o1, %o4, %o1
+	add	%o1, 0x200, %o1
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+1:	sub	%o1, %o4, %o1
+	cmp	%o1, %o2
+	blu,a	1b
+	 sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+#ifndef CONFIG_SMP
+2:	retl
+	 nop
+#endif
+
+viking_flush_tlb_page:
+	ld	[%o0 + VMA_VM_MM], %o0
+	mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o3
+	lda	[%g1] ASI_M_MMUREGS, %g5
+#ifndef CONFIG_SMP
+	cmp	%o3, -1
+	be	1f
+#endif
+	and	%o1, PAGE_MASK, %o1
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	retl
+	 sta	%g5, [%g1] ASI_M_MMUREGS
+#ifndef CONFIG_SMP
+1:	retl
+	 nop
+#endif
+
+viking_flush_page_to_ram:
+viking_flush_page_for_dma:
+viking_flush_sig_insns:
+	retl
+	 nop
+
+#ifdef CONFIG_SMP
+	.globl	sun4dsmp_flush_tlb_all, sun4dsmp_flush_tlb_mm
+	.globl	sun4dsmp_flush_tlb_range, sun4dsmp_flush_tlb_page
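+	/* All four sun4d SMP variants serialize on the byte lock
+	 * declared above: ldstub atomically sets it (acquire), the
+	 * "stb %g0" in the return delay slot releases it, and
+	 * contending CPUs spin on plain loads before retrying ldstub.
+	 */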
+sun4dsmp_flush_tlb_all:
+	sethi	%hi(sun4dsmp_flush_tlb_spin), %g3
+1:	ldstub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	tst	%g5
+	bne	2f
+	 mov	0x400, %g1
+	sta	%g0, [%g1] ASI_M_FLUSH_PROBE
+	retl
+	 stb	%g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+2:	tst	%g5
+	bne,a	2b
+	 ldub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	b,a	1b
+
+sun4dsmp_flush_tlb_mm:
+	sethi	%hi(sun4dsmp_flush_tlb_spin), %g3
+1:	ldstub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	tst	%g5
+	bne	2f
+	 mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + AOFF_mm_context], %o1
+	lda	[%g1] ASI_M_MMUREGS, %g5
+	mov	0x300, %g2
+	sta	%o1, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%g2] ASI_M_FLUSH_PROBE
+	sta	%g5, [%g1] ASI_M_MMUREGS
+	retl
+	 stb	%g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+2:	tst	%g5
+	bne,a	2b
+	 ldub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	b,a	1b
+
+sun4dsmp_flush_tlb_range:
+	sethi	%hi(sun4dsmp_flush_tlb_spin), %g3
+1:	ldstub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	tst	%g5
+	bne	3f
+	 mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + VMA_VM_MM], %o0
+	ld	[%o0 + AOFF_mm_context], %o3
+	lda	[%g1] ASI_M_MMUREGS, %g5
+	sethi	%hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	and	%o1, %o4, %o1
+	add	%o1, 0x200, %o1
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+2:	sub	%o1, %o4, %o1
+	cmp	%o1, %o2
+	blu,a	2b
+	 sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	sta	%g5, [%g1] ASI_M_MMUREGS
+	retl
+	 stb	%g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+3:	tst	%g5
+	bne,a	3b
+	 ldub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	b,a	1b
+
+sun4dsmp_flush_tlb_page:
+	sethi	%hi(sun4dsmp_flush_tlb_spin), %g3
+1:	ldstub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	tst	%g5
+	bne	2f
+	 mov	SRMMU_CTX_REG, %g1
+	ld	[%o0 + VMA_VM_MM], %o0
+	ld	[%o0 + AOFF_mm_context], %o3
+	lda	[%g1] ASI_M_MMUREGS, %g5
+	and	%o1, PAGE_MASK, %o1
+	sta	%o3, [%g1] ASI_M_MMUREGS
+	sta	%g0, [%o1] ASI_M_FLUSH_PROBE
+	sta	%g5, [%g1] ASI_M_MMUREGS
+	retl
+	 stb	%g0, [%g3 + %lo(sun4dsmp_flush_tlb_spin)]
+2:	tst	%g5
+	bne,a	2b
+	 ldub	[%g3 + %lo(sun4dsmp_flush_tlb_spin)], %g5
+	b,a	1b
+	 nop
+#endif
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile
new file mode 100644
index 0000000..d32aac3
--- /dev/null
+++ b/arch/sparc/net/Makefile
@@ -0,0 +1,7 @@
+#
+# Arch-specific network modules
+#
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp_$(BITS).o
+ifeq ($(BITS),32)
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_32.o
+endif
diff --git a/arch/sparc/net/bpf_jit_32.h b/arch/sparc/net/bpf_jit_32.h
new file mode 100644
index 0000000..cfd6a8b
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_32.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+/* Conventions:
+ *  %g1 : temporary
+ *  %g2 : Secondary temporary used by SKB data helper stubs.
+ *  %g3 : packet offset passed into SKB data helper stubs.
+ *  %o0 : pointer to skb (first argument given to JIT function)
+ *  %o1 : BPF A accumulator
+ *  %o2 : BPF X accumulator
+ *  %o3 : Holds saved %o7 so we can call helper functions without needing
+ *        to allocate a register window.
+ *  %o4 : skb->len - skb->data_len
+ *  %o5 : skb->data
+ */
+
+#ifndef __ASSEMBLER__
+#define G0		0x00
+#define G1		0x01
+#define G2		0x02
+#define G3		0x03
+#define G6		0x06
+#define O0		0x08
+#define O1		0x09
+#define O2		0x0a
+#define O3		0x0b
+#define O4		0x0c
+#define O5		0x0d
+#define SP		0x0e
+#define O7		0x0f
+#define FP		0x1e
+
+#define r_SKB		O0
+#define r_A		O1
+#define r_X		O2
+#define r_saved_O7	O3
+#define r_HEADLEN	O4
+#define r_SKB_DATA	O5
+#define r_TMP		G1
+#define r_TMP2		G2
+#define r_OFF		G3
+
+/* assembly code in arch/sparc/net/bpf_jit_asm_32.S */
+extern u32 bpf_jit_load_word[];
+extern u32 bpf_jit_load_half[];
+extern u32 bpf_jit_load_byte[];
+extern u32 bpf_jit_load_byte_msh[];
+extern u32 bpf_jit_load_word_positive_offset[];
+extern u32 bpf_jit_load_half_positive_offset[];
+extern u32 bpf_jit_load_byte_positive_offset[];
+extern u32 bpf_jit_load_byte_msh_positive_offset[];
+extern u32 bpf_jit_load_word_negative_offset[];
+extern u32 bpf_jit_load_half_negative_offset[];
+extern u32 bpf_jit_load_byte_negative_offset[];
+extern u32 bpf_jit_load_byte_msh_negative_offset[];
+
+#else
+#define r_SKB		%o0
+#define r_A		%o1
+#define r_X		%o2
+#define r_saved_O7	%o3
+#define r_HEADLEN	%o4
+#define r_SKB_DATA	%o5
+#define r_TMP		%g1
+#define r_TMP2		%g2
+#define r_OFF		%g3
+#endif
+
+#endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h
new file mode 100644
index 0000000..fbc836f
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_64.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#ifndef __ASSEMBLER__
+#define G0		0x00
+#define G1		0x01
+#define G2		0x02
+#define G3		0x03
+#define G6		0x06
+#define G7		0x07
+#define O0		0x08
+#define O1		0x09
+#define O2		0x0a
+#define O3		0x0b
+#define O4		0x0c
+#define O5		0x0d
+#define SP		0x0e
+#define O7		0x0f
+#define L0		0x10
+#define L1		0x11
+#define L2		0x12
+#define L3		0x13
+#define L4		0x14
+#define L5		0x15
+#define L6		0x16
+#define L7		0x17
+#define I0		0x18
+#define I1		0x19
+#define I2		0x1a
+#define I3		0x1b
+#define I4		0x1c
+#define I5		0x1d
+#define FP		0x1e
+#define I7		0x1f
+#endif
+
+#endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm_32.S b/arch/sparc/net/bpf_jit_asm_32.S
new file mode 100644
index 0000000..a2e28e0
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_asm_32.S
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/ptrace.h>
+
+#include "bpf_jit_32.h"
+
+#define SAVE_SZ		96
+#define SCRATCH_OFF	72
+#define BE_PTR(label)	be label
+#define SIGN_EXTEND(reg)
+
+#define SKF_MAX_NEG_OFF	(-0x200000) /* SKF_LL_OFF from filter.h */
+
+	.text
+	.globl	bpf_jit_load_word
+bpf_jit_load_word:
+	cmp	r_OFF, 0
+	bl	bpf_slow_path_word_neg
+	 nop
+	.globl	bpf_jit_load_word_positive_offset
+bpf_jit_load_word_positive_offset:
+	sub	r_HEADLEN, r_OFF, r_TMP
+	cmp	r_TMP, 3
+	ble	bpf_slow_path_word
+	 add	r_SKB_DATA, r_OFF, r_TMP
+	andcc	r_TMP, 3, %g0
+	bne	load_word_unaligned
+	 nop
+	retl
+	 ld	[r_TMP], r_A
+load_word_unaligned:
+	ldub	[r_TMP + 0x0], r_OFF
+	ldub	[r_TMP + 0x1], r_TMP2
+	sll	r_OFF, 8, r_OFF
+	or	r_OFF, r_TMP2, r_OFF
+	ldub	[r_TMP + 0x2], r_TMP2
+	sll	r_OFF, 8, r_OFF
+	or	r_OFF, r_TMP2, r_OFF
+	ldub	[r_TMP + 0x3], r_TMP2
+	sll	r_OFF, 8, r_OFF
+	retl
+	 or	r_OFF, r_TMP2, r_A
+
+	.globl	bpf_jit_load_half
+bpf_jit_load_half:
+	cmp	r_OFF, 0
+	bl	bpf_slow_path_half_neg
+	 nop
+	.globl	bpf_jit_load_half_positive_offset
+bpf_jit_load_half_positive_offset:
+	sub	r_HEADLEN, r_OFF, r_TMP
+	cmp	r_TMP, 1
+	ble	bpf_slow_path_half
+	 add	r_SKB_DATA, r_OFF, r_TMP
+	andcc	r_TMP, 1, %g0
+	bne	load_half_unaligned
+	 nop
+	retl
+	 lduh	[r_TMP], r_A
+load_half_unaligned:
+	ldub	[r_TMP + 0x0], r_OFF
+	ldub	[r_TMP + 0x1], r_TMP2
+	sll	r_OFF, 8, r_OFF
+	retl
+	 or	r_OFF, r_TMP2, r_A
+
+	.globl	bpf_jit_load_byte
+bpf_jit_load_byte:
+	cmp	r_OFF, 0
+	bl	bpf_slow_path_byte_neg
+	 nop
+	.globl	bpf_jit_load_byte_positive_offset
+bpf_jit_load_byte_positive_offset:
+	cmp	r_OFF, r_HEADLEN
+	bge	bpf_slow_path_byte
+	 nop
+	retl
+	 ldub	[r_SKB_DATA + r_OFF], r_A
+
+	.globl	bpf_jit_load_byte_msh
+bpf_jit_load_byte_msh:
+	cmp	r_OFF, 0
+	bl	bpf_slow_path_byte_msh_neg
+	 nop
+	.globl	bpf_jit_load_byte_msh_positive_offset
+bpf_jit_load_byte_msh_positive_offset:
+	cmp	r_OFF, r_HEADLEN
+	bge	bpf_slow_path_byte_msh
+	 nop
+	ldub	[r_SKB_DATA + r_OFF], r_OFF
+	and	r_OFF, 0xf, r_OFF
+	retl
+	 sll	r_OFF, 2, r_X
+
+#define bpf_slow_path_common(LEN)	\
+	save	%sp, -SAVE_SZ, %sp;	\
+	mov	%i0, %o0;		\
+	mov	r_OFF, %o1;		\
+	add	%fp, SCRATCH_OFF, %o2;	\
+	call	skb_copy_bits;		\
+	 mov	(LEN), %o3;		\
+	cmp	%o0, 0;			\
+	restore;
+
+bpf_slow_path_word:
+	bpf_slow_path_common(4)
+	bl	bpf_error
+	 ld	[%sp + SCRATCH_OFF], r_A
+	retl
+	 nop
+bpf_slow_path_half:
+	bpf_slow_path_common(2)
+	bl	bpf_error
+	 lduh	[%sp + SCRATCH_OFF], r_A
+	retl
+	 nop
+bpf_slow_path_byte:
+	bpf_slow_path_common(1)
+	bl	bpf_error
+	 ldub	[%sp + SCRATCH_OFF], r_A
+	retl
+	 nop
+bpf_slow_path_byte_msh:
+	bpf_slow_path_common(1)
+	bl	bpf_error
+	 ldub	[%sp + SCRATCH_OFF], r_A
+	and	r_OFF, 0xf, r_OFF
+	retl
+	 sll	r_OFF, 2, r_X
+
+#define bpf_negative_common(LEN)			\
+	save	%sp, -SAVE_SZ, %sp;			\
+	mov	%i0, %o0;				\
+	mov	r_OFF, %o1;				\
+	SIGN_EXTEND(%o1);				\
+	call	bpf_internal_load_pointer_neg_helper;	\
+	 mov	(LEN), %o2;				\
+	mov	%o0, r_TMP;				\
+	cmp	%o0, 0;					\
+	BE_PTR(bpf_error);				\
+	 restore;
+
+bpf_slow_path_word_neg:
+	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
+	cmp	r_OFF, r_TMP
+	bl	bpf_error
+	 nop
+	.globl	bpf_jit_load_word_negative_offset
+bpf_jit_load_word_negative_offset:
+	bpf_negative_common(4)
+	andcc	r_TMP, 3, %g0
+	bne	load_word_unaligned
+	 nop
+	retl
+	 ld	[r_TMP], r_A
+
+bpf_slow_path_half_neg:
+	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
+	cmp	r_OFF, r_TMP
+	bl	bpf_error
+	 nop
+	.globl	bpf_jit_load_half_negative_offset
+bpf_jit_load_half_negative_offset:
+	bpf_negative_common(2)
+	andcc	r_TMP, 1, %g0
+	bne	load_half_unaligned
+	 nop
+	retl
+	 lduh	[r_TMP], r_A
+
+bpf_slow_path_byte_neg:
+	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
+	cmp	r_OFF, r_TMP
+	bl	bpf_error
+	 nop
+	.globl	bpf_jit_load_byte_negative_offset
+bpf_jit_load_byte_negative_offset:
+	bpf_negative_common(1)
+	retl
+	 ldub	[r_TMP], r_A
+
+bpf_slow_path_byte_msh_neg:
+	sethi	%hi(SKF_MAX_NEG_OFF), r_TMP
+	cmp	r_OFF, r_TMP
+	bl	bpf_error
+	 nop
+	.globl	bpf_jit_load_byte_msh_negative_offset
+bpf_jit_load_byte_msh_negative_offset:
+	bpf_negative_common(1)
+	ldub	[r_TMP], r_OFF
+	and	r_OFF, 0xf, r_OFF
+	retl
+	 sll	r_OFF, 2, r_X
+
+bpf_error:
+	/* Make the JIT program return zero.  The JIT prologue
+	 * stores away the original %o7 into r_saved_O7.  The
+	 * normal leaf function return is to use "retl", which
+	 * would evaluate to "jmpl %o7 + 8, %g0", but we want to
+	 * use the saved value, hence the sequence you see here.
+	 */
+	jmpl	r_saved_O7 + 8, %g0
+	 clr	%o0
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
new file mode 100644
index 0000000..a5ff886
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -0,0 +1,765 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/moduleloader.h>
+#include <linux/workqueue.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/cache.h>
+#include <linux/if_vlan.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ptrace.h>
+
+#include "bpf_jit_32.h"
+
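+/* True when 'value' fits in a signed 13-bit immediate (-4096..4095).
+ * Adding 0x1000 maps that window onto 0..0x1fff, so one unsigned
+ * compare suffices.
+ */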
+static inline bool is_simm13(unsigned int value)
+{
+	return value + 0x1000 < 0x2000;
+}
+
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG    2 /* X register is used */
+#define SEEN_MEM     4 /* use mem[] for temporary storage */
+
+#define S13(X)		((X) & 0x1fff)
+#define IMMED		0x00002000
+#define RD(X)		((X) << 25)
+#define RS1(X)		((X) << 14)
+#define RS2(X)		((X))
+#define OP(X)		((X) << 30)
+#define OP2(X)		((X) << 22)
+#define OP3(X)		((X) << 19)
+#define COND(X)		((X) << 25)
+#define F1(X)		OP(X)
+#define F2(X, Y)	(OP(X) | OP2(Y))
+#define F3(X, Y)	(OP(X) | OP3(Y))
+
+#define CONDN		COND(0x0)
+#define CONDE		COND(0x1)
+#define CONDLE		COND(0x2)
+#define CONDL		COND(0x3)
+#define CONDLEU		COND(0x4)
+#define CONDCS		COND(0x5)
+#define CONDNEG		COND(0x6)
+#define CONDVC		COND(0x7)
+#define CONDA		COND(0x8)
+#define CONDNE		COND(0x9)
+#define CONDG		COND(0xa)
+#define CONDGE		COND(0xb)
+#define CONDGU		COND(0xc)
+#define CONDCC		COND(0xd)
+#define CONDPOS		COND(0xe)
+#define CONDVS		COND(0xf)
+
+#define CONDGEU		CONDCC
+#define CONDLU		CONDCS
+
+#define WDISP22(X)	(((X) >> 2) & 0x3fffff)
+
+#define BA		(F2(0, 2) | CONDA)
+#define BGU		(F2(0, 2) | CONDGU)
+#define BLEU		(F2(0, 2) | CONDLEU)
+#define BGEU		(F2(0, 2) | CONDGEU)
+#define BLU		(F2(0, 2) | CONDLU)
+#define BE		(F2(0, 2) | CONDE)
+#define BNE		(F2(0, 2) | CONDNE)
+
+#define BE_PTR		BE
+
+#define SETHI(K, REG)	\
+	(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
+#define OR_LO(K, REG)	\
+	(F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))
+
+#define ADD		F3(2, 0x00)
+#define AND		F3(2, 0x01)
+#define ANDCC		F3(2, 0x11)
+#define OR		F3(2, 0x02)
+#define XOR		F3(2, 0x03)
+#define SUB		F3(2, 0x04)
+#define SUBCC		F3(2, 0x14)
+#define MUL		F3(2, 0x0a)	/* umul */
+#define DIV		F3(2, 0x0e)	/* udiv */
+#define SLL		F3(2, 0x25)
+#define SRL		F3(2, 0x26)
+#define JMPL		F3(2, 0x38)
+#define CALL		F1(1)
+#define BR		F2(0, 0x01)
+#define RD_Y		F3(2, 0x28)
+#define WR_Y		F3(2, 0x30)
+
+#define LD32		F3(3, 0x00)
+#define LD8		F3(3, 0x01)
+#define LD16		F3(3, 0x02)
+#define LD64		F3(3, 0x0b)
+#define ST32		F3(3, 0x04)
+
+#define LDPTR		LD32
+#define BASE_STACKFRAME	96
+
+#define LD32I		(LD32 | IMMED)
+#define LD8I		(LD8 | IMMED)
+#define LD16I		(LD16 | IMMED)
+#define LD64I		(LD64 | IMMED)
+#define LDPTRI		(LDPTR | IMMED)
+#define ST32I		(ST32 | IMMED)
+
+#define emit_nop()		\
+do {				\
+	*prog++ = SETHI(0, G0);	\
+} while (0)
+
+#define emit_neg()					\
+do {	/* sub %g0, r_A, r_A */				\
+	*prog++ = SUB | RS1(G0) | RS2(r_A) | RD(r_A);	\
+} while (0)
+
+#define emit_reg_move(FROM, TO)				\
+do {	/* or %g0, FROM, TO */				\
+	*prog++ = OR | RS1(G0) | RS2(FROM) | RD(TO);	\
+} while (0)
+
+#define emit_clear(REG)					\
+do {	/* or %g0, %g0, REG */				\
+	*prog++ = OR | RS1(G0) | RS2(G0) | RD(REG);	\
+} while (0)
+
+#define emit_set_const(K, REG)					\
+do {	/* sethi %hi(K), REG */					\
+	*prog++ = SETHI(K, REG);				\
+	/* or REG, %lo(K), REG */				\
+	*prog++ = OR_LO(K, REG);				\
+} while (0)
+
+	/* Emit
+	 *
+	 *	OP	r_A, r_X, r_A
+	 */
+#define emit_alu_X(OPCODE)					\
+do {								\
+	seen |= SEEN_XREG;					\
+	*prog++ = OPCODE | RS1(r_A) | RS2(r_X) | RD(r_A);	\
+} while (0)
+
+	/* Emit either:
+	 *
+	 *	OP	r_A, K, r_A
+	 *
+	 * or
+	 *
+	 *	sethi	%hi(K), r_TMP
+	 *	or	r_TMP, %lo(K), r_TMP
+	 *	OP	r_A, r_TMP, r_A
+	 *
+	 * depending upon whether K fits in a signed 13-bit
+	 * immediate instruction field.  Emit nothing if K
+	 * is zero, except for AND and MUL, which are still
+	 * emitted so that a zero K correctly clears r_A.
+	 */
+#define emit_alu_K(OPCODE, K)					\
+do {								\
+	if (K || OPCODE == AND || OPCODE == MUL) {		\
+		unsigned int _insn = OPCODE;			\
+		_insn |= RS1(r_A) | RD(r_A);			\
+		if (is_simm13(K)) {				\
+			*prog++ = _insn | IMMED | S13(K);	\
+		} else {					\
+			emit_set_const(K, r_TMP);		\
+			*prog++ = _insn | RS2(r_TMP);		\
+		}						\
+	}							\
+} while (0)
+
+#define emit_loadimm(K, DEST)						\
+do {									\
+	if (is_simm13(K)) {						\
+		/* or %g0, K, DEST */					\
+		*prog++ = OR | IMMED | RS1(G0) | S13(K) | RD(DEST);	\
+	} else {							\
+		emit_set_const(K, DEST);				\
+	}								\
+} while (0)
+
+#define emit_loadptr(BASE, STRUCT, FIELD, DEST)				\
+do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
+	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *));	\
+	*prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST);		\
+} while (0)
+
+#define emit_load32(BASE, STRUCT, FIELD, DEST)				\
+do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
+	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32));	\
+	*prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST);		\
+} while (0)
+
+#define emit_load16(BASE, STRUCT, FIELD, DEST)				\
+do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
+	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16));	\
+	*prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST);		\
+} while (0)
+
+#define __emit_load8(BASE, STRUCT, FIELD, DEST)				\
+do {	unsigned int _off = offsetof(STRUCT, FIELD);			\
+	*prog++ = LD8I | RS1(BASE) | S13(_off) | RD(DEST);		\
+} while (0)
+
+#define emit_load8(BASE, STRUCT, FIELD, DEST)				\
+do {	BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8));	\
+	__emit_load8(BASE, STRUCT, FIELD, DEST);			\
+} while (0)
+
+#define BIAS (-4)
+
+#define emit_ldmem(OFF, DEST)						\
+do {	*prog++ = LD32I | RS1(SP) | S13(BIAS - (OFF)) | RD(DEST);	\
+} while (0)
+
+#define emit_stmem(OFF, SRC)						\
+do {	*prog++ = ST32I | RS1(SP) | S13(BIAS - (OFF)) | RD(SRC);	\
+} while (0)
+
+#ifdef CONFIG_SMP
+#define emit_load_cpu(REG)						\
+	emit_load32(G6, struct thread_info, cpu, REG)
+#else
+#define emit_load_cpu(REG)	emit_clear(REG)
+#endif
+
+#define emit_skb_loadptr(FIELD, DEST) \
+	emit_loadptr(r_SKB, struct sk_buff, FIELD, DEST)
+#define emit_skb_load32(FIELD, DEST) \
+	emit_load32(r_SKB, struct sk_buff, FIELD, DEST)
+#define emit_skb_load16(FIELD, DEST) \
+	emit_load16(r_SKB, struct sk_buff, FIELD, DEST)
+#define __emit_skb_load8(FIELD, DEST) \
+	__emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
+#define emit_skb_load8(FIELD, DEST) \
+	emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
+
+#define emit_jmpl(BASE, IMM_OFF, LREG) \
+	*prog++ = (JMPL | IMMED | RS1(BASE) | S13(IMM_OFF) | RD(LREG))
+
+#define emit_call(FUNC)					\
+do {	void *_here = image + addrs[i] - 8;		\
+	unsigned int _off = (void *)(FUNC) - _here;	\
+	*prog++ = CALL | (((_off) >> 2) & 0x3fffffff);	\
+	emit_nop();					\
+} while (0)
+
+#define emit_branch(BR_OPC, DEST)			\
+do {	unsigned int _here = addrs[i] - 8;		\
+	*prog++ = BR_OPC | WDISP22((DEST) - _here);	\
+} while (0)
+
+#define emit_branch_off(BR_OPC, OFF)			\
+do {	*prog++ = BR_OPC | WDISP22(OFF);		\
+} while (0)
+
+#define emit_jump(DEST)		emit_branch(BA, DEST)
+
+#define emit_read_y(REG)	*prog++ = RD_Y | RD(REG)
+#define emit_write_y(REG)	*prog++ = WR_Y | IMMED | RS1(REG) | S13(0)
+
+#define emit_cmp(R1, R2) \
+	*prog++ = (SUBCC | RS1(R1) | RS2(R2) | RD(G0))
+
+#define emit_cmpi(R1, IMM) \
+	*prog++ = (SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0));
+
+#define emit_btst(R1, R2) \
+	*prog++ = (ANDCC | RS1(R1) | RS2(R2) | RD(G0))
+
+#define emit_btsti(R1, IMM) \
+	*prog++ = (ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0));
+
+#define emit_sub(R1, R2, R3) \
+	*prog++ = (SUB | RS1(R1) | RS2(R2) | RD(R3))
+
+#define emit_subi(R1, IMM, R3) \
+	*prog++ = (SUB | IMMED | RS1(R1) | S13(IMM) | RD(R3))
+
+#define emit_add(R1, R2, R3) \
+	*prog++ = (ADD | RS1(R1) | RS2(R2) | RD(R3))
+
+#define emit_addi(R1, IMM, R3) \
+	*prog++ = (ADD | IMMED | RS1(R1) | S13(IMM) | RD(R3))
+
+#define emit_and(R1, R2, R3) \
+	*prog++ = (AND | RS1(R1) | RS2(R2) | RD(R3))
+
+#define emit_andi(R1, IMM, R3) \
+	*prog++ = (AND | IMMED | RS1(R1) | S13(IMM) | RD(R3))
+
+#define emit_alloc_stack(SZ) \
+	*prog++ = (SUB | IMMED | RS1(SP) | S13(SZ) | RD(SP))
+
+#define emit_release_stack(SZ) \
+	*prog++ = (ADD | IMMED | RS1(SP) | S13(SZ) | RD(SP))
+
+/* A note about branch offset calculations.  The addrs[] array,
+ * indexed by BPF instruction, records the address after all the
+ * sparc instructions emitted for that BPF instruction.
+ *
+ * The most common case is to emit a branch at the end of such
+ * a code sequence.  So this would be two instructions, the
+ * branch and its delay slot.
+ *
+ * Therefore by default the branch emitters calculate the branch
+ * offset field as:
+ *
+ *	destination - (addrs[i] - 8)
+ *
+ * This "addrs[i] - 8" is the address of the branch itself or
+ * what "." would be in assembler notation.  The "8" part is
+ * how we take into consideration the branch and it's delay
+ * slot mentioned above.
+ *
+ * Sometimes we need to emit a branch earlier in the code
+ * sequence.  And in these situations we adjust "destination"
+ * to accommodate this difference.  For example, if we needed
+ * to emit a branch (and its delay slot) right before the
+ * final instruction emitted for a BPF opcode, we'd use
+ * "destination + 4" instead of just plain "destination" above.
+ *
+ * This is why you see all of these funny emit_branch() and
+ * emit_jump() calls with adjusted offsets.
+ */
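+/* Worked example (illustrative numbers): if addrs[i] == 0x40 and the
+ * BPF instruction ends with a branch plus delay slot, the branch word
+ * sits at 0x40 - 8 == 0x38; a destination of 0x80 therefore encodes a
+ * displacement of 0x80 - 0x38 == 0x48 bytes, i.e. 18 instructions.
+ */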
+
+void bpf_jit_compile(struct bpf_prog *fp)
+{
+	unsigned int cleanup_addr, proglen, oldproglen = 0;
+	u32 temp[8], *prog, *func, seen = 0, pass;
+	const struct sock_filter *filter = fp->insns;
+	int i, flen = fp->len, pc_ret0 = -1;
+	unsigned int *addrs;
+	void *image;
+
+	if (!bpf_jit_enable)
+		return;
+
+	addrs = kmalloc_array(flen, sizeof(*addrs), GFP_KERNEL);
+	if (addrs == NULL)
+		return;
+
+	/* Before the first pass, make a rough estimate of addrs[]:
+	 * each BPF instruction translates to fewer than 64 bytes.
+	 */
+	for (proglen = 0, i = 0; i < flen; i++) {
+		proglen += 64;
+		addrs[i] = proglen;
+	}
+	cleanup_addr = proglen; /* epilogue address */
+	image = NULL;
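+	/* Up to 10 passes: branch offsets tighten as addrs[] converges.
+	 * Once two consecutive passes yield the same proglen, the image
+	 * is allocated and the final pass writes the instructions into it.
+	 */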
+	for (pass = 0; pass < 10; pass++) {
+		u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
+
+		/* no prologue/epilogue for trivial filters (RET something) */
+		proglen = 0;
+		prog = temp;
+
+		/* Prologue */
+		if (seen_or_pass0) {
+			if (seen_or_pass0 & SEEN_MEM) {
+				unsigned int sz = BASE_STACKFRAME;
+				sz += BPF_MEMWORDS * sizeof(u32);
+				emit_alloc_stack(sz);
+			}
+
+			/* Make sure we don't leak kernel memory. */
+			if (seen_or_pass0 & SEEN_XREG)
+				emit_clear(r_X);
+
+			/* If this filter needs to access skb data,
+			 * load %o4 and %o5 with:
+			 *  %o4 = skb->len - skb->data_len
+			 *  %o5 = skb->data
+			 * And also back up %o7 into r_saved_O7 so we can
+			 * invoke the stubs using 'call'.
+			 */
+			if (seen_or_pass0 & SEEN_DATAREF) {
+				emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
+				emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
+				emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
+				emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
+			}
+		}
+		emit_reg_move(O7, r_saved_O7);
+
+		/* Make sure we don't leak kernel information to the user. */
+		if (bpf_needs_clear_a(&filter[0]))
+			emit_clear(r_A); /* A = 0 */
+
+		for (i = 0; i < flen; i++) {
+			unsigned int K = filter[i].k;
+			unsigned int t_offset;
+			unsigned int f_offset;
+			u32 t_op, f_op;
+			u16 code = bpf_anc_helper(&filter[i]);
+			int ilen;
+
+			switch (code) {
+			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
+				emit_alu_X(ADD);
+				break;
+			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
+				emit_alu_K(ADD, K);
+				break;
+			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
+				emit_alu_X(SUB);
+				break;
+			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
+				emit_alu_K(SUB, K);
+				break;
+			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
+				emit_alu_X(AND);
+				break;
+			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
+				emit_alu_K(AND, K);
+				break;
+			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
+				emit_alu_X(OR);
+				break;
+			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
+				emit_alu_K(OR, K);
+				break;
+			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+			case BPF_ALU | BPF_XOR | BPF_X:
+				emit_alu_X(XOR);
+				break;
+			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
+				emit_alu_K(XOR, K);
+				break;
+			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
+				emit_alu_X(SLL);
+				break;
+			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
+				emit_alu_K(SLL, K);
+				break;
+			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
+				emit_alu_X(SRL);
+				break;
+			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
+				emit_alu_K(SRL, K);
+				break;
+			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
+				emit_alu_X(MUL);
+				break;
+			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
+				emit_alu_K(MUL, K);
+				break;
+			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
+				if (K == 1)
+					break;
+				emit_write_y(G0);
+				/* The Sparc v8 architecture requires
+				 * three instructions between a %y
+				 * register write and the first use.
+				 */
+				emit_nop();
+				emit_nop();
+				emit_nop();
+				emit_alu_K(DIV, K);
+				break;
+			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
+				emit_cmpi(r_X, 0);
+				if (pc_ret0 > 0) {
+					t_offset = addrs[pc_ret0 - 1];
+					emit_branch(BE, t_offset + 20);
+					emit_nop(); /* delay slot */
+				} else {
+					emit_branch_off(BNE, 16);
+					emit_nop();
+					emit_jump(cleanup_addr + 20);
+					emit_clear(r_A);
+				}
+				emit_write_y(G0);
+				/* The Sparc v8 architecture requires
+				 * three instructions between a %y
+				 * register write and the first use.
+				 */
+				emit_nop();
+				emit_nop();
+				emit_nop();
+				emit_alu_X(DIV);
+				break;
+			case BPF_ALU | BPF_NEG:
+				emit_neg();
+				break;
+			case BPF_RET | BPF_K:
+				if (!K) {
+					if (pc_ret0 == -1)
+						pc_ret0 = i;
+					emit_clear(r_A);
+				} else {
+					emit_loadimm(K, r_A);
+				}
+				/* Fallthrough */
+			case BPF_RET | BPF_A:
+				if (seen_or_pass0) {
+					if (i != flen - 1) {
+						emit_jump(cleanup_addr);
+						emit_nop();
+						break;
+					}
+					if (seen_or_pass0 & SEEN_MEM) {
+						unsigned int sz = BASE_STACKFRAME;
+						sz += BPF_MEMWORDS * sizeof(u32);
+						emit_release_stack(sz);
+					}
+				}
+				/* jmpl %r_saved_O7 + 8, %g0 */
+				emit_jmpl(r_saved_O7, 8, G0);
+				emit_reg_move(r_A, O0); /* delay slot */
+				break;
+			case BPF_MISC | BPF_TAX:
+				seen |= SEEN_XREG;
+				emit_reg_move(r_A, r_X);
+				break;
+			case BPF_MISC | BPF_TXA:
+				seen |= SEEN_XREG;
+				emit_reg_move(r_X, r_A);
+				break;
+			case BPF_ANC | SKF_AD_CPU:
+				emit_load_cpu(r_A);
+				break;
+			case BPF_ANC | SKF_AD_PROTOCOL:
+				emit_skb_load16(protocol, r_A);
+				break;
+			case BPF_ANC | SKF_AD_PKTTYPE:
+				__emit_skb_load8(__pkt_type_offset, r_A);
+				emit_andi(r_A, PKT_TYPE_MAX, r_A);
+				emit_alu_K(SRL, 5);
+				break;
+			case BPF_ANC | SKF_AD_IFINDEX:
+				emit_skb_loadptr(dev, r_A);
+				emit_cmpi(r_A, 0);
+				emit_branch(BE_PTR, cleanup_addr + 4);
+				emit_nop();
+				emit_load32(r_A, struct net_device, ifindex, r_A);
+				break;
+			case BPF_ANC | SKF_AD_MARK:
+				emit_skb_load32(mark, r_A);
+				break;
+			case BPF_ANC | SKF_AD_QUEUE:
+				emit_skb_load16(queue_mapping, r_A);
+				break;
+			case BPF_ANC | SKF_AD_HATYPE:
+				emit_skb_loadptr(dev, r_A);
+				emit_cmpi(r_A, 0);
+				emit_branch(BE_PTR, cleanup_addr + 4);
+				emit_nop();
+				emit_load16(r_A, struct net_device, type, r_A);
+				break;
+			case BPF_ANC | SKF_AD_RXHASH:
+				emit_skb_load32(hash, r_A);
+				break;
+			case BPF_ANC | SKF_AD_VLAN_TAG:
+			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
+				emit_skb_load16(vlan_tci, r_A);
+				if (code != (BPF_ANC | SKF_AD_VLAN_TAG)) {
+					emit_alu_K(SRL, 12);
+					emit_andi(r_A, 1, r_A);
+				} else {
+					emit_loadimm(~VLAN_TAG_PRESENT, r_TMP);
+					emit_and(r_A, r_TMP, r_A);
+				}
+				break;
+			case BPF_LD | BPF_W | BPF_LEN:
+				emit_skb_load32(len, r_A);
+				break;
+			case BPF_LDX | BPF_W | BPF_LEN:
+				emit_skb_load32(len, r_X);
+				break;
+			case BPF_LD | BPF_IMM:
+				emit_loadimm(K, r_A);
+				break;
+			case BPF_LDX | BPF_IMM:
+				emit_loadimm(K, r_X);
+				break;
+			case BPF_LD | BPF_MEM:
+				seen |= SEEN_MEM;
+				emit_ldmem(K * 4, r_A);
+				break;
+			case BPF_LDX | BPF_MEM:
+				seen |= SEEN_MEM | SEEN_XREG;
+				emit_ldmem(K * 4, r_X);
+				break;
+			case BPF_ST:
+				seen |= SEEN_MEM;
+				emit_stmem(K * 4, r_A);
+				break;
+			case BPF_STX:
+				seen |= SEEN_MEM | SEEN_XREG;
+				emit_stmem(K * 4, r_X);
+				break;
+
+#define CHOOSE_LOAD_FUNC(K, func) \
+	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
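+/* For example (illustrative values): K = 14 picks the _positive_offset
+ * stub, K = -4 (>= SKF_LL_OFF) picks the _negative_offset stub, and a
+ * K below SKF_LL_OFF falls back to the generic stub, which range-checks
+ * the offset again at run time.
+ */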
+
+			case BPF_LD | BPF_W | BPF_ABS:
+				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
+common_load:			seen |= SEEN_DATAREF;
+				emit_loadimm(K, r_OFF);
+				emit_call(func);
+				break;
+			case BPF_LD | BPF_H | BPF_ABS:
+				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
+				goto common_load;
+			case BPF_LD | BPF_B | BPF_ABS:
+				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
+				goto common_load;
+			case BPF_LDX | BPF_B | BPF_MSH:
+				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
+				goto common_load;
+			case BPF_LD | BPF_W | BPF_IND:
+				func = bpf_jit_load_word;
+common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
+				if (K) {
+					if (is_simm13(K)) {
+						emit_addi(r_X, K, r_OFF);
+					} else {
+						emit_loadimm(K, r_TMP);
+						emit_add(r_X, r_TMP, r_OFF);
+					}
+				} else {
+					emit_reg_move(r_X, r_OFF);
+				}
+				emit_call(func);
+				break;
+			case BPF_LD | BPF_H | BPF_IND:
+				func = bpf_jit_load_half;
+				goto common_load_ind;
+			case BPF_LD | BPF_B | BPF_IND:
+				func = bpf_jit_load_byte;
+				goto common_load_ind;
+			case BPF_JMP | BPF_JA:
+				emit_jump(addrs[i + K]);
+				emit_nop();
+				break;
+
+#define COND_SEL(CODE, TOP, FOP)	\
+	case CODE:			\
+		t_op = TOP;		\
+		f_op = FOP;		\
+		goto cond_branch
+
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);
+
+cond_branch:			f_offset = addrs[i + filter[i].jf];
+				t_offset = addrs[i + filter[i].jt];
+
+				/* same targets, can avoid doing the test :) */
+				if (filter[i].jt == filter[i].jf) {
+					emit_jump(t_offset);
+					emit_nop();
+					break;
+				}
+
+				switch (code) {
+				case BPF_JMP | BPF_JGT | BPF_X:
+				case BPF_JMP | BPF_JGE | BPF_X:
+				case BPF_JMP | BPF_JEQ | BPF_X:
+					seen |= SEEN_XREG;
+					emit_cmp(r_A, r_X);
+					break;
+				case BPF_JMP | BPF_JSET | BPF_X:
+					seen |= SEEN_XREG;
+					emit_btst(r_A, r_X);
+					break;
+				case BPF_JMP | BPF_JEQ | BPF_K:
+				case BPF_JMP | BPF_JGT | BPF_K:
+				case BPF_JMP | BPF_JGE | BPF_K:
+					if (is_simm13(K)) {
+						emit_cmpi(r_A, K);
+					} else {
+						emit_loadimm(K, r_TMP);
+						emit_cmp(r_A, r_TMP);
+					}
+					break;
+				case BPF_JMP | BPF_JSET | BPF_K:
+					if (is_simm13(K)) {
+						emit_btsti(r_A, K);
+					} else {
+						emit_loadimm(K, r_TMP);
+						emit_btst(r_A, r_TMP);
+					}
+					break;
+				}
+				if (filter[i].jt != 0) {
+					if (filter[i].jf)
+						t_offset += 8;
+					emit_branch(t_op, t_offset);
+					emit_nop(); /* delay slot */
+					if (filter[i].jf) {
+						emit_jump(f_offset);
+						emit_nop();
+					}
+					break;
+				}
+				emit_branch(f_op, f_offset);
+				emit_nop(); /* delay slot */
+				break;
+
+			default:
+				/* Filter too complex: give up on the JIT and fall back to the interpreter. */
+				goto out;
+			}
+			ilen = (void *) prog - (void *) temp;
+			if (image) {
+				if (unlikely(proglen + ilen > oldproglen)) {
+					pr_err("bpb_jit_compile fatal error\n");
+					kfree(addrs);
+					module_memfree(image);
+					return;
+				}
+				memcpy(image + proglen, temp, ilen);
+			}
+			proglen += ilen;
+			addrs[i] = proglen;
+			prog = temp;
+		}
+		/* The last BPF instruction is always a RET:
+		 * use it to record the cleanup instruction(s) address.
+		 */
+		cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
+		if (seen_or_pass0 & SEEN_MEM)
+			cleanup_addr -= 4; /* add %sp, X, %sp; */
+
+		if (image) {
+			if (proglen != oldproglen)
+				pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
+				       proglen, oldproglen);
+			break;
+		}
+		if (proglen == oldproglen) {
+			image = module_alloc(proglen);
+			if (!image)
+				goto out;
+		}
+		oldproglen = proglen;
+	}
+
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(flen, proglen, pass + 1, image);
+
+	if (image) {
+		fp->bpf_func = (void *)image;
+		fp->jited = 1;
+	}
+out:
+	kfree(addrs);
+	return;
+}
+
+void bpf_jit_free(struct bpf_prog *fp)
+{
+	if (fp->jited)
+		module_memfree(fp->bpf_func);
+
+	bpf_prog_unlock_free(fp);
+}
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
new file mode 100644
index 0000000..222785a
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -0,0 +1,1549 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/moduleloader.h>
+#include <linux/workqueue.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/cache.h>
+#include <linux/if_vlan.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ptrace.h>
+
+#include "bpf_jit_64.h"
+
+static inline bool is_simm13(unsigned int value)
+{
+	return value + 0x1000 < 0x2000;
+}
+
+static inline bool is_simm10(unsigned int value)
+{
+	return value + 0x200 < 0x400;
+}
+
+static inline bool is_simm5(unsigned int value)
+{
+	return value + 0x10 < 0x20;
+}
+
+static inline bool is_sethi(unsigned int value)
+{
+	return (value & ~0x3fffff) == 0;
+}
+
+static void bpf_flush_icache(void *start_, void *end_)
+{
+	/* Cheetah's I-cache is fully coherent.  */
+	if (tlb_type == spitfire) {
+		unsigned long start = (unsigned long) start_;
+		unsigned long end = (unsigned long) end_;
+
+		start &= ~7UL;
+		end = (end + 7UL) & ~7UL;
+		while (start < end) {
+			flushi(start);
+			start += 32;
+		}
+	}
+}
+
+#define S13(X)		((X) & 0x1fff)
+#define S5(X)		((X) & 0x1f)
+#define IMMED		0x00002000
+#define RD(X)		((X) << 25)
+#define RS1(X)		((X) << 14)
+#define RS2(X)		((X))
+#define OP(X)		((X) << 30)
+#define OP2(X)		((X) << 22)
+#define OP3(X)		((X) << 19)
+#define COND(X)		(((X) & 0xf) << 25)
+#define CBCOND(X)	(((X) & 0x1f) << 25)
+#define F1(X)		OP(X)
+#define F2(X, Y)	(OP(X) | OP2(Y))
+#define F3(X, Y)	(OP(X) | OP3(Y))
+#define ASI(X)		(((X) & 0xff) << 5)
+
+#define CONDN		COND(0x0)
+#define CONDE		COND(0x1)
+#define CONDLE		COND(0x2)
+#define CONDL		COND(0x3)
+#define CONDLEU		COND(0x4)
+#define CONDCS		COND(0x5)
+#define CONDNEG		COND(0x6)
+#define CONDVC		COND(0x7)
+#define CONDA		COND(0x8)
+#define CONDNE		COND(0x9)
+#define CONDG		COND(0xa)
+#define CONDGE		COND(0xb)
+#define CONDGU		COND(0xc)
+#define CONDCC		COND(0xd)
+#define CONDPOS		COND(0xe)
+#define CONDVS		COND(0xf)
+
+#define CONDGEU		CONDCC
+#define CONDLU		CONDCS
+
+#define WDISP22(X)	(((X) >> 2) & 0x3fffff)
+#define WDISP19(X)	(((X) >> 2) & 0x7ffff)
+
+/* The 10-bit branch displacement for CBCOND is split into two fields */
+static u32 WDISP10(u32 off)
+{
+	u32 ret = ((off >> 2) & 0xff) << 5;
+
+	ret |= ((off >> (2 + 8)) & 0x03) << 19;
+
+	return ret;
+}
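+/* Example (illustrative): off = +24 bytes gives off >> 2 == 6, so the
+ * value 6 lands in the low displacement field (bits 12:5) and the top
+ * two displacement bits (bits 20:19) are zero.
+ */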
+
+#define CBCONDE		CBCOND(0x09)
+#define CBCONDLE	CBCOND(0x0a)
+#define CBCONDL		CBCOND(0x0b)
+#define CBCONDLEU	CBCOND(0x0c)
+#define CBCONDCS	CBCOND(0x0d)
+#define CBCONDN		CBCOND(0x0e)
+#define CBCONDVS	CBCOND(0x0f)
+#define CBCONDNE	CBCOND(0x19)
+#define CBCONDG		CBCOND(0x1a)
+#define CBCONDGE	CBCOND(0x1b)
+#define CBCONDGU	CBCOND(0x1c)
+#define CBCONDCC	CBCOND(0x1d)
+#define CBCONDPOS	CBCOND(0x1e)
+#define CBCONDVC	CBCOND(0x1f)
+
+#define CBCONDGEU	CBCONDCC
+#define CBCONDLU	CBCONDCS
+
+#define ANNUL		(1 << 29)
+#define XCC		(1 << 21)
+
+#define BRANCH		(F2(0, 1) | XCC)
+#define CBCOND_OP	(F2(0, 3) | XCC)
+
+#define BA		(BRANCH | CONDA)
+#define BG		(BRANCH | CONDG)
+#define BL		(BRANCH | CONDL)
+#define BLE		(BRANCH | CONDLE)
+#define BGU		(BRANCH | CONDGU)
+#define BLEU		(BRANCH | CONDLEU)
+#define BGE		(BRANCH | CONDGE)
+#define BGEU		(BRANCH | CONDGEU)
+#define BLU		(BRANCH | CONDLU)
+#define BE		(BRANCH | CONDE)
+#define BNE		(BRANCH | CONDNE)
+
+#define SETHI(K, REG)	\
+	(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
+#define OR_LO(K, REG)	\
+	(F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))
+
+#define ADD		F3(2, 0x00)
+#define AND		F3(2, 0x01)
+#define ANDCC		F3(2, 0x11)
+#define OR		F3(2, 0x02)
+#define XOR		F3(2, 0x03)
+#define SUB		F3(2, 0x04)
+#define SUBCC		F3(2, 0x14)
+#define MUL		F3(2, 0x0a)
+#define MULX		F3(2, 0x09)
+#define UDIVX		F3(2, 0x0d)
+#define DIV		F3(2, 0x0e)
+#define SLL		F3(2, 0x25)
+#define SLLX		(F3(2, 0x25)|(1<<12))
+#define SRA		F3(2, 0x27)
+#define SRAX		(F3(2, 0x27)|(1<<12))
+#define SRL		F3(2, 0x26)
+#define SRLX		(F3(2, 0x26)|(1<<12))
+#define JMPL		F3(2, 0x38)
+#define SAVE		F3(2, 0x3c)
+#define RESTORE		F3(2, 0x3d)
+#define CALL		F1(1)
+#define BR		F2(0, 0x01)
+#define RD_Y		F3(2, 0x28)
+#define WR_Y		F3(2, 0x30)
+
+#define LD32		F3(3, 0x00)
+#define LD8		F3(3, 0x01)
+#define LD16		F3(3, 0x02)
+#define LD64		F3(3, 0x0b)
+#define LD64A		F3(3, 0x1b)
+#define ST8		F3(3, 0x05)
+#define ST16		F3(3, 0x06)
+#define ST32		F3(3, 0x04)
+#define ST64		F3(3, 0x0e)
+
+#define CAS		F3(3, 0x3c)
+#define CASX		F3(3, 0x3e)
+
+#define LDPTR		LD64
+#define BASE_STACKFRAME	176
+
+#define LD32I		(LD32 | IMMED)
+#define LD8I		(LD8 | IMMED)
+#define LD16I		(LD16 | IMMED)
+#define LD64I		(LD64 | IMMED)
+#define LDPTRI		(LDPTR | IMMED)
+#define ST32I		(ST32 | IMMED)
+
+struct jit_ctx {
+	struct bpf_prog		*prog;
+	unsigned int		*offset;
+	int			idx;
+	int			epilogue_offset;
+	bool			tmp_1_used;
+	bool			tmp_2_used;
+	bool			tmp_3_used;
+	bool			saw_frame_pointer;
+	bool			saw_call;
+	bool			saw_tail_call;
+	u32			*image;
+};
+
+#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
+#define TMP_REG_3	(MAX_BPF_JIT_REG + 2)
+
+/* Map BPF registers to SPARC registers */
+static const int bpf2sparc[] = {
+	/* return value from in-kernel function, and exit value from eBPF */
+	[BPF_REG_0] = O5,
+
+	/* arguments from eBPF program to in-kernel function */
+	[BPF_REG_1] = O0,
+	[BPF_REG_2] = O1,
+	[BPF_REG_3] = O2,
+	[BPF_REG_4] = O3,
+	[BPF_REG_5] = O4,
+
+	/* callee saved registers that in-kernel function will preserve */
+	[BPF_REG_6] = L0,
+	[BPF_REG_7] = L1,
+	[BPF_REG_8] = L2,
+	[BPF_REG_9] = L3,
+
+	/* read-only frame pointer to access stack */
+	[BPF_REG_FP] = L6,
+
+	[BPF_REG_AX] = G7,
+
+	/* temporary register for internal BPF JIT */
+	[TMP_REG_1] = G1,
+	[TMP_REG_2] = G2,
+	[TMP_REG_3] = G3,
+};
+
+static void emit(const u32 insn, struct jit_ctx *ctx)
+{
+	if (ctx->image != NULL)
+		ctx->image[ctx->idx] = insn;
+
+	ctx->idx++;
+}
+
+static void emit_call(u32 *func, struct jit_ctx *ctx)
+{
+	if (ctx->image != NULL) {
+		void *here = &ctx->image[ctx->idx];
+		unsigned int off;
+
+		off = (void *)func - here;
+		ctx->image[ctx->idx] = CALL | ((off >> 2) & 0x3fffffff);
+	}
+	ctx->idx++;
+}
+
+static void emit_nop(struct jit_ctx *ctx)
+{
+	emit(SETHI(0, G0), ctx);
+}
+
+static void emit_reg_move(u32 from, u32 to, struct jit_ctx *ctx)
+{
+	emit(OR | RS1(G0) | RS2(from) | RD(to), ctx);
+}
+
+/* Emit 32-bit constant, zero extended. */
+static void emit_set_const(s32 K, u32 reg, struct jit_ctx *ctx)
+{
+	emit(SETHI(K, reg), ctx);
+	emit(OR_LO(K, reg), ctx);
+}
+
+/* Emit 32-bit constant, sign extended. */
+static void emit_set_const_sext(s32 K, u32 reg, struct jit_ctx *ctx)
+{
+	if (K >= 0) {
+		emit(SETHI(K, reg), ctx);
+		emit(OR_LO(K, reg), ctx);
+	} else {
+		u32 hbits = ~(u32) K;
+		u32 lbits = -0x400 | (u32) K;
+
+		emit(SETHI(hbits, reg), ctx);
+		emit(XOR | IMMED | RS1(reg) | S13(lbits) | RD(reg), ctx);
+	}
+}
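+/* Worked example (illustrative): K = -12345 (0xffffcfc7) gives
+ * hbits = ~K = 0x3038, so SETHI loads 0x3000; S13(lbits) is 0x1fc7,
+ * which sign-extends to 0xffffffffffffffc7, and
+ * 0x3000 ^ 0xffffffffffffffc7 == 0xffffffffffffcfc7, the
+ * sign-extended value of K.
+ */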
+
+static void emit_alu(u32 opcode, u32 src, u32 dst, struct jit_ctx *ctx)
+{
+	emit(opcode | RS1(dst) | RS2(src) | RD(dst), ctx);
+}
+
+static void emit_alu3(u32 opcode, u32 a, u32 b, u32 c, struct jit_ctx *ctx)
+{
+	emit(opcode | RS1(a) | RS2(b) | RD(c), ctx);
+}
+
+static void emit_alu_K(unsigned int opcode, unsigned int dst, unsigned int imm,
+		       struct jit_ctx *ctx)
+{
+	bool small_immed = is_simm13(imm);
+	unsigned int insn = opcode;
+
+	insn |= RS1(dst) | RD(dst);
+	if (small_immed) {
+		emit(insn | IMMED | S13(imm), ctx);
+	} else {
+		unsigned int tmp = bpf2sparc[TMP_REG_1];
+
+		ctx->tmp_1_used = true;
+
+		emit_set_const_sext(imm, tmp, ctx);
+		emit(insn | RS2(tmp), ctx);
+	}
+}
+
+static void emit_alu3_K(unsigned int opcode, unsigned int src, unsigned int imm,
+			unsigned int dst, struct jit_ctx *ctx)
+{
+	bool small_immed = is_simm13(imm);
+	unsigned int insn = opcode;
+
+	insn |= RS1(src) | RD(dst);
+	if (small_immed) {
+		emit(insn | IMMED | S13(imm), ctx);
+	} else {
+		unsigned int tmp = bpf2sparc[TMP_REG_1];
+
+		ctx->tmp_1_used = true;
+
+		emit_set_const_sext(imm, tmp, ctx);
+		emit(insn | RS2(tmp), ctx);
+	}
+}
+
+static void emit_loadimm32(s32 K, unsigned int dest, struct jit_ctx *ctx)
+{
+	if (K >= 0 && is_simm13(K)) {
+		/* or %g0, K, DEST */
+		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
+	} else {
+		emit_set_const(K, dest, ctx);
+	}
+}
+
+static void emit_loadimm(s32 K, unsigned int dest, struct jit_ctx *ctx)
+{
+	if (is_simm13(K)) {
+		/* or %g0, K, DEST */
+		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
+	} else {
+		emit_set_const(K, dest, ctx);
+	}
+}
+
+static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
+{
+	if (is_simm13(K)) {
+		/* or %g0, K, DEST */
+		emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
+	} else {
+		emit_set_const_sext(K, dest, ctx);
+	}
+}
+
+static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
+				   int *hbsp, int *lbsp, int *abbasp)
+{
+	int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+	int i;
+
+	lowest_bit_set = highest_bit_set = -1;
+	i = 0;
+	do {
+		if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
+			lowest_bit_set = i;
+		if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
+			highest_bit_set = (64 - i - 1);
+	}  while (++i < 32 && (highest_bit_set == -1 ||
+			       lowest_bit_set == -1));
+	if (i == 32) {
+		i = 0;
+		do {
+			if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
+				lowest_bit_set = i + 32;
+			if (highest_bit_set == -1 &&
+			    ((low_bits >> (32 - i - 1)) & 1))
+				highest_bit_set = 32 - i - 1;
+		} while (++i < 32 && (highest_bit_set == -1 ||
+				      lowest_bit_set == -1));
+	}
+
+	all_bits_between_are_set = 1;
+	for (i = lowest_bit_set; i <= highest_bit_set; i++) {
+		if (i < 32) {
+			if ((low_bits & (1 << i)) != 0)
+				continue;
+		} else {
+			if ((high_bits & (1 << (i - 32))) != 0)
+				continue;
+		}
+		all_bits_between_are_set = 0;
+		break;
+	}
+	*hbsp = highest_bit_set;
+	*lbsp = lowest_bit_set;
+	*abbasp = all_bits_between_are_set;
+}
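+/* Example (illustrative): K = 0x00000ff000000000 yields
+ * lowest_bit_set == 36, highest_bit_set == 43 and
+ * all_bits_between_are_set == 1.
+ */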
+
+static unsigned long create_simple_focus_bits(unsigned long high_bits,
+					      unsigned long low_bits,
+					      int lowest_bit_set, int shift)
+{
+	long hi, lo;
+
+	if (lowest_bit_set < 32) {
+		lo = (low_bits >> lowest_bit_set) << shift;
+		hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+	} else {
+		lo = 0;
+		hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+	}
+	return hi | lo;
+}
+
+static bool const64_is_2insns(unsigned long high_bits,
+			      unsigned long low_bits)
+{
+	int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+	if (high_bits == 0 || high_bits == 0xffffffff)
+		return true;
+
+	analyze_64bit_constant(high_bits, low_bits,
+			       &highest_bit_set, &lowest_bit_set,
+			       &all_bits_between_are_set);
+
+	if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
+	    all_bits_between_are_set != 0)
+		return true;
+
+	if (highest_bit_set - lowest_bit_set < 21)
+		return true;
+
+	return false;
+}
+
+static void sparc_emit_set_const64_quick2(unsigned long high_bits,
+					  unsigned long low_imm,
+					  unsigned int dest,
+					  int shift_count, struct jit_ctx *ctx)
+{
+	emit_loadimm32(high_bits, dest, ctx);
+
+	/* Now shift it up into place.  */
+	emit_alu_K(SLLX, dest, shift_count, ctx);
+
+	/* If there is a low immediate part piece, finish up by
+	 * putting that in as well.
+	 */
+	if (low_imm != 0)
+		emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
+}
+
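+/* Load a 64-bit constant, trying progressively longer sequences: one
+ * instruction, a mov or sethi plus a shift, three-instruction forms,
+ * a negated two-instruction value fixed up with xor, and finally a
+ * full high/low decomposition through a temporary register.
+ */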
+static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
+{
+	int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
+	unsigned int tmp = bpf2sparc[TMP_REG_1];
+	u32 low_bits = (K & 0xffffffff);
+	u32 high_bits = (K >> 32);
+
+	/* These two tests also take care of all of the one
+	 * instruction cases.
+	 */
+	if (high_bits == 0xffffffff && (low_bits & 0x80000000))
+		return emit_loadimm_sext(K, dest, ctx);
+	if (high_bits == 0x00000000)
+		return emit_loadimm32(K, dest, ctx);
+
+	analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
+			       &lowest_bit_set, &all_bits_between_are_set);
+
+	/* 1) mov	-1, %reg
+	 *    sllx	%reg, shift, %reg
+	 * 2) mov	-1, %reg
+	 *    srlx	%reg, shift, %reg
+	 * 3) mov	some_small_const, %reg
+	 *    sllx	%reg, shift, %reg
+	 */
+	if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
+	     all_bits_between_are_set != 0) ||
+	    ((highest_bit_set - lowest_bit_set) < 12)) {
+		int shift = lowest_bit_set;
+		long the_const = -1;
+
+		if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
+		    all_bits_between_are_set == 0) {
+			the_const =
+				create_simple_focus_bits(high_bits, low_bits,
+							 lowest_bit_set, 0);
+		} else if (lowest_bit_set == 0)
+			shift = -(63 - highest_bit_set);
+
+		emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
+		if (shift > 0)
+			emit_alu_K(SLLX, dest, shift, ctx);
+		else if (shift < 0)
+			emit_alu_K(SRLX, dest, -shift, ctx);
+
+		return;
+	}
+
+	/* Now a range of 22 or less bits set somewhere.
+	 * 1) sethi	%hi(focus_bits), %reg
+	 *    sllx	%reg, shift, %reg
+	 * 2) sethi	%hi(focus_bits), %reg
+	 *    srlx	%reg, shift, %reg
+	 */
+	if ((highest_bit_set - lowest_bit_set) < 21) {
+		unsigned long focus_bits =
+			create_simple_focus_bits(high_bits, low_bits,
+						 lowest_bit_set, 10);
+
+		emit(SETHI(focus_bits, dest), ctx);
+
+		/* If lowest_bit_set == 10 then a sethi alone could
+		 * have done it.
+		 */
+		if (lowest_bit_set < 10)
+			emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
+		else if (lowest_bit_set > 10)
+			emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
+		return;
+	}
+
+	/* Ok, now 3 instruction sequences.  */
+	if (low_bits == 0) {
+		emit_loadimm32(high_bits, dest, ctx);
+		emit_alu_K(SLLX, dest, 32, ctx);
+		return;
+	}
+
+	/* We may be able to do something quick
+	 * when the constant is negated, so try that.
+	 */
+	if (const64_is_2insns((~high_bits) & 0xffffffff,
+			      (~low_bits) & 0xfffffc00)) {
+		/* NOTE: The trailing bits get XOR'd so we need the
+		 * non-negated bits, not the negated ones.
+		 */
+		unsigned long trailing_bits = low_bits & 0x3ff;
+
+		if ((((~high_bits) & 0xffffffff) == 0 &&
+		     ((~low_bits) & 0x80000000) == 0) ||
+		    (((~high_bits) & 0xffffffff) == 0xffffffff &&
+		     ((~low_bits) & 0x80000000) != 0)) {
+			unsigned long fast_int = (~low_bits & 0xffffffff);
+
+			if ((is_sethi(fast_int) &&
+			     (~high_bits & 0xffffffff) == 0)) {
+				emit(SETHI(fast_int, dest), ctx);
+			} else if (is_simm13(fast_int)) {
+				emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
+			} else {
+				emit_loadimm64(fast_int, dest, ctx);
+			}
+		} else {
+			u64 n = ((~low_bits) & 0xfffffc00) |
+				(((unsigned long)((~high_bits) & 0xffffffff))<<32);
+			emit_loadimm64(n, dest, ctx);
+		}
+
+		low_bits = -0x400 | trailing_bits;
+
+		emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
+		return;
+	}
+
+	/* 1) sethi	%hi(xxx), %reg
+	 *    or	%reg, %lo(xxx), %reg
+	 *    sllx	%reg, yyy, %reg
+	 */
+	if ((highest_bit_set - lowest_bit_set) < 32) {
+		unsigned long focus_bits =
+			create_simple_focus_bits(high_bits, low_bits,
+						 lowest_bit_set, 0);
+
+		/* So what we know is that the set bits straddle the
+		 * middle of the 64-bit word.
+		 */
+		sparc_emit_set_const64_quick2(focus_bits, 0, dest,
+					      lowest_bit_set, ctx);
+		return;
+	}
+
+	/* 1) sethi	%hi(high_bits), %reg
+	 *    or	%reg, %lo(high_bits), %reg
+	 *    sllx	%reg, 32, %reg
+	 *    or	%reg, low_bits, %reg
+	 */
+	if (is_simm13(low_bits) && ((int)low_bits > 0)) {
+		sparc_emit_set_const64_quick2(high_bits, low_bits,
+					      dest, 32, ctx);
+		return;
+	}
+
+	/* Oh well, we tried... Do a full 64-bit decomposition.  */
+	ctx->tmp_1_used = true;
+
+	emit_loadimm32(high_bits, tmp, ctx);
+	emit_loadimm32(low_bits, dest, ctx);
+	emit_alu_K(SLLX, tmp, 32, ctx);
+	emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
+}
+
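+/* Emit a conditional branch; branches on the 64-bit condition codes
+ * (XCC) use the 19-bit displacement format, the rest the 22-bit one.
+ */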
+static void emit_branch(unsigned int br_opc, unsigned int from_idx, unsigned int to_idx,
+			struct jit_ctx *ctx)
+{
+	unsigned int off = to_idx - from_idx;
+
+	if (br_opc & XCC)
+		emit(br_opc | WDISP19(off << 2), ctx);
+	else
+		emit(br_opc | WDISP22(off << 2), ctx);
+}
+
+static void emit_cbcond(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
+			const u8 dst, const u8 src, struct jit_ctx *ctx)
+{
+	unsigned int off = to_idx - from_idx;
+
+	emit(cb_opc | WDISP10(off << 2) | RS1(dst) | RS2(src), ctx);
+}
+
+static void emit_cbcondi(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
+			 const u8 dst, s32 imm, struct jit_ctx *ctx)
+{
+	unsigned int off = to_idx - from_idx;
+
+	emit(cb_opc | IMMED | WDISP10(off << 2) | RS1(dst) | S5(imm), ctx);
+}
+
+#define emit_read_y(REG, CTX)	emit(RD_Y | RD(REG), CTX)
+#define emit_write_y(REG, CTX)	emit(WR_Y | IMMED | RS1(REG) | S13(0), CTX)
+
+#define emit_cmp(R1, R2, CTX)				\
+	emit(SUBCC | RS1(R1) | RS2(R2) | RD(G0), CTX)
+
+#define emit_cmpi(R1, IMM, CTX)				\
+	emit(SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
+
+#define emit_btst(R1, R2, CTX)				\
+	emit(ANDCC | RS1(R1) | RS2(R2) | RD(G0), CTX)
+
+#define emit_btsti(R1, IMM, CTX)			\
+	emit(ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
+
+static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
+				   const s32 imm, bool is_imm, int branch_dst,
+				   struct jit_ctx *ctx)
+{
+	bool use_cbcond = (sparc64_elf_hwcap & AV_SPARC_CBCOND) != 0;
+	const u8 tmp = bpf2sparc[TMP_REG_1];
+
+	branch_dst = ctx->offset[branch_dst];
+
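+	/* Prefer the single-instruction compare-and-branch (cbcond) when
+	 * the CPU supports it, the displacement fits in 10 signed bits,
+	 * and the operation is not JSET (cbcond has no bit-test form).
+	 */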
+	if (!is_simm10(branch_dst - ctx->idx) ||
+	    BPF_OP(code) == BPF_JSET)
+		use_cbcond = false;
+
+	if (is_imm) {
+		bool fits = true;
+
+		if (use_cbcond) {
+			if (!is_simm5(imm))
+				fits = false;
+		} else if (!is_simm13(imm)) {
+			fits = false;
+		}
+		if (!fits) {
+			ctx->tmp_1_used = true;
+			emit_loadimm_sext(imm, tmp, ctx);
+			src = tmp;
+			is_imm = false;
+		}
+	}
+
+	if (!use_cbcond) {
+		u32 br_opcode;
+
+		if (BPF_OP(code) == BPF_JSET) {
+			if (is_imm)
+				emit_btsti(dst, imm, ctx);
+			else
+				emit_btst(dst, src, ctx);
+		} else {
+			if (is_imm)
+				emit_cmpi(dst, imm, ctx);
+			else
+				emit_cmp(dst, src, ctx);
+		}
+		switch (BPF_OP(code)) {
+		case BPF_JEQ:
+			br_opcode = BE;
+			break;
+		case BPF_JGT:
+			br_opcode = BGU;
+			break;
+		case BPF_JLT:
+			br_opcode = BLU;
+			break;
+		case BPF_JGE:
+			br_opcode = BGEU;
+			break;
+		case BPF_JLE:
+			br_opcode = BLEU;
+			break;
+		case BPF_JSET:
+		case BPF_JNE:
+			br_opcode = BNE;
+			break;
+		case BPF_JSGT:
+			br_opcode = BG;
+			break;
+		case BPF_JSLT:
+			br_opcode = BL;
+			break;
+		case BPF_JSGE:
+			br_opcode = BGE;
+			break;
+		case BPF_JSLE:
+			br_opcode = BLE;
+			break;
+		default:
+			/* Make sure we don't leak kernel information to the
+			 * user.
+			 */
+			return -EFAULT;
+		}
+		emit_branch(br_opcode, ctx->idx, branch_dst, ctx);
+		emit_nop(ctx);
+	} else {
+		u32 cbcond_opcode;
+
+		switch (BPF_OP(code)) {
+		case BPF_JEQ:
+			cbcond_opcode = CBCONDE;
+			break;
+		case BPF_JGT:
+			cbcond_opcode = CBCONDGU;
+			break;
+		case BPF_JLT:
+			cbcond_opcode = CBCONDLU;
+			break;
+		case BPF_JGE:
+			cbcond_opcode = CBCONDGEU;
+			break;
+		case BPF_JLE:
+			cbcond_opcode = CBCONDLEU;
+			break;
+		case BPF_JNE:
+			cbcond_opcode = CBCONDNE;
+			break;
+		case BPF_JSGT:
+			cbcond_opcode = CBCONDG;
+			break;
+		case BPF_JSLT:
+			cbcond_opcode = CBCONDL;
+			break;
+		case BPF_JSGE:
+			cbcond_opcode = CBCONDGE;
+			break;
+		case BPF_JSLE:
+			cbcond_opcode = CBCONDLE;
+			break;
+		default:
+			/* Make sure we don't leak kernel information to the
+			 * user.
+			 */
+			return -EFAULT;
+		}
+		cbcond_opcode |= CBCOND_OP;
+		if (is_imm)
+			emit_cbcondi(cbcond_opcode, ctx->idx, branch_dst,
+				     dst, imm, ctx);
+		else
+			emit_cbcond(cbcond_opcode, ctx->idx, branch_dst,
+				    dst, src, ctx);
+	}
+	return 0;
+}
+
+/* Just skip the save instruction and the ctx register move.  */
+#define BPF_TAILCALL_PROLOGUE_SKIP	16
+#define BPF_TAILCALL_CNT_SP_OFF		(STACK_BIAS + 128)
+
+static void build_prologue(struct jit_ctx *ctx)
+{
+	s32 stack_needed = BASE_STACKFRAME;
+
+	if (ctx->saw_frame_pointer || ctx->saw_tail_call) {
+		struct bpf_prog *prog = ctx->prog;
+		u32 stack_depth;
+
+		stack_depth = prog->aux->stack_depth;
+		stack_needed += round_up(stack_depth, 16);
+	}
+
+	if (ctx->saw_tail_call)
+		stack_needed += 8;
+
+	/* save %sp, -176, %sp */
+	emit(SAVE | IMMED | RS1(SP) | S13(-stack_needed) | RD(SP), ctx);
+
+	/* tail_call_cnt = 0 */
+	if (ctx->saw_tail_call) {
+		u32 off = BPF_TAILCALL_CNT_SP_OFF;
+
+		emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(G0), ctx);
+	} else {
+		emit_nop(ctx);
+	}
+	if (ctx->saw_frame_pointer) {
+		const u8 vfp = bpf2sparc[BPF_REG_FP];
+
+		emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
+	}
+
+	emit_reg_move(I0, O0, ctx);
+	/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
+}
+
+static void build_epilogue(struct jit_ctx *ctx)
+{
+	ctx->epilogue_offset = ctx->idx;
+
+	/* ret (jmpl %i7 + 8, %g0) */
+	emit(JMPL | IMMED | RS1(I7) | S13(8) | RD(G0), ctx);
+
+	/* restore %i5, %g0, %o0 */
+	emit(RESTORE | RS1(bpf2sparc[BPF_REG_0]) | RS2(G0) | RD(O0), ctx);
+}
+
+static void emit_tail_call(struct jit_ctx *ctx)
+{
+	const u8 bpf_array = bpf2sparc[BPF_REG_2];
+	const u8 bpf_index = bpf2sparc[BPF_REG_3];
+	const u8 tmp = bpf2sparc[TMP_REG_1];
+	u32 off;
+
+	ctx->saw_tail_call = true;
+
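+	/* if (index >= array->map.max_entries) goto out; */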
+	off = offsetof(struct bpf_array, map.max_entries);
+	emit(LD32 | IMMED | RS1(bpf_array) | S13(off) | RD(tmp), ctx);
+	emit_cmp(bpf_index, tmp, ctx);
+#define OFFSET1 17
+	emit_branch(BGEU, ctx->idx, ctx->idx + OFFSET1, ctx);
+	emit_nop(ctx);
+
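+	/* if (tail_call_cnt > MAX_TAIL_CALL_CNT) goto out; */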
+	off = BPF_TAILCALL_CNT_SP_OFF;
+	emit(LD32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);
+	emit_cmpi(tmp, MAX_TAIL_CALL_CNT, ctx);
+#define OFFSET2 13
+	emit_branch(BGU, ctx->idx, ctx->idx + OFFSET2, ctx);
+	emit_nop(ctx);
+
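+	/* tail_call_cnt++; */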
+	emit_alu_K(ADD, tmp, 1, ctx);
+	off = BPF_TAILCALL_CNT_SP_OFF;
+	emit(ST32 | IMMED | RS1(SP) | S13(off) | RD(tmp), ctx);
+
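+	/* prog = array->ptrs[index]; if (prog == NULL) goto out; */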
+	emit_alu3_K(SLL, bpf_index, 3, tmp, ctx);
+	emit_alu(ADD, bpf_array, tmp, ctx);
+	off = offsetof(struct bpf_array, ptrs);
+	emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);
+
+	emit_cmpi(tmp, 0, ctx);
+#define OFFSET3 5
+	emit_branch(BE, ctx->idx, ctx->idx + OFFSET3, ctx);
+	emit_nop(ctx);
+
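+	/* goto *(prog->bpf_func + BPF_TAILCALL_PROLOGUE_SKIP); */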
+	off = offsetof(struct bpf_prog, bpf_func);
+	emit(LD64 | IMMED | RS1(tmp) | S13(off) | RD(tmp), ctx);
+
+	off = BPF_TAILCALL_PROLOGUE_SKIP;
+	emit(JMPL | IMMED | RS1(tmp) | S13(off) | RD(G0), ctx);
+	emit_nop(ctx);
+}
+
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+{
+	const u8 code = insn->code;
+	const u8 dst = bpf2sparc[insn->dst_reg];
+	const u8 src = bpf2sparc[insn->src_reg];
+	const int i = insn - ctx->prog->insnsi;
+	const s16 off = insn->off;
+	const s32 imm = insn->imm;
+
+	if (insn->src_reg == BPF_REG_FP)
+		ctx->saw_frame_pointer = true;
+
+	switch (code) {
+	/* dst = src */
+	case BPF_ALU | BPF_MOV | BPF_X:
+		emit_alu3_K(SRL, src, 0, dst, ctx);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X:
+		emit_reg_move(src, dst, ctx);
+		break;
+	/* dst = dst OP src */
+	case BPF_ALU | BPF_ADD | BPF_X:
+	case BPF_ALU64 | BPF_ADD | BPF_X:
+		emit_alu(ADD, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_SUB | BPF_X:
+	case BPF_ALU64 | BPF_SUB | BPF_X:
+		emit_alu(SUB, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_AND | BPF_X:
+	case BPF_ALU64 | BPF_AND | BPF_X:
+		emit_alu(AND, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_OR | BPF_X:
+	case BPF_ALU64 | BPF_OR | BPF_X:
+		emit_alu(OR, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_XOR | BPF_X:
+	case BPF_ALU64 | BPF_XOR | BPF_X:
+		emit_alu(XOR, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_MUL | BPF_X:
+		emit_alu(MUL, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_MUL | BPF_X:
+		emit_alu(MULX, src, dst, ctx);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_X:
+		emit_write_y(G0, ctx);
+		emit_alu(DIV, src, dst, ctx);
+		break;
+	case BPF_ALU64 | BPF_DIV | BPF_X:
+		emit_alu(UDIVX, src, dst, ctx);
+		break;
+	case BPF_ALU | BPF_MOD | BPF_X: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+
+		ctx->tmp_1_used = true;
+
+		emit_write_y(G0, ctx);
+		emit_alu3(DIV, dst, src, tmp, ctx);
+		emit_alu3(MULX, tmp, src, tmp, ctx);
+		emit_alu3(SUB, dst, tmp, dst, ctx);
+		goto do_alu32_trunc;
+	}
+	case BPF_ALU64 | BPF_MOD | BPF_X: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+
+		ctx->tmp_1_used = true;
+
+		emit_alu3(UDIVX, dst, src, tmp, ctx);
+		emit_alu3(MULX, tmp, src, tmp, ctx);
+		emit_alu3(SUB, dst, tmp, dst, ctx);
+		break;
+	}
+	case BPF_ALU | BPF_LSH | BPF_X:
+		emit_alu(SLL, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_LSH | BPF_X:
+		emit_alu(SLLX, src, dst, ctx);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_X:
+		emit_alu(SRL, src, dst, ctx);
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_X:
+		emit_alu(SRLX, src, dst, ctx);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_X:
+		emit_alu(SRA, src, dst, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_ARSH | BPF_X:
+		emit_alu(SRAX, src, dst, ctx);
+		break;
+
+	/* dst = -dst */
+	case BPF_ALU | BPF_NEG:
+	case BPF_ALU64 | BPF_NEG:
+		emit(SUB | RS1(0) | RS2(dst) | RD(dst), ctx);
+		goto do_alu32_trunc;
+
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
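+		/* sparc64 is big-endian, so converting to big-endian only
+		 * requires truncating the value to the requested width.
+		 */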
+		switch (imm) {
+		case 16:
+			emit_alu_K(SLL, dst, 16, ctx);
+			emit_alu_K(SRL, dst, 16, ctx);
+			break;
+		case 32:
+			emit_alu_K(SRL, dst, 0, ctx);
+			break;
+		case 64:
+			/* nop */
+			break;
+
+		}
+		break;
+
+	/* dst = BSWAP##imm(dst) */
+	case BPF_ALU | BPF_END | BPF_FROM_LE: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+		const u8 tmp2 = bpf2sparc[TMP_REG_2];
+
+		ctx->tmp_1_used = true;
+		switch (imm) {
+		case 16:
+			emit_alu3_K(AND, dst, 0xff, tmp, ctx);
+			emit_alu3_K(SRL, dst, 8, dst, ctx);
+			emit_alu3_K(AND, dst, 0xff, dst, ctx);
+			emit_alu3_K(SLL, tmp, 8, tmp, ctx);
+			emit_alu(OR, tmp, dst, ctx);
+			break;
+
+		case 32:
+			ctx->tmp_2_used = true;
+			emit_alu3_K(SRL, dst, 24, tmp, ctx);	/* tmp  = dst >> 24 */
+			emit_alu3_K(SRL, dst, 16, tmp2, ctx);	/* tmp2 = dst >> 16 */
+			emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
+			emit_alu3_K(SLL, tmp2, 8, tmp2, ctx);	/* tmp2 = tmp2 << 8 */
+			emit_alu(OR, tmp2, tmp, ctx);		/* tmp  = tmp | tmp2 */
+			emit_alu3_K(SRL, dst, 8, tmp2, ctx);	/* tmp2 = dst >> 8 */
+			emit_alu3_K(AND, tmp2, 0xff, tmp2, ctx);/* tmp2 = tmp2 & 0xff */
+			emit_alu3_K(SLL, tmp2, 16, tmp2, ctx);	/* tmp2 = tmp2 << 16 */
+			emit_alu(OR, tmp2, tmp, ctx);		/* tmp  = tmp | tmp2 */
+			emit_alu3_K(AND, dst, 0xff, dst, ctx);	/* dst	= dst & 0xff */
+			emit_alu3_K(SLL, dst, 24, dst, ctx);	/* dst  = dst << 24 */
+			emit_alu(OR, tmp, dst, ctx);		/* dst  = dst | tmp */
+			break;
+
+		case 64:
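+			/* Store the value to the stack and reload it through
+			 * the little-endian ASI; the load performs the swap.
+			 */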
+			emit_alu3_K(ADD, SP, STACK_BIAS + 128, tmp, ctx);
+			emit(ST64 | RS1(tmp) | RS2(G0) | RD(dst), ctx);
+			emit(LD64A | ASI(ASI_PL) | RS1(tmp) | RS2(G0) | RD(dst), ctx);
+			break;
+		}
+		break;
+	}
+	/* dst = imm */
+	case BPF_ALU | BPF_MOV | BPF_K:
+		emit_loadimm32(imm, dst, ctx);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_K:
+		emit_loadimm_sext(imm, dst, ctx);
+		break;
+	/* dst = dst OP imm */
+	case BPF_ALU | BPF_ADD | BPF_K:
+	case BPF_ALU64 | BPF_ADD | BPF_K:
+		emit_alu_K(ADD, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_SUB | BPF_K:
+	case BPF_ALU64 | BPF_SUB | BPF_K:
+		emit_alu_K(SUB, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_AND | BPF_K:
+	case BPF_ALU64 | BPF_AND | BPF_K:
+		emit_alu_K(AND, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_OR | BPF_K:
+	case BPF_ALU64 | BPF_OR | BPF_K:
+		emit_alu_K(OR, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_XOR | BPF_K:
+	case BPF_ALU64 | BPF_XOR | BPF_K:
+		emit_alu_K(XOR, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU | BPF_MUL | BPF_K:
+		emit_alu_K(MUL, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_MUL | BPF_K:
+		emit_alu_K(MULX, dst, imm, ctx);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_K:
+		if (imm == 0)
+			return -EINVAL;
+
+		emit_write_y(G0, ctx);
+		emit_alu_K(DIV, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_DIV | BPF_K:
+		if (imm == 0)
+			return -EINVAL;
+
+		emit_alu_K(UDIVX, dst, imm, ctx);
+		break;
+	case BPF_ALU64 | BPF_MOD | BPF_K:
+	case BPF_ALU | BPF_MOD | BPF_K: {
+		const u8 tmp = bpf2sparc[TMP_REG_2];
+		unsigned int div;
+
+		if (imm == 0)
+			return -EINVAL;
+
+		div = (BPF_CLASS(code) == BPF_ALU64) ? UDIVX : DIV;
+
+		ctx->tmp_2_used = true;
+
+		if (BPF_CLASS(code) != BPF_ALU64)
+			emit_write_y(G0, ctx);
+		if (is_simm13(imm)) {
+			emit(div | IMMED | RS1(dst) | S13(imm) | RD(tmp), ctx);
+			emit(MULX | IMMED | RS1(tmp) | S13(imm) | RD(tmp), ctx);
+			emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
+		} else {
+			const u8 tmp1 = bpf2sparc[TMP_REG_1];
+
+			ctx->tmp_1_used = true;
+
+			emit_set_const_sext(imm, tmp1, ctx);
+			emit(div | RS1(dst) | RS2(tmp1) | RD(tmp), ctx);
+			emit(MULX | RS1(tmp) | RS2(tmp1) | RD(tmp), ctx);
+			emit(SUB | RS1(dst) | RS2(tmp) | RD(dst), ctx);
+		}
+		goto do_alu32_trunc;
+	}
+	case BPF_ALU | BPF_LSH | BPF_K:
+		emit_alu_K(SLL, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_LSH | BPF_K:
+		emit_alu_K(SLLX, dst, imm, ctx);
+		break;
+	case BPF_ALU | BPF_RSH | BPF_K:
+		emit_alu_K(SRL, dst, imm, ctx);
+		break;
+	case BPF_ALU64 | BPF_RSH | BPF_K:
+		emit_alu_K(SRLX, dst, imm, ctx);
+		break;
+	case BPF_ALU | BPF_ARSH | BPF_K:
+		emit_alu_K(SRA, dst, imm, ctx);
+		goto do_alu32_trunc;
+	case BPF_ALU64 | BPF_ARSH | BPF_K:
+		emit_alu_K(SRAX, dst, imm, ctx);
+		break;
+
+	do_alu32_trunc:
+		if (BPF_CLASS(code) == BPF_ALU)
+			emit_alu_K(SRL, dst, 0, ctx);
+		break;
+
+	/* JUMP off */
+	case BPF_JMP | BPF_JA:
+		emit_branch(BA, ctx->idx, ctx->offset[i + off], ctx);
+		emit_nop(ctx);
+		break;
+	/* IF (dst COND src) JUMP off */
+	case BPF_JMP | BPF_JEQ | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
+	case BPF_JMP | BPF_JSET | BPF_X: {
+		int err;
+
+		err = emit_compare_and_branch(code, dst, src, 0, false, i + off, ctx);
+		if (err)
+			return err;
+		break;
+	}
+	/* IF (dst COND imm) JUMP off */
+	case BPF_JMP | BPF_JEQ | BPF_K:
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+	case BPF_JMP | BPF_JNE | BPF_K:
+	case BPF_JMP | BPF_JSGT | BPF_K:
+	case BPF_JMP | BPF_JSLT | BPF_K:
+	case BPF_JMP | BPF_JSGE | BPF_K:
+	case BPF_JMP | BPF_JSLE | BPF_K:
+	case BPF_JMP | BPF_JSET | BPF_K: {
+		int err;
+
+		err = emit_compare_and_branch(code, dst, 0, imm, true, i + off, ctx);
+		if (err)
+			return err;
+		break;
+	}
+
+	/* function call */
+	case BPF_JMP | BPF_CALL:
+	{
+		u8 *func = ((u8 *)__bpf_call_base) + imm;
+
+		ctx->saw_call = true;
+
+		emit_call((u32 *)func, ctx);
+		emit_nop(ctx);
+
+		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
+		break;
+	}
+
+	/* tail call */
+	case BPF_JMP | BPF_TAIL_CALL:
+		emit_tail_call(ctx);
+		break;
+
+	/* function return */
+	case BPF_JMP | BPF_EXIT:
+		/* Optimization: when the last instruction is EXIT,
+		   simply fall through to the epilogue. */
+		if (i == ctx->prog->len - 1)
+			break;
+		emit_branch(BA, ctx->idx, ctx->epilogue_offset, ctx);
+		emit_nop(ctx);
+		break;
+
+	/* dst = imm64 */
+	case BPF_LD | BPF_IMM | BPF_DW:
+	{
+		const struct bpf_insn insn1 = insn[1];
+		u64 imm64;
+
+		imm64 = (u64)insn1.imm << 32 | (u32)imm;
+		emit_loadimm64(imm64, dst, ctx);
+
+		return 1;
+	}
+
+	/* LDX: dst = *(size *)(src + off) */
+	case BPF_LDX | BPF_MEM | BPF_W:
+	case BPF_LDX | BPF_MEM | BPF_H:
+	case BPF_LDX | BPF_MEM | BPF_B:
+	case BPF_LDX | BPF_MEM | BPF_DW: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+		u32 opcode = 0, rs2;
+
+		ctx->tmp_1_used = true;
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			opcode = LD32;
+			break;
+		case BPF_H:
+			opcode = LD16;
+			break;
+		case BPF_B:
+			opcode = LD8;
+			break;
+		case BPF_DW:
+			opcode = LD64;
+			break;
+		}
+
+		if (is_simm13(off)) {
+			opcode |= IMMED;
+			rs2 = S13(off);
+		} else {
+			emit_loadimm(off, tmp, ctx);
+			rs2 = RS2(tmp);
+		}
+		emit(opcode | RS1(src) | rs2 | RD(dst), ctx);
+		break;
+	}
+	/* ST: *(size *)(dst + off) = imm */
+	case BPF_ST | BPF_MEM | BPF_W:
+	case BPF_ST | BPF_MEM | BPF_H:
+	case BPF_ST | BPF_MEM | BPF_B:
+	case BPF_ST | BPF_MEM | BPF_DW: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+		const u8 tmp2 = bpf2sparc[TMP_REG_2];
+		u32 opcode = 0, rs2;
+
+		ctx->tmp_2_used = true;
+		emit_loadimm(imm, tmp2, ctx);
+
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			opcode = ST32;
+			break;
+		case BPF_H:
+			opcode = ST16;
+			break;
+		case BPF_B:
+			opcode = ST8;
+			break;
+		case BPF_DW:
+			opcode = ST64;
+			break;
+		}
+
+		if (is_simm13(off)) {
+			opcode |= IMMED;
+			rs2 = S13(off);
+		} else {
+			ctx->tmp_1_used = true;
+			emit_loadimm(off, tmp, ctx);
+			rs2 = RS2(tmp);
+		}
+		emit(opcode | RS1(dst) | rs2 | RD(tmp2), ctx);
+		break;
+	}
+
+	/* STX: *(size *)(dst + off) = src */
+	case BPF_STX | BPF_MEM | BPF_W:
+	case BPF_STX | BPF_MEM | BPF_H:
+	case BPF_STX | BPF_MEM | BPF_B:
+	case BPF_STX | BPF_MEM | BPF_DW: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+		u32 opcode = 0, rs2;
+
+		switch (BPF_SIZE(code)) {
+		case BPF_W:
+			opcode = ST32;
+			break;
+		case BPF_H:
+			opcode = ST16;
+			break;
+		case BPF_B:
+			opcode = ST8;
+			break;
+		case BPF_DW:
+			opcode = ST64;
+			break;
+		}
+		if (is_simm13(off)) {
+			opcode |= IMMED;
+			rs2 = S13(off);
+		} else {
+			ctx->tmp_1_used = true;
+			emit_loadimm(off, tmp, ctx);
+			rs2 = RS2(tmp);
+		}
+		emit(opcode | RS1(dst) | rs2 | RD(src), ctx);
+		break;
+	}
+
+	/* STX XADD: lock *(u32 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_W: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+		const u8 tmp2 = bpf2sparc[TMP_REG_2];
+		const u8 tmp3 = bpf2sparc[TMP_REG_3];
+
+		ctx->tmp_1_used = true;
+		ctx->tmp_2_used = true;
+		ctx->tmp_3_used = true;
+		emit_loadimm(off, tmp, ctx);
+		emit_alu3(ADD, dst, tmp, tmp, ctx);
+
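+		/* Atomic add via a compare-and-swap retry loop: load the
+		 * old value, compute old + src, cas it back, and retry if
+		 * the word in memory changed in the meantime.
+		 */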
+		emit(LD32 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
+		emit_alu3(ADD, tmp2, src, tmp3, ctx);
+		emit(CAS | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
+		emit_cmp(tmp2, tmp3, ctx);
+		emit_branch(BNE, 4, 0, ctx);
+		emit_nop(ctx);
+		break;
+	}
+	/* STX XADD: lock *(u64 *)(dst + off) += src */
+	case BPF_STX | BPF_XADD | BPF_DW: {
+		const u8 tmp = bpf2sparc[TMP_REG_1];
+		const u8 tmp2 = bpf2sparc[TMP_REG_2];
+		const u8 tmp3 = bpf2sparc[TMP_REG_3];
+
+		ctx->tmp_1_used = true;
+		ctx->tmp_2_used = true;
+		ctx->tmp_3_used = true;
+		emit_loadimm(off, tmp, ctx);
+		emit_alu3(ADD, dst, tmp, tmp, ctx);
+
+		emit(LD64 | RS1(tmp) | RS2(G0) | RD(tmp2), ctx);
+		emit_alu3(ADD, tmp2, src, tmp3, ctx);
+		emit(CASX | ASI(ASI_P) | RS1(tmp) | RS2(tmp2) | RD(tmp3), ctx);
+		emit_cmp(tmp2, tmp3, ctx);
+		emit_branch(BNE, 4, 0, ctx);
+		emit_nop(ctx);
+		break;
+	}
+
+	default:
+		pr_err_once("unknown opcode %02x\n", code);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int build_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	int i;
+
+	for (i = 0; i < prog->len; i++) {
+		const struct bpf_insn *insn = &prog->insnsi[i];
+		int ret;
+
+		ret = build_insn(insn, ctx);
+
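+		/* A positive return value means the instruction consumed
+		 * two BPF slots (the BPF_LD | BPF_IMM | BPF_DW form), so
+		 * record an offset for the second half and skip it.
+		 */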
+		if (ret > 0) {
+			i++;
+			ctx->offset[i] = ctx->idx;
+			continue;
+		}
+		ctx->offset[i] = ctx->idx;
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	u32 *ptr;
+	/* We are guaranteed to have aligned memory. */
+	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
+		*ptr++ = 0x91d02005; /* ta 5 */
+}
+
+struct sparc64_jit_data {
+	struct bpf_binary_header *header;
+	u8 *image;
+	struct jit_ctx ctx;
+};
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	struct bpf_prog *tmp, *orig_prog = prog;
+	struct sparc64_jit_data *jit_data;
+	struct bpf_binary_header *header;
+	bool tmp_blinded = false;
+	bool extra_pass = false;
+	struct jit_ctx ctx;
+	u32 image_size;
+	u8 *image_ptr;
+	int pass;
+
+	if (!prog->jit_requested)
+		return orig_prog;
+
+	tmp = bpf_jit_blind_constants(prog);
+	/* If blinding was requested and we failed during blinding,
+	 * we must fall back to the interpreter.
+	 */
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	jit_data = prog->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			prog = orig_prog;
+			goto out;
+		}
+		prog->aux->jit_data = jit_data;
+	}
+	if (jit_data->ctx.offset) {
+		ctx = jit_data->ctx;
+		image_ptr = jit_data->image;
+		header = jit_data->header;
+		extra_pass = true;
+		image_size = sizeof(u32) * ctx.idx;
+		goto skip_init_ctx;
+	}
+
+	memset(&ctx, 0, sizeof(ctx));
+	ctx.prog = prog;
+
+	ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL);
+	if (ctx.offset == NULL) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	/* Fake pass to detect features used, and get an accurate assessment
+	 * of what the final image size will be.
+	 */
+	if (build_body(&ctx)) {
+		prog = orig_prog;
+		goto out_off;
+	}
+	build_prologue(&ctx);
+	build_epilogue(&ctx);
+
+	/* Now we know the actual image size. */
+	image_size = sizeof(u32) * ctx.idx;
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	ctx.image = (u32 *)image_ptr;
+skip_init_ctx:
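+	/* Two emission passes: the first records the final instruction
+	 * offsets, the second re-emits everything with branch displacements
+	 * computed from those now-stable offsets.
+	 */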
+	for (pass = 1; pass < 3; pass++) {
+		ctx.idx = 0;
+
+		build_prologue(&ctx);
+
+		if (build_body(&ctx)) {
+			bpf_jit_binary_free(header);
+			prog = orig_prog;
+			goto out_off;
+		}
+
+		build_epilogue(&ctx);
+
+		if (bpf_jit_enable > 1)
+			pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
+				image_size - (ctx.idx * 4),
+				ctx.tmp_1_used ? '1' : ' ',
+				ctx.tmp_2_used ? '2' : ' ',
+				ctx.tmp_3_used ? '3' : ' ',
+				ctx.saw_frame_pointer ? 'F' : ' ',
+				ctx.saw_call ? 'C' : ' ',
+				ctx.saw_tail_call ? 'T' : ' ');
+	}
+
+	if (bpf_jit_enable > 1)
+		bpf_jit_dump(prog->len, image_size, pass, ctx.image);
+
+	bpf_flush_icache(header, (u8 *)header + (header->pages * PAGE_SIZE));
+
+	if (!prog->is_func || extra_pass) {
+		bpf_jit_binary_lock_ro(header);
+	} else {
+		jit_data->ctx = ctx;
+		jit_data->image = image_ptr;
+		jit_data->header = header;
+	}
+
+	prog->bpf_func = (void *)ctx.image;
+	prog->jited = 1;
+	prog->jited_len = image_size;
+
+	if (!prog->is_func || extra_pass) {
+out_off:
+		kfree(ctx.offset);
+		kfree(jit_data);
+		prog->aux->jit_data = NULL;
+	}
+out:
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+	return prog;
+}
diff --git a/arch/sparc/oprofile/Makefile b/arch/sparc/oprofile/Makefile
new file mode 100644
index 0000000..e9feca1
--- /dev/null
+++ b/arch/sparc/oprofile/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o \
+		timer_int.o )
+
+oprofile-y				:= $(DRIVER_OBJS) init.o
diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c
new file mode 100644
index 0000000..f9024bc
--- /dev/null
+++ b/arch/sparc/oprofile/init.c
@@ -0,0 +1,87 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/param.h>	/* for HZ */
+
+#ifdef CONFIG_SPARC64
+#include <linux/notifier.h>
+#include <linux/rcupdate.h>
+#include <linux/kdebug.h>
+#include <asm/nmi.h>
+
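+/* On sparc64 the profiling "timer" is the NMI watchdog: a die notifier
+ * catches each DIE_NMI event and hands the interrupted registers to
+ * oprofile as a sample.
+ */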
+static int profile_timer_exceptions_notify(struct notifier_block *self,
+					   unsigned long val, void *data)
+{
+	struct die_args *args = data;
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_NMI:
+		oprofile_add_sample(args->regs, 0);
+		ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static struct notifier_block profile_timer_exceptions_nb = {
+	.notifier_call	= profile_timer_exceptions_notify,
+};
+
+static int timer_start(void)
+{
+	if (register_die_notifier(&profile_timer_exceptions_nb))
+		return 1;
+	nmi_adjust_hz(HZ);
+	return 0;
+}
+
+
+static void timer_stop(void)
+{
+	nmi_adjust_hz(1);
+	unregister_die_notifier(&profile_timer_exceptions_nb);
+	synchronize_sched();  /* Allow already-started NMIs to complete. */
+}
+
+static int op_nmi_timer_init(struct oprofile_operations *ops)
+{
+	if (atomic_read(&nmi_active) <= 0)
+		return -ENODEV;
+
+	ops->start = timer_start;
+	ops->stop = timer_stop;
+	ops->cpu_type = "timer";
+	printk(KERN_INFO "oprofile: Using perfctr NMI timer interrupt.\n");
+	return 0;
+}
+#endif
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	int ret = -ENODEV;
+
+#ifdef CONFIG_SPARC64
+	ret = op_nmi_timer_init(ops);
+	if (!ret)
+		return ret;
+#endif
+
+	return ret;
+}
+
+void oprofile_arch_exit(void)
+{
+}
diff --git a/arch/sparc/power/Makefile b/arch/sparc/power/Makefile
new file mode 100644
index 0000000..3201ace
--- /dev/null
+++ b/arch/sparc/power/Makefile
@@ -0,0 +1,3 @@
+# Makefile for Sparc-specific hibernate files.
+
+obj-$(CONFIG_HIBERNATION)	+= hibernate.o hibernate_asm.o
diff --git a/arch/sparc/power/hibernate.c b/arch/sparc/power/hibernate.c
new file mode 100644
index 0000000..47b06f4
--- /dev/null
+++ b/arch/sparc/power/hibernate.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hibernate.c:  Hibernation support specific to sparc64.
+ *
+ * Copyright (C) 2013 Kirill V Tkhai (tkhai@yandex.ru)
+ */
+
+#include <linux/mm.h>
+
+#include <asm/hibernate.h>
+#include <asm/visasm.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/tlb.h>
+
+struct saved_context saved_context;
+
+/*
+ *	pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = PFN_DOWN((unsigned long)&__nosave_begin);
+	unsigned long nosave_end_pfn = PFN_DOWN((unsigned long)&__nosave_end);
+
+	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+void save_processor_state(void)
+{
+	save_and_clear_fpu();
+}
+
+void restore_processor_state(void)
+{
+	struct mm_struct *mm = current->active_mm;
+
+	tsb_context_switch_ctx(mm, CTX_HWBITS(mm->context));
+}
diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S
new file mode 100644
index 0000000..8cfaf5b
--- /dev/null
+++ b/arch/sparc/power/hibernate_asm.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * hibernate_asm.S:  Hibernation support specific to sparc64.
+ *
+ * Copyright (C) 2013 Kirill V Tkhai (tkhai@yandex.ru)
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm-offsets.h>
+#include <asm/cpudata.h>
+#include <asm/page.h>
+
+ENTRY(swsusp_arch_suspend)
+	save	%sp, -128, %sp
+	save	%sp, -128, %sp
+	flushw
+
+	setuw	saved_context, %g3
+
+	/* Save window regs */
+	rdpr	%cwp, %g2
+	stx	%g2, [%g3 + SC_REG_CWP]
+	rdpr	%wstate, %g2
+	stx	%g2, [%g3 + SC_REG_WSTATE]
+	stx	%fp, [%g3 + SC_REG_FP]
+
+	/* Save state regs */
+	rdpr	%tick, %g2
+	stx	%g2, [%g3 + SC_REG_TICK]
+	rdpr	%pstate, %g2
+	stx	%g2, [%g3 + SC_REG_PSTATE]
+
+	/* Save global regs */
+	stx	%g4, [%g3 + SC_REG_G4]
+	stx	%g5, [%g3 + SC_REG_G5]
+	stx	%g6, [%g3 + SC_REG_G6]
+
+	call	swsusp_save
+	 nop
+
+	mov	%o0, %i0
+	restore
+
+	mov	%o0, %i0
+	ret
+	 restore
+
+ENTRY(swsusp_arch_resume)
+	/* Write restore_pblist to %l0 */
+	sethi	%hi(restore_pblist), %l0
+	ldx	[%l0 + %lo(restore_pblist)], %l0
+
+	call	__flush_tlb_all
+	 nop
+
+	/* Write PAGE_OFFSET to %g7 */
+	sethi	%hi(PAGE_OFFSET), %g7
+	ldx	[%g7 + %lo(PAGE_OFFSET)], %g7
+
+	setuw	(PAGE_SIZE-8), %g3
+
+	/* Use MMU Bypass */
+	rd	%asi, %g1
+	wr	%g0, ASI_PHYS_USE_EC, %asi
+
+	ba	fill_itlb
+	 nop
+
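+/* Walk restore_pblist, copying each saved page back to its original
+ * physical location through the MMU-bypass ASI.
+ */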
+pbe_loop:
+	cmp	%l0, %g0
+	be	restore_ctx
+	 sub	%l0, %g7, %l0
+
+	ldxa	[%l0    ] %asi, %l1 /* address */
+	ldxa	[%l0 + 8] %asi, %l2 /* orig_address */
+
+	/* phys addr */
+	sub	%l1, %g7, %l1
+	sub	%l2, %g7, %l2
+
+	mov	%g3, %l3 /* PAGE_SIZE-8 */
+copy_loop:
+	ldxa	[%l1 + %l3] ASI_PHYS_USE_EC, %g2
+	stxa	%g2, [%l2 + %l3] ASI_PHYS_USE_EC
+	cmp	%l3, %g0
+	bne	copy_loop
+	 sub	%l3, 8, %l3
+
+	/* next pbe */
+	ba	pbe_loop
+	 ldxa	[%l0 + 16] %asi, %l0
+
+restore_ctx:
+	setuw	saved_context, %g3
+
+	/* Restore window regs */
+	wrpr    %g0, 0, %canrestore
+	wrpr    %g0, 0, %otherwin
+	wrpr	%g0, 6, %cansave
+	wrpr    %g0, 0, %cleanwin
+
+	ldxa	[%g3 + SC_REG_CWP] %asi, %g2
+	wrpr	%g2, %cwp
+	ldxa	[%g3 + SC_REG_WSTATE] %asi, %g2
+	wrpr	%g2, %wstate
+	ldxa	[%g3 + SC_REG_FP] %asi, %fp
+
+	/* Restore state regs */
+	ldxa	[%g3 + SC_REG_PSTATE] %asi, %g2
+	wrpr	%g2, %pstate
+	ldxa	[%g3 + SC_REG_TICK] %asi, %g2
+	wrpr	%g2, %tick
+
+	/* Restore global regs */
+	ldxa	[%g3 + SC_REG_G4] %asi, %g4
+	ldxa	[%g3 + SC_REG_G5] %asi, %g5
+	ldxa	[%g3 + SC_REG_G6] %asi, %g6
+
+	wr	%g1, %g0, %asi
+
+	restore
+	restore
+
+	wrpr	%g0, 14, %pil
+
+	retl
+	 mov	%g0, %o0
+
+fill_itlb:
+	ba	pbe_loop
+	 wrpr	%g0, 15, %pil
diff --git a/arch/sparc/prom/Makefile b/arch/sparc/prom/Makefile
new file mode 100644
index 0000000..397b79a
--- /dev/null
+++ b/arch/sparc/prom/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for the Sun Boot PROM interface library under
+# Linux.
+#
+asflags-y := -ansi
+ccflags-y := -Werror
+
+lib-y                 := bootstr_$(BITS).o
+lib-y                 += init_$(BITS).o
+lib-$(CONFIG_SPARC32) += memory.o
+lib-y                 += misc_$(BITS).o
+lib-$(CONFIG_SPARC32) += mp.o
+lib-$(CONFIG_SPARC32) += ranges.o
+lib-y                 += console_$(BITS).o
+lib-y                 += printf.o
+lib-y                 += tree_$(BITS).o
+lib-$(CONFIG_SPARC64) += p1275.o
+lib-$(CONFIG_SPARC64) += cif.o
diff --git a/arch/sparc/prom/bootstr_32.c b/arch/sparc/prom/bootstr_32.c
new file mode 100644
index 0000000..e3b731f
--- /dev/null
+++ b/arch/sparc/prom/bootstr_32.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bootstr.c:  Boot string/argument acquisition from the PROM.
+ *
+ * Copyright(C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/string.h>
+#include <asm/oplib.h>
+#include <linux/init.h>
+
+#define BARG_LEN  256
+static char barg_buf[BARG_LEN] = { 0 };
+static char fetched __initdata = 0;
+
+char * __init
+prom_getbootargs(void)
+{
+	int iter;
+	char *cp, *arg;
+
+	/* This check saves us from a panic when bootfd patches args. */
+	if (fetched) {
+		return barg_buf;
+	}
+
+	switch (prom_vers) {
+	case PROM_V0:
+		cp = barg_buf;
+		/* Start from 1 and go over fd(0,0,0)kernel */
+		for (iter = 1; iter < 8; iter++) {
+			arg = (*(romvec->pv_v0bootargs))->argv[iter];
+			if (arg == NULL)
+				break;
+			while (*arg != 0) {
+				/* Leave place for space and null. */
+				if (cp >= barg_buf + BARG_LEN - 2)
+					/* We might issue a warning here. */
+					break;
+				*cp++ = *arg++;
+			}
+			*cp++ = ' ';
+			if (cp >= barg_buf + BARG_LEN - 1)
+				/* We might issue a warning here. */
+				break;
+		}
+		*cp = 0;
+		break;
+	case PROM_V2:
+	case PROM_V3:
+		/*
+		 * V3 PROM cannot supply us with more than 128 bytes
+		 * of an argument. But a smart bootstrap loader can.
+		 */
+		strlcpy(barg_buf, *romvec->pv_v2bootargs.bootargs, sizeof(barg_buf));
+		break;
+	default:
+		break;
+	}
+
+	fetched = 1;
+	return barg_buf;
+}
diff --git a/arch/sparc/prom/bootstr_64.c b/arch/sparc/prom/bootstr_64.c
new file mode 100644
index 0000000..f1cc34d
--- /dev/null
+++ b/arch/sparc/prom/bootstr_64.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bootstr.c:  Boot string/argument acquisition from the PROM.
+ *
+ * Copyright(C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright(C) 1996,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/string.h>
+#include <linux/init.h>
+#include <asm/oplib.h>
+
+/* WARNING: The boot loader knows that these next three variables come one right
+ *          after another in the .data section.  Do not move this stuff into
+ *          the .bss section or it will break things.
+ */
+
+/* We limit BARG_LEN to 1024 because this is the size of the
+ * 'barg_out' command line buffer in the SILO bootloader.
+ */
+#define BARG_LEN 1024
+struct {
+	int bootstr_len;
+	int bootstr_valid;
+	char bootstr_buf[BARG_LEN];
+} bootstr_info = {
+	.bootstr_len = BARG_LEN,
+#ifdef CONFIG_CMDLINE
+	.bootstr_valid = 1,
+	.bootstr_buf = CONFIG_CMDLINE,
+#endif
+};
+
+char * __init
+prom_getbootargs(void)
+{
+	/* This check saves us from a panic when bootfd patches args. */
+	if (bootstr_info.bootstr_valid)
+		return bootstr_info.bootstr_buf;
+	prom_getstring(prom_chosen_node, "bootargs",
+		       bootstr_info.bootstr_buf, BARG_LEN);
+	bootstr_info.bootstr_valid = 1;
+	return bootstr_info.bootstr_buf;
+}
diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S
new file mode 100644
index 0000000..dd06bb1
--- /dev/null
+++ b/arch/sparc/prom/cif.S
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* cif.S: PROM entry/exit assembler trampolines.
+ *
+ * Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 2005, 2006 David S. Miller <davem@davemloft.net>
+ */
+
+#include <asm/pstate.h>
+#include <asm/cpudata.h>
+#include <asm/thread_info.h>
+
+	.text
+	.globl	prom_cif_direct
+prom_cif_direct:
+	save	%sp, -192, %sp
+	sethi	%hi(p1275buf), %o1
+	or	%o1, %lo(p1275buf), %o1
+	ldx	[%o1 + 0x0008], %l2	! prom_cif_handler
+	mov	%g4, %l0
+	mov	%g5, %l1
+	mov	%g6, %l3
+	call	%l2
+	 mov	%i0, %o0		! prom_args
+	mov	%l0, %g4
+	mov	%l1, %g5
+	mov	%l3, %g6
+	ret
+	 restore
+
+	.globl	prom_cif_callback
+prom_cif_callback:
+	sethi	%hi(p1275buf), %o1
+	or	%o1, %lo(p1275buf), %o1
+	save	%sp, -192, %sp
+	TRAP_LOAD_THREAD_REG(%g6, %g1)
+	LOAD_PER_CPU_BASE(%g5, %g6, %g4, %g3, %o0)
+	ldx	[%g6 + TI_TASK], %g4
+	call	prom_world
+	 mov	0, %o0
+	ldx	[%i1 + 0x000], %l2
+	call	%l2
+	 mov	%i0, %o0
+	mov	%o0, %l1
+	call	prom_world
+	 mov	1, %o0
+	ret
+	 restore %l1, 0, %o0
+
diff --git a/arch/sparc/prom/console_32.c b/arch/sparc/prom/console_32.c
new file mode 100644
index 0000000..6404e5b
--- /dev/null
+++ b/arch/sparc/prom/console_32.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * console.c: Routines that deal with sending and receiving IO
+ *            to/from the current console device using the PROM.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Pete Zaitcev <zaitcev@yahoo.com>
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <linux/string.h>
+
+extern void restore_current(void);
+
+/* Non blocking put character to console device, returns -1 if
+ * unsuccessful.
+ */
+static int prom_nbputchar(const char *buf)
+{
+	unsigned long flags;
+	int i = -1;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	switch(prom_vers) {
+	case PROM_V0:
+		if ((*(romvec->pv_nbputchar))(*buf))
+			i = 1;
+		break;
+	case PROM_V2:
+	case PROM_V3:
+		if ((*(romvec->pv_v2devops).v2_dev_write)(*romvec->pv_v2bootargs.fd_stdout,
+							  buf, 0x1) == 1)
+			i = 1;
+		break;
+	default:
+		break;
+	}
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	return i; /* Ugh, we could spin forever on unsupported proms ;( */
+}
+
+void prom_console_write_buf(const char *buf, int len)
+{
+	while (len) {
+		int n = prom_nbputchar(buf);
+		if (n < 0)
+			continue;
+		len--;
+		buf++;
+	}
+}
+
diff --git a/arch/sparc/prom/console_64.c b/arch/sparc/prom/console_64.c
new file mode 100644
index 0000000..86dace7
--- /dev/null
+++ b/arch/sparc/prom/console_64.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* console.c: Routines that deal with sending and receiving IO
+ *            to/from the current console device using the PROM.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <linux/string.h>
+
+static int __prom_console_write_buf(const char *buf, int len)
+{
+	unsigned long args[7];
+	int ret;
+
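+	/* P1275 calling convention: service name, number of input
+	 * arguments, number of return values, the arguments themselves,
+	 * then one slot per return value (pre-filled with -1).
+	 */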
+	args[0] = (unsigned long) "write";
+	args[1] = 3;
+	args[2] = 1;
+	args[3] = (unsigned int) prom_stdout;
+	args[4] = (unsigned long) buf;
+	args[5] = (unsigned int) len;
+	args[6] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	ret = (int) args[6];
+	if (ret < 0)
+		return -1;
+	return ret;
+}
+
+void prom_console_write_buf(const char *buf, int len)
+{
+	while (len) {
+		int n = __prom_console_write_buf(buf, len);
+		if (n < 0)
+			continue;
+		len -= n;
+		buf += n;
+	}
+}
diff --git a/arch/sparc/prom/init_32.c b/arch/sparc/prom/init_32.c
new file mode 100644
index 0000000..d204701
--- /dev/null
+++ b/arch/sparc/prom/init_32.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * init.c:  Initialize internal variables used by the PROM
+ *          library functions.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+
+struct linux_romvec *romvec;
+EXPORT_SYMBOL(romvec);
+
+enum prom_major_version prom_vers;
+unsigned int prom_rev, prom_prev;
+
+/* The root node of the prom device tree. */
+phandle prom_root_node;
+EXPORT_SYMBOL(prom_root_node);
+
+/* Pointer to the device tree operations structure. */
+struct linux_nodeops *prom_nodeops;
+
+/* You must call prom_init() before you attempt to use any of the
+ * routines in the prom library.
+ * It gets passed the pointer to the PROM vector.
+ */
+
+void __init prom_init(struct linux_romvec *rp)
+{
+	romvec = rp;
+
+	switch(romvec->pv_romvers) {
+	case 0:
+		prom_vers = PROM_V0;
+		break;
+	case 2:
+		prom_vers = PROM_V2;
+		break;
+	case 3:
+		prom_vers = PROM_V3;
+		break;
+	default:
+		prom_printf("PROMLIB: Bad PROM version %d\n",
+			    romvec->pv_romvers);
+		prom_halt();
+		break;
+	}
+
+	prom_rev = romvec->pv_plugin_revision;
+	prom_prev = romvec->pv_printrev;
+	prom_nodeops = romvec->pv_nodeops;
+
+	prom_root_node = prom_getsibling(0);
+	if ((prom_root_node == 0) || ((s32)prom_root_node == -1))
+		prom_halt();
+
+	if((((unsigned long) prom_nodeops) == 0) || 
+	   (((unsigned long) prom_nodeops) == -1))
+		prom_halt();
+
+	prom_meminit();
+
+	prom_ranges_init();
+
+	printk("PROMLIB: Sun Boot Prom Version %d Revision %d\n",
+	       romvec->pv_romvers, prom_rev);
+
+	/* Initialization successful. */
+}
diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c
new file mode 100644
index 0000000..103aa91
--- /dev/null
+++ b/arch/sparc/prom/init_64.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * init.c:  Initialize internal variables used by the PROM
+ *          library functions.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+
+/* OBP version string. */
+char prom_version[80];
+
+/* The prom stdout handle and the "/chosen" node of the device tree. */
+int prom_stdout;
+phandle prom_chosen_node;
+
+/* You must call prom_init() before you attempt to use any of the
+ * routines in the prom library.
+ * It gets passed the pointer to the PROM vector.
+ */
+
+extern void prom_cif_init(void *);
+
+void __init prom_init(void *cif_handler)
+{
+	phandle node;
+
+	prom_cif_init(cif_handler);
+
+	prom_chosen_node = prom_finddevice(prom_chosen_path);
+	if (!prom_chosen_node || (s32)prom_chosen_node == -1)
+		prom_halt();
+
+	prom_stdout = prom_getint(prom_chosen_node, "stdout");
+
+	node = prom_finddevice("/openprom");
+	if (!node || (s32)node == -1)
+		prom_halt();
+
+	prom_getstring(node, "version", prom_version, sizeof(prom_version));
+
+	prom_printf("\n");
+}
+
+void __init prom_init_report(void)
+{
+	printk("PROMLIB: Sun IEEE Boot Prom '%s'\n", prom_version);
+	printk("PROMLIB: Root node compatible: %s\n", prom_root_compatible);
+}
diff --git a/arch/sparc/prom/memory.c b/arch/sparc/prom/memory.c
new file mode 100644
index 0000000..269d6ab
--- /dev/null
+++ b/arch/sparc/prom/memory.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* memory.c: Prom routine for acquiring various bits of information
+ *           about RAM on the machine, both virtual and physical.
+ *
+ * Copyright (C) 1995, 2008 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1997 Michael A. Griffith (grif@acm.org)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sort.h>
+#include <linux/init.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/page.h>
+
+static int __init prom_meminit_v0(void)
+{
+	struct linux_mlist_v0 *p;
+	int index;
+
+	index = 0;
+	for (p = *(romvec->pv_v0mem.v0_available); p; p = p->theres_more) {
+		sp_banks[index].base_addr = (unsigned long) p->start_adr;
+		sp_banks[index].num_bytes = p->num_bytes;
+		index++;
+	}
+
+	return index;
+}
+
+static int __init prom_meminit_v2(void)
+{
+	struct linux_prom_registers reg[64];
+	phandle node;
+	int size, num_ents, i;
+
+	node = prom_searchsiblings(prom_getchild(prom_root_node), "memory");
+	size = prom_getproperty(node, "available", (char *) reg, sizeof(reg));
+	num_ents = size / sizeof(struct linux_prom_registers);
+
+	for (i = 0; i < num_ents; i++) {
+		sp_banks[i].base_addr = reg[i].phys_addr;
+		sp_banks[i].num_bytes = reg[i].reg_size;
+	}
+
+	return num_ents;
+}
+
+static int sp_banks_cmp(const void *a, const void *b)
+{
+	const struct sparc_phys_banks *x = a, *y = b;
+
+	if (x->base_addr > y->base_addr)
+		return 1;
+	if (x->base_addr < y->base_addr)
+		return -1;
+	return 0;
+}
+
+/* Initialize the memory lists based upon the prom version. */
+void __init prom_meminit(void)
+{
+	int i, num_ents = 0;
+
+	switch (prom_vers) {
+	case PROM_V0:
+		num_ents = prom_meminit_v0();
+		break;
+
+	case PROM_V2:
+	case PROM_V3:
+		num_ents = prom_meminit_v2();
+		break;
+
+	default:
+		break;
+	}
+	sort(sp_banks, num_ents, sizeof(struct sparc_phys_banks),
+	     sp_banks_cmp, NULL);
+
+	/* Sentinel.  */
+	sp_banks[num_ents].base_addr = 0xdeadbeef;
+	sp_banks[num_ents].num_bytes = 0;
+
+	for (i = 0; i < num_ents; i++)
+		sp_banks[i].num_bytes &= PAGE_MASK;
+}
diff --git a/arch/sparc/prom/misc_32.c b/arch/sparc/prom/misc_32.c
new file mode 100644
index 0000000..6257509
--- /dev/null
+++ b/arch/sparc/prom/misc_32.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * misc.c:  Miscellaneous prom functions that don't belong
+ *          anywhere else.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/auxio.h>
+
+extern void restore_current(void);
+
+DEFINE_SPINLOCK(prom_lock);
+
+/* Reset and reboot the machine with the command 'bcommand'. */
+void
+prom_reboot(char *bcommand)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&prom_lock, flags);
+	(*(romvec->pv_reboot))(bcommand);
+	/* Never get here. */
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+}
+
+/* Forth evaluate the expression contained in 'fstring'. */
+void
+prom_feval(char *fstring)
+{
+	unsigned long flags;
+	if(!fstring || fstring[0] == 0)
+		return;
+	spin_lock_irqsave(&prom_lock, flags);
+	if(prom_vers == PROM_V0)
+		(*(romvec->pv_fortheval.v0_eval))(strlen(fstring), fstring);
+	else
+		(*(romvec->pv_fortheval.v2_eval))(fstring);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+}
+EXPORT_SYMBOL(prom_feval);
+
+/* Drop into the prom, with the chance to continue with the 'go'
+ * prom command.
+ */
+void
+prom_cmdline(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	(*(romvec->pv_abort))();
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	set_auxio(AUXIO_LED, 0);
+}
+
+/* Drop into the prom, but completely terminate the program.
+ * No chance of continuing.
+ */
+void __noreturn
+prom_halt(void)
+{
+	unsigned long flags;
+again:
+	spin_lock_irqsave(&prom_lock, flags);
+	(*(romvec->pv_halt))();
+	/* Never get here. */
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	goto again; /* PROM is out to get me -DaveM */
+}
+
+typedef void (*sfunc_t)(void);
+
+/* Set prom sync handler to call function 'funcp'. */
+void
+prom_setsync(sfunc_t funcp)
+{
+	if(!funcp) return;
+	*romvec->pv_synchook = funcp;
+}
+
+/* Get the idprom and stuff it into buffer 'idbuf'.  Returns the
+ * format type.  'num_bytes' is the number of bytes that your idbuf
+ * has space for.  Returns 0xff on error.
+ */
+unsigned char
+prom_get_idprom(char *idbuf, int num_bytes)
+{
+	int len;
+
+	len = prom_getproplen(prom_root_node, "idprom");
+	if((len>num_bytes) || (len==-1)) return 0xff;
+	if(!prom_getproperty(prom_root_node, "idprom", idbuf, num_bytes))
+		return idbuf[0];
+
+	return 0xff;
+}
+
+/* Get the major prom version number. */
+int
+prom_version(void)
+{
+	return romvec->pv_romvers;
+}
+
+/* Get the prom plugin-revision. */
+int
+prom_getrev(void)
+{
+	return prom_rev;
+}
+
+/* Get the prom firmware print revision. */
+int
+prom_getprev(void)
+{
+	return prom_prev;
+}
diff --git a/arch/sparc/prom/misc_64.c b/arch/sparc/prom/misc_64.c
new file mode 100644
index 0000000..aed94cd
--- /dev/null
+++ b/arch/sparc/prom/misc_64.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * misc.c:  Miscellaneous prom functions that don't belong
+ *          anywhere else.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/ldc.h>
+
+static int prom_service_exists(const char *service_name)
+{
+	unsigned long args[5];
+
+	args[0] = (unsigned long) "test";
+	args[1] = 1;
+	args[2] = 1;
+	args[3] = (unsigned long) service_name;
+	args[4] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	if (args[4])
+		return 0;
+	return 1;
+}
+
+void prom_sun4v_guest_soft_state(void)
+{
+	const char *svc = "SUNW,soft-state-supported";
+	unsigned long args[3];
+
+	if (!prom_service_exists(svc))
+		return;
+	args[0] = (unsigned long) svc;
+	args[1] = 0;
+	args[2] = 0;
+	p1275_cmd_direct(args);
+}
+
+/* Reset and reboot the machine with the command 'bcommand'. */
+void prom_reboot(const char *bcommand)
+{
+	unsigned long args[4];
+
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled)
+		ldom_reboot(bcommand);
+#endif
+	args[0] = (unsigned long) "boot";
+	args[1] = 1;
+	args[2] = 0;
+	args[3] = (unsigned long) bcommand;
+
+	p1275_cmd_direct(args);
+}
+
+/* Forth evaluate the expression contained in 'fstring'. */
+void prom_feval(const char *fstring)
+{
+	unsigned long args[5];
+
+	if (!fstring || fstring[0] == 0)
+		return;
+	args[0] = (unsigned long) "interpret";
+	args[1] = 1;
+	args[2] = 1;
+	args[3] = (unsigned long) fstring;
+	args[4] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+}
+EXPORT_SYMBOL(prom_feval);
+
+/* Drop into the prom, with the chance to continue with the 'go'
+ * prom command.
+ */
+void prom_cmdline(void)
+{
+	unsigned long args[3];
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+#ifdef CONFIG_SMP
+	smp_capture();
+#endif
+
+	args[0] = (unsigned long) "enter";
+	args[1] = 0;
+	args[2] = 0;
+
+	p1275_cmd_direct(args);
+
+#ifdef CONFIG_SMP
+	smp_release();
+#endif
+
+	local_irq_restore(flags);
+}
+
+/* Drop into the prom, but completely terminate the program.
+ * No chance of continuing.
+ */
+void notrace prom_halt(void)
+{
+	unsigned long args[3];
+
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled)
+		ldom_power_off();
+#endif
+again:
+	args[0] = (unsigned long) "exit";
+	args[1] = 0;
+	args[2] = 0;
+	p1275_cmd_direct(args);
+	goto again; /* PROM is out to get me -DaveM */
+}
+
+void prom_halt_power_off(void)
+{
+	unsigned long args[3];
+
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled)
+		ldom_power_off();
+#endif
+	args[0] = (unsigned long) "SUNW,power-off";
+	args[1] = 0;
+	args[2] = 0;
+	p1275_cmd_direct(args);
+
+	/* if nothing else helps, we just halt */
+	prom_halt();
+}
+
+/* Get the idprom and stuff it into buffer 'idbuf'.  Returns the
+ * format type.  'num_bytes' is the number of bytes that your idbuf
+ * has space for.  Returns 0xff on error.
+ */
+unsigned char prom_get_idprom(char *idbuf, int num_bytes)
+{
+	int len;
+
+	len = prom_getproplen(prom_root_node, "idprom");
+	if ((len > num_bytes) || (len == -1))
+		return 0xff;
+	if (!prom_getproperty(prom_root_node, "idprom", idbuf, num_bytes))
+		return idbuf[0];
+
+	return 0xff;
+}
+
+int prom_get_mmu_ihandle(void)
+{
+	phandle node;
+	int ret;
+
+	if (prom_mmu_ihandle_cache != 0)
+		return prom_mmu_ihandle_cache;
+
+	node = prom_finddevice(prom_chosen_path);
+	ret = prom_getint(node, prom_mmu_name);
+	if (ret == -1 || ret == 0)
+		prom_mmu_ihandle_cache = -1;
+	else
+		prom_mmu_ihandle_cache = ret;
+
+	return ret;
+}
+
+static int prom_get_memory_ihandle(void)
+{
+	static int memory_ihandle_cache;
+	phandle node;
+	int ret;
+
+	if (memory_ihandle_cache != 0)
+		return memory_ihandle_cache;
+
+	node = prom_finddevice("/chosen");
+	ret = prom_getint(node, "memory");
+	if (ret == -1 || ret == 0)
+		memory_ihandle_cache = -1;
+	else
+		memory_ihandle_cache = ret;
+
+	return ret;
+}
+
+/* Load explicit I/D TLB entries. */
+static long tlb_load(const char *type, unsigned long index,
+		     unsigned long tte_data, unsigned long vaddr)
+{
+	unsigned long args[9];
+
+	args[0] = (unsigned long) prom_callmethod_name;
+	args[1] = 5;
+	args[2] = 1;
+	args[3] = (unsigned long) type;
+	args[4] = (unsigned int) prom_get_mmu_ihandle();
+	args[5] = vaddr;
+	args[6] = tte_data;
+	args[7] = index;
+	args[8] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (long) args[8];
+}
+
+long prom_itlb_load(unsigned long index,
+		    unsigned long tte_data,
+		    unsigned long vaddr)
+{
+	return tlb_load("SUNW,itlb-load", index, tte_data, vaddr);
+}
+
+long prom_dtlb_load(unsigned long index,
+		    unsigned long tte_data,
+		    unsigned long vaddr)
+{
+	return tlb_load("SUNW,dtlb-load", index, tte_data, vaddr);
+}
+
+int prom_map(int mode, unsigned long size,
+	     unsigned long vaddr, unsigned long paddr)
+{
+	unsigned long args[11];
+	int ret;
+
+	args[0] = (unsigned long) prom_callmethod_name;
+	args[1] = 7;
+	args[2] = 1;
+	args[3] = (unsigned long) prom_map_name;
+	args[4] = (unsigned int) prom_get_mmu_ihandle();
+	args[5] = (unsigned int) mode;
+	args[6] = size;
+	args[7] = vaddr;
+	args[8] = 0;
+	args[9] = paddr;
+	args[10] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	ret = (int) args[10];
+	if (ret == 0)
+		ret = -1;
+	return ret;
+}
+
+void prom_unmap(unsigned long size, unsigned long vaddr)
+{
+	unsigned long args[7];
+
+	args[0] = (unsigned long) prom_callmethod_name;
+	args[1] = 4;
+	args[2] = 0;
+	args[3] = (unsigned long) prom_unmap_name;
+	args[4] = (unsigned int) prom_get_mmu_ihandle();
+	args[5] = size;
+	args[6] = vaddr;
+
+	p1275_cmd_direct(args);
+}
+
+/* Set aside physical memory which is not touched or modified
+ * across soft resets.
+ */
+int prom_retain(const char *name, unsigned long size,
+		unsigned long align, unsigned long *paddr)
+{
+	unsigned long args[11];
+
+	args[0] = (unsigned long) prom_callmethod_name;
+	args[1] = 5;
+	args[2] = 3;
+	args[3] = (unsigned long) "SUNW,retain";
+	args[4] = (unsigned int) prom_get_memory_ihandle();
+	args[5] = align;
+	args[6] = size;
+	args[7] = (unsigned long) name;
+	args[8] = (unsigned long) -1;
+	args[9] = (unsigned long) -1;
+	args[10] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	if (args[8])
+		return (int) args[8];
+
+	/* Next we get "phys_high" then "phys_low".  On 64-bit
+	 * the phys_high cell is don't care since the phys_low
+	 * cell has the full value.
+	 */
+	*paddr = args[10];
+
+	return 0;
+}
+
+/* Get "Unumber" string for the SIMM at the given
+ * memory address.  Usually this will be of the form
+ * "Uxxxx" where xxxx is a decimal number which is
+ * etched into the motherboard next to the SIMM slot
+ * in question.
+ */
+int prom_getunumber(int syndrome_code,
+		    unsigned long phys_addr,
+		    char *buf, int buflen)
+{
+	unsigned long args[12];
+
+	args[0] = (unsigned long) prom_callmethod_name;
+	args[1] = 7;
+	args[2] = 2;
+	args[3] = (unsigned long) "SUNW,get-unumber";
+	args[4] = (unsigned int) prom_get_memory_ihandle();
+	args[5] = buflen;
+	args[6] = (unsigned long) buf;
+	args[7] = 0;
+	args[8] = phys_addr;
+	args[9] = (unsigned int) syndrome_code;
+	args[10] = (unsigned long) -1;
+	args[11] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (int) args[10];
+}
+
+/* Power management extensions. */
+void prom_sleepself(void)
+{
+	unsigned long args[3];
+
+	args[0] = (unsigned long) "SUNW,sleep-self";
+	args[1] = 0;
+	args[2] = 0;
+	p1275_cmd_direct(args);
+}
+
+int prom_sleepsystem(void)
+{
+	unsigned long args[4];
+
+	args[0] = (unsigned long) "SUNW,sleep-system";
+	args[1] = 0;
+	args[2] = 1;
+	args[3] = (unsigned long) -1;
+	p1275_cmd_direct(args);
+
+	return (int) args[3];
+}
+
+int prom_wakeupsystem(void)
+{
+	unsigned long args[4];
+
+	args[0] = (unsigned long) "SUNW,wakeup-system";
+	args[1] = 0;
+	args[2] = 1;
+	args[3] = (unsigned long) -1;
+	p1275_cmd_direct(args);
+
+	return (int) args[3];
+}
+
+#ifdef CONFIG_SMP
+void prom_startcpu(int cpunode, unsigned long pc, unsigned long arg)
+{
+	unsigned long args[6];
+
+	args[0] = (unsigned long) "SUNW,start-cpu";
+	args[1] = 3;
+	args[2] = 0;
+	args[3] = (unsigned int) cpunode;
+	args[4] = pc;
+	args[5] = arg;
+	p1275_cmd_direct(args);
+}
+
+void prom_startcpu_cpuid(int cpuid, unsigned long pc, unsigned long arg)
+{
+	unsigned long args[6];
+
+	args[0] = (unsigned long) "SUNW,start-cpu-by-cpuid";
+	args[1] = 3;
+	args[2] = 0;
+	args[3] = (unsigned int) cpuid;
+	args[4] = pc;
+	args[5] = arg;
+	p1275_cmd_direct(args);
+}
+
+void prom_stopcpu_cpuid(int cpuid)
+{
+	unsigned long args[4];
+
+	args[0] = (unsigned long) "SUNW,stop-cpu-by-cpuid";
+	args[1] = 1;
+	args[2] = 0;
+	args[3] = (unsigned int) cpuid;
+	p1275_cmd_direct(args);
+}
+
+void prom_stopself(void)
+{
+	unsigned long args[3];
+
+	args[0] = (unsigned long) "SUNW,stop-self";
+	args[1] = 0;
+	args[2] = 0;
+	p1275_cmd_direct(args);
+}
+
+void prom_idleself(void)
+{
+	unsigned long args[3];
+
+	args[0] = (unsigned long) "SUNW,idle-self";
+	args[1] = 0;
+	args[2] = 0;
+	p1275_cmd_direct(args);
+}
+
+void prom_resumecpu(int cpunode)
+{
+	unsigned long args[4];
+
+	args[0] = (unsigned long) "SUNW,resume-cpu";
+	args[1] = 1;
+	args[2] = 0;
+	args[3] = (unsigned int) cpunode;
+	p1275_cmd_direct(args);
+}
+#endif
diff --git a/arch/sparc/prom/mp.c b/arch/sparc/prom/mp.c
new file mode 100644
index 0000000..67cf0e9
--- /dev/null
+++ b/arch/sparc/prom/mp.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mp.c:  OpenBoot Prom Multiprocessor support routines.  Don't call
+ *        these on a UP or else you will halt and catch fire. ;)
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+
+extern void restore_current(void);
+
+/* Start cpu with prom-tree node 'cpunode' using context described
+ * by 'ctable_reg' in context 'ctx' at program counter 'pc'.
+ *
+ * XXX Have to look into what the return values mean. XXX
+ */
+int
+prom_startcpu(int cpunode, struct linux_prom_registers *ctable_reg, int ctx, char *pc)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	switch(prom_vers) {
+	case PROM_V0:
+	case PROM_V2:
+	default:
+		ret = -1;
+		break;
+	case PROM_V3:
+		ret = (*(romvec->v3_cpustart))(cpunode, (int) ctable_reg, ctx, pc);
+		break;
+	}
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+
+	return ret;
+}
diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c
new file mode 100644
index 0000000..889aa60
--- /dev/null
+++ b/arch/sparc/prom/p1275.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * p1275.c: Sun IEEE 1275 PROM low level interface routines
+ *
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/irqflags.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/spitfire.h>
+#include <asm/pstate.h>
+#include <asm/ldc.h>
+
+struct {
+	long prom_callback;			/* 0x00 */
+	void (*prom_cif_handler)(long *);	/* 0x08 */
+} p1275buf;
+
+extern void prom_world(int);
+
+extern void prom_cif_direct(unsigned long *args);
+extern void prom_cif_callback(void);
+
+/*
+ * This provides SMP safety on the p1275buf.
+ */
+DEFINE_RAW_SPINLOCK(prom_entry_lock);
+
+void p1275_cmd_direct(unsigned long *args)
+{
+	unsigned long flags;
+
+	local_save_flags(flags);
+	local_irq_restore((unsigned long)PIL_NMI);
+	raw_spin_lock(&prom_entry_lock);
+
+	prom_world(1);
+	prom_cif_direct(args);
+	prom_world(0);
+
+	raw_spin_unlock(&prom_entry_lock);
+	local_irq_restore(flags);
+}
+
+void prom_cif_init(void *cif_handler, void *cif_stack)
+{
+	p1275buf.prom_cif_handler = (void (*)(long *))cif_handler;
+}
diff --git a/arch/sparc/prom/printf.c b/arch/sparc/prom/printf.c
new file mode 100644
index 0000000..dcee3df
--- /dev/null
+++ b/arch/sparc/prom/printf.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * printf.c:  Internal prom library printf facility.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (c) 2002 Pete Zaitcev (zaitcev@yahoo.com)
+ *
+ * We used to warn all over the code: DO NOT USE prom_printf(),
+ * and yet people do. Anton's banking code was outputting banks
+ * with prom_printf for most of the 2.4 lifetime. Since an effective
+ * stick is not available, we deployed a carrot: an early printk
+ * through PROM by means of -p boot option. This ought to fix it.
+ * USE printk; if you need, deploy -p.
+ */
+
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/spinlock.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+
+#define CONSOLE_WRITE_BUF_SIZE	1024
+
+static char ppbuf[1024];
+static char console_write_buf[CONSOLE_WRITE_BUF_SIZE];
+static DEFINE_RAW_SPINLOCK(console_write_lock);
+
+void notrace prom_write(const char *buf, unsigned int n)
+{
+	unsigned int dest_len;
+	unsigned long flags;
+	char *dest;
+
+	dest = console_write_buf;
+	raw_spin_lock_irqsave(&console_write_lock, flags);
+
+	dest_len = 0;
+	while (n-- != 0) {
+		char ch = *buf++;
+		if (ch == '\n') {
+			*dest++ = '\r';
+			dest_len++;
+		}
+		*dest++ = ch;
+		dest_len++;
+		if (dest_len >= CONSOLE_WRITE_BUF_SIZE - 1) {
+			prom_console_write_buf(console_write_buf, dest_len);
+			dest = console_write_buf;
+			dest_len = 0;
+		}
+	}
+	if (dest_len)
+		prom_console_write_buf(console_write_buf, dest_len);
+
+	raw_spin_unlock_irqrestore(&console_write_lock, flags);
+}
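+
+/* Example of the expansion above: the 5-byte input "ab\ncd" leaves the
+ * buffer holding "ab\r\ncd" (6 bytes), since the PROM console expects a
+ * CR before every LF.
+ */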
+
+void notrace prom_printf(const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i = vscnprintf(ppbuf, sizeof(ppbuf), fmt, args);
+	va_end(args);
+
+	prom_write(ppbuf, i);
+}
diff --git a/arch/sparc/prom/ranges.c b/arch/sparc/prom/ranges.c
new file mode 100644
index 0000000..20cb828
--- /dev/null
+++ b/arch/sparc/prom/ranges.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ranges.c: Handle ranges in newer proms for obio/sbus.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/types.h>
+
+static struct linux_prom_ranges promlib_obio_ranges[PROMREG_MAX];
+static int num_obio_ranges;
+
+/* Adjust register values based upon the ranges parameters. */
+static void prom_adjust_regs(struct linux_prom_registers *regp, int nregs,
+			     struct linux_prom_ranges *rangep, int nranges)
+{
+	int regc, rngc;
+
+	for (regc = 0; regc < nregs; regc++) {
+		for (rngc = 0; rngc < nranges; rngc++)
+			if (regp[regc].which_io == rangep[rngc].ot_child_space)
+				break; /* Found it */
+		if (rngc == nranges) { /* oops */
+			prom_printf("adjust_regs: Could not find range with matching bus type...\n");
+			continue;
+		}
+		regp[regc].which_io = rangep[rngc].ot_parent_space;
+		regp[regc].phys_addr -= rangep[rngc].ot_child_base;
+		regp[regc].phys_addr += rangep[rngc].ot_parent_base;
+	}
+}
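+
+/* Worked example of the adjustment above, with hypothetical numbers:
+ * given a range { ot_child_space = 1, ot_child_base = 0x0,
+ * ot_parent_space = 0, ot_parent_base = 0xf0000000 }, a register
+ * { which_io = 1, phys_addr = 0x4000 } becomes { which_io = 0,
+ * phys_addr = 0xf0004000 }: subtract the child base, add the parent
+ * base, and move to the parent's address space.
+ */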
+
+static void prom_adjust_ranges(struct linux_prom_ranges *ranges1, int nranges1,
+			       struct linux_prom_ranges *ranges2, int nranges2)
+{
+	int rng1c, rng2c;
+
+	for (rng1c = 0; rng1c < nranges1; rng1c++) {
+		for (rng2c = 0; rng2c < nranges2; rng2c++)
+			if (ranges1[rng1c].ot_parent_space == ranges2[rng2c].ot_child_space &&
+			   ranges1[rng1c].ot_parent_base >= ranges2[rng2c].ot_child_base &&
+			   ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size - ranges1[rng1c].ot_parent_base > 0U)
+			break;
+		if (rng2c == nranges2) { /* oops */
+			prom_printf("adjust_ranges: Could not find matching bus type...\n");
+			continue;
+		}
+		if (ranges1[rng1c].ot_parent_base + ranges1[rng1c].or_size > ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size)
+			ranges1[rng1c].or_size = ranges2[rng2c].ot_child_base + ranges2[rng2c].or_size - ranges1[rng1c].ot_parent_base;
+		ranges1[rng1c].ot_parent_space = ranges2[rng2c].ot_parent_space;
+		ranges1[rng1c].ot_parent_base += ranges2[rng2c].ot_parent_base;
+	}
+}
+
+/* Apply probed obio ranges to registers passed, if no ranges return. */
+void prom_apply_obio_ranges(struct linux_prom_registers *regs, int nregs)
+{
+	if (num_obio_ranges)
+		prom_adjust_regs(regs, nregs, promlib_obio_ranges, num_obio_ranges);
+}
+EXPORT_SYMBOL(prom_apply_obio_ranges);
+
+void __init prom_ranges_init(void)
+{
+	phandle node, obio_node;
+	int success;
+
+	num_obio_ranges = 0;
+
+	/* Check for obio and sbus ranges. */
+	node = prom_getchild(prom_root_node);
+	obio_node = prom_searchsiblings(node, "obio");
+
+	if (obio_node) {
+		success = prom_getproperty(obio_node, "ranges",
+					   (char *) promlib_obio_ranges,
+					   sizeof(promlib_obio_ranges));
+		if (success != -1)
+			num_obio_ranges = (success / sizeof(struct linux_prom_ranges));
+	}
+
+	if (num_obio_ranges)
+		prom_printf("PROMLIB: obio_ranges %d\n", num_obio_ranges);
+}
+
+void prom_apply_generic_ranges(phandle node, phandle parent,
+			       struct linux_prom_registers *regs, int nregs)
+{
+	int success;
+	int num_ranges;
+	struct linux_prom_ranges ranges[PROMREG_MAX];
+
+	success = prom_getproperty(node, "ranges",
+				   (char *) ranges,
+				   sizeof(ranges));
+	if (success != -1) {
+		num_ranges = (success / sizeof(struct linux_prom_ranges));
+		if (parent) {
+			struct linux_prom_ranges parent_ranges[PROMREG_MAX];
+			int num_parent_ranges;
+
+			success = prom_getproperty(parent, "ranges",
+						   (char *) parent_ranges,
+						   sizeof(parent_ranges));
+			if (success != -1) {
+				num_parent_ranges = (success / sizeof(struct linux_prom_ranges));
+				prom_adjust_ranges(ranges, num_ranges, parent_ranges, num_parent_ranges);
+			}
+		}
+		prom_adjust_regs(regs, nregs, ranges, num_ranges);
+	}
+}
diff --git a/arch/sparc/prom/tree_32.c b/arch/sparc/prom/tree_32.c
new file mode 100644
index 0000000..0fed893
--- /dev/null
+++ b/arch/sparc/prom/tree_32.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tree.c: Basic device tree traversal/scanning for the Linux
+ *         prom library.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+
+extern void restore_current(void);
+
+static char promlib_buf[128];
+
+/* Internal version of prom_getchild that does not alter return values. */
+static phandle __prom_getchild(phandle node)
+{
+	unsigned long flags;
+	phandle cnode;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	cnode = prom_nodeops->no_child(node);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+
+	return cnode;
+}
+
+/* Return the child of node 'node' or zero if this node has no
+ * direct descendants.
+ */
+phandle prom_getchild(phandle node)
+{
+	phandle cnode;
+
+	if ((s32)node == -1)
+		return 0;
+
+	cnode = __prom_getchild(node);
+	if (cnode == 0 || (s32)cnode == -1)
+		return 0;
+
+	return cnode;
+}
+EXPORT_SYMBOL(prom_getchild);
+
+/* Internal version of prom_getsibling that does not alter return values. */
+static phandle __prom_getsibling(phandle node)
+{
+	unsigned long flags;
+	phandle cnode;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	cnode = prom_nodeops->no_nextnode(node);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+
+	return cnode;
+}
+
+/* Return the next sibling of node 'node' or zero if no more siblings
+ * at this level of depth in the tree.
+ */
+phandle prom_getsibling(phandle node)
+{
+	phandle sibnode;
+
+	if ((s32)node == -1)
+		return 0;
+
+	sibnode = __prom_getsibling(node);
+	if (sibnode == 0 || (s32)sibnode == -1)
+		return 0;
+
+	return sibnode;
+}
+EXPORT_SYMBOL(prom_getsibling);
+
+/* Return the length in bytes of property 'prop' at node 'node'.
+ * Return -1 on error.
+ */
+int prom_getproplen(phandle node, const char *prop)
+{
+	int ret;
+	unsigned long flags;
+
+	if (!node || !prop)
+		return -1;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	ret = prom_nodeops->no_proplen(node, prop);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(prom_getproplen);
+
+/* Acquire a property 'prop' at node 'node' and place it in
+ * 'buffer' which has a size of 'bufsize'.  If the acquisition
+ * was successful the length will be returned, else -1 is returned.
+ */
+int prom_getproperty(phandle node, const char *prop, char *buffer, int bufsize)
+{
+	int plen, ret;
+	unsigned long flags;
+
+	plen = prom_getproplen(node, prop);
+	if ((plen > bufsize) || (plen == 0) || (plen == -1))
+		return -1;
+	/* Ok, things seem all right. */
+	spin_lock_irqsave(&prom_lock, flags);
+	ret = prom_nodeops->no_getprop(node, prop, buffer);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(prom_getproperty);
+
+/* Acquire an integer property and return its value.  Returns -1
+ * on failure.
+ */
+int prom_getint(phandle node, char *prop)
+{
+	static int intprop;
+
+	if (prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1)
+		return intprop;
+
+	return -1;
+}
+EXPORT_SYMBOL(prom_getint);
+
+/* Acquire an integer property, upon error return the passed default
+ * integer.
+ */
+int prom_getintdefault(phandle node, char *property, int deflt)
+{
+	int retval;
+
+	retval = prom_getint(node, property);
+	if (retval == -1)
+		return deflt;
+
+	return retval;
+}
+EXPORT_SYMBOL(prom_getintdefault);
+
+/* Acquire a boolean property, 1=TRUE 0=FALSE. */
+int prom_getbool(phandle node, char *prop)
+{
+	int retval;
+
+	retval = prom_getproplen(node, prop);
+	if (retval == -1)
+		return 0;
+	return 1;
+}
+EXPORT_SYMBOL(prom_getbool);
+
+/* Acquire a property whose value is a string, returns a null
+ * string on error.  The char pointer is the user supplied string
+ * buffer.
+ */
+void prom_getstring(phandle node, char *prop, char *user_buf, int ubuf_size)
+{
+	int len;
+
+	len = prom_getproperty(node, prop, user_buf, ubuf_size);
+	if (len != -1)
+		return;
+	user_buf[0] = 0;
+}
+EXPORT_SYMBOL(prom_getstring);
+
+
+/* Search siblings at 'node_start' for a node with name
+ * 'nodename'.  Return node if successful, zero if not.
+ */
+phandle prom_searchsiblings(phandle node_start, char *nodename)
+{
+	phandle thisnode;
+	int error;
+
+	for (thisnode = node_start; thisnode;
+	     thisnode = prom_getsibling(thisnode)) {
+		error = prom_getproperty(thisnode, "name", promlib_buf,
+					 sizeof(promlib_buf));
+		/* Should this ever happen? */
+		if (error == -1)
+			continue;
+		if (strcmp(nodename, promlib_buf) == 0)
+			return thisnode;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(prom_searchsiblings);
+
+/* Internal version of nextprop that does not alter return values. */
+static char *__prom_nextprop(phandle node, char * oprop)
+{
+	unsigned long flags;
+	char *prop;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	prop = prom_nodeops->no_nextprop(node, oprop);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+
+	return prop;
+}
+
+/* Return the name of the property following property 'oprop'
+ * at node 'node'.  Returns the empty string if there are no more
+ * properties for this node.
+ */
+char *prom_nextprop(phandle node, char *oprop, char *buffer)
+{
+	if (node == 0 || (s32)node == -1)
+		return "";
+
+	return __prom_nextprop(node, oprop);
+}
+EXPORT_SYMBOL(prom_nextprop);
+
+phandle prom_finddevice(char *name)
+{
+	char nbuf[128];
+	char *s = name, *d;
+	phandle node = prom_root_node, node2;
+	unsigned int which_io, phys_addr;
+	struct linux_prom_registers reg[PROMREG_MAX];
+
+	while (*s++) {
+		if (!*s) return node; /* path '.../' is legal */
+		node = prom_getchild(node);
+
+		for (d = nbuf; *s != 0 && *s != '@' && *s != '/';)
+			*d++ = *s++;
+		*d = 0;
+
+		node = prom_searchsiblings(node, nbuf);
+		if (!node)
+			return 0;
+
+		if (*s == '@') {
+			if (isxdigit(s[1]) && s[2] == ',') {
+				which_io = simple_strtoul(s+1, NULL, 16);
+				phys_addr = simple_strtoul(s+3, &d, 16);
+				if (d != s + 3 && (!*d || *d == '/')
+				    && d <= s + 3 + 8) {
+					node2 = node;
+					while (node2 && (s32)node2 != -1) {
+						if (prom_getproperty (node2, "reg", (char *)reg, sizeof (reg)) > 0) {
+							if (which_io == reg[0].which_io && phys_addr == reg[0].phys_addr) {
+								node = node2;
+								break;
+							}
+						}
+						node2 = prom_getsibling(node2);
+						if (!node2 || (s32)node2 == -1)
+							break;
+						node2 = prom_searchsiblings(prom_getsibling(node2), nbuf);
+					}
+				}
+			}
+			while (*s != 0 && *s != '/') s++;
+		}
+	}
+	return node;
+}
+EXPORT_SYMBOL(prom_finddevice);
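+
+/* The grammar handled above, e.g. "/sbus@0,10001000/esp" (a made-up
+ * path): each component is matched against the children of the current
+ * node by name, and an optional "@which_io,phys_addr" unit address
+ * (single hex digit, comma, hex address) is disambiguated against the
+ * first "reg" entry of each candidate sibling.
+ */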
+
+/* Set property 'pname' at node 'node' to value 'value' which has a length
+ * of 'size' bytes.  Return the number of bytes the prom accepted.
+ */
+int prom_setprop(phandle node, const char *pname, char *value, int size)
+{
+	unsigned long flags;
+	int ret;
+
+	if (size == 0)
+		return 0;
+	if ((pname == NULL) || (value == NULL))
+		return 0;
+	spin_lock_irqsave(&prom_lock, flags);
+	ret = prom_nodeops->no_setprop(node, pname, value, size);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL(prom_setprop);
+
+phandle prom_inst2pkg(int inst)
+{
+	phandle node;
+	unsigned long flags;
+
+	spin_lock_irqsave(&prom_lock, flags);
+	node = (*romvec->pv_v2devops.v2_inst2pkg)(inst);
+	restore_current();
+	spin_unlock_irqrestore(&prom_lock, flags);
+	if ((s32)node == -1)
+		return 0;
+	return node;
+}
diff --git a/arch/sparc/prom/tree_64.c b/arch/sparc/prom/tree_64.c
new file mode 100644
index 0000000..989e799
--- /dev/null
+++ b/arch/sparc/prom/tree_64.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tree.c: Basic device tree traversal/scanning for the Linux
+ *         prom library.
+ *
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+#include <asm/openprom.h>
+#include <asm/oplib.h>
+#include <asm/ldc.h>
+
+static phandle prom_node_to_node(const char *type, phandle node)
+{
+	unsigned long args[5];
+
+	args[0] = (unsigned long) type;
+	args[1] = 1;
+	args[2] = 1;
+	args[3] = (unsigned int) node;
+	args[4] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (phandle) args[4];
+}
+
+/* Return the child of node 'node' or zero if this node has no
+ * direct descendants.
+ */
+inline phandle __prom_getchild(phandle node)
+{
+	return prom_node_to_node("child", node);
+}
+
+phandle prom_getchild(phandle node)
+{
+	phandle cnode;
+
+	if ((s32)node == -1)
+		return 0;
+	cnode = __prom_getchild(node);
+	if ((s32)cnode == -1)
+		return 0;
+	return cnode;
+}
+EXPORT_SYMBOL(prom_getchild);
+
+inline phandle prom_getparent(phandle node)
+{
+	phandle cnode;
+
+	if ((s32)node == -1)
+		return 0;
+	cnode = prom_node_to_node("parent", node);
+	if ((s32)cnode == -1)
+		return 0;
+	return cnode;
+}
+
+/* Return the next sibling of node 'node' or zero if no more siblings
+ * at this level of depth in the tree.
+ */
+inline phandle __prom_getsibling(phandle node)
+{
+	return prom_node_to_node(prom_peer_name, node);
+}
+
+phandle prom_getsibling(phandle node)
+{
+	phandle sibnode;
+
+	if ((s32)node == -1)
+		return 0;
+	sibnode = __prom_getsibling(node);
+	if ((s32)sibnode == -1)
+		return 0;
+
+	return sibnode;
+}
+EXPORT_SYMBOL(prom_getsibling);
+
+/* Return the length in bytes of property 'prop' at node 'node'.
+ * Return -1 on error.
+ */
+int prom_getproplen(phandle node, const char *prop)
+{
+	unsigned long args[6];
+
+	if (!node || !prop)
+		return -1;
+
+	args[0] = (unsigned long) "getproplen";
+	args[1] = 2;
+	args[2] = 1;
+	args[3] = (unsigned int) node;
+	args[4] = (unsigned long) prop;
+	args[5] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (int) args[5];
+}
+EXPORT_SYMBOL(prom_getproplen);
+
+/* Acquire a property 'prop' at node 'node' and place it in
+ * 'buffer' which has a size of 'bufsize'.  If the acquisition
+ * was successful the length will be returned, else -1 is returned.
+ */
+int prom_getproperty(phandle node, const char *prop,
+		     char *buffer, int bufsize)
+{
+	unsigned long args[8];
+	int plen;
+
+	plen = prom_getproplen(node, prop);
+	if ((plen > bufsize) || (plen == 0) || (plen == -1))
+		return -1;
+
+	args[0] = (unsigned long) prom_getprop_name;
+	args[1] = 4;
+	args[2] = 1;
+	args[3] = (unsigned int) node;
+	args[4] = (unsigned long) prop;
+	args[5] = (unsigned long) buffer;
+	args[6] = bufsize;
+	args[7] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (int) args[7];
+}
+EXPORT_SYMBOL(prom_getproperty);
+
+/* Acquire an integer property and return its value.  Returns -1
+ * on failure.
+ */
+int prom_getint(phandle node, const char *prop)
+{
+	int intprop;
+
+	if (prom_getproperty(node, prop, (char *) &intprop, sizeof(int)) != -1)
+		return intprop;
+
+	return -1;
+}
+EXPORT_SYMBOL(prom_getint);
+
+/* Acquire an integer property, upon error return the passed default
+ * integer.
+ */
+
+int prom_getintdefault(phandle node, const char *property, int deflt)
+{
+	int retval;
+
+	retval = prom_getint(node, property);
+	if (retval == -1)
+		return deflt;
+
+	return retval;
+}
+EXPORT_SYMBOL(prom_getintdefault);
+
+/* Acquire a boolean property, 1=TRUE 0=FALSE. */
+int prom_getbool(phandle node, const char *prop)
+{
+	int retval;
+
+	retval = prom_getproplen(node, prop);
+	if (retval == -1)
+		return 0;
+	return 1;
+}
+EXPORT_SYMBOL(prom_getbool);
+
+/* Acquire a property whose value is a string, returns a null
+ * string on error.  The char pointer is the user supplied string
+ * buffer.
+ */
+void prom_getstring(phandle node, const char *prop, char *user_buf,
+		int ubuf_size)
+{
+	int len;
+
+	len = prom_getproperty(node, prop, user_buf, ubuf_size);
+	if (len != -1)
+		return;
+	user_buf[0] = 0;
+}
+EXPORT_SYMBOL(prom_getstring);
+
+/* Does the device at node 'node' have name 'name'?
+ * YES = 1   NO = 0
+ */
+int prom_nodematch(phandle node, const char *name)
+{
+	char namebuf[128];
+	prom_getproperty(node, "name", namebuf, sizeof(namebuf));
+	if (strcmp(namebuf, name) == 0)
+		return 1;
+	return 0;
+}
+
+/* Search siblings at 'node_start' for a node with name
+ * 'nodename'.  Return node if successful, zero if not.
+ */
+phandle prom_searchsiblings(phandle node_start, const char *nodename)
+{
+	phandle thisnode;
+	int error;
+	char promlib_buf[128];
+
+	for (thisnode = node_start; thisnode;
+	     thisnode = prom_getsibling(thisnode)) {
+		error = prom_getproperty(thisnode, "name", promlib_buf,
+					 sizeof(promlib_buf));
+		/* Should this ever happen? */
+		if (error == -1)
+			continue;
+		if (strcmp(nodename, promlib_buf) == 0)
+			return thisnode;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(prom_searchsiblings);
+
+static const char *prom_nextprop_name = "nextprop";
+
+/* Return the first property name for node 'node'.
+ * 'buffer' should be at least 32 bytes long.
+ */
+char *prom_firstprop(phandle node, char *buffer)
+{
+	unsigned long args[7];
+
+	*buffer = 0;
+	if ((s32)node == -1)
+		return buffer;
+
+	args[0] = (unsigned long) prom_nextprop_name;
+	args[1] = 3;
+	args[2] = 1;
+	args[3] = (unsigned int) node;
+	args[4] = 0;
+	args[5] = (unsigned long) buffer;
+	args[6] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return buffer;
+}
+EXPORT_SYMBOL(prom_firstprop);
+
+/* Return the name of the property following property 'oprop'
+ * at node 'node'.  Returns the empty string if there are no more
+ * properties for this node.
+ */
+char *prom_nextprop(phandle node, const char *oprop, char *buffer)
+{
+	unsigned long args[7];
+	char buf[32];
+
+	if ((s32)node == -1) {
+		*buffer = 0;
+		return buffer;
+	}
+	if (oprop == buffer) {
+		strcpy (buf, oprop);
+		oprop = buf;
+	}
+
+	args[0] = (unsigned long) prom_nextprop_name;
+	args[1] = 3;
+	args[2] = 1;
+	args[3] = (unsigned int) node;
+	args[4] = (unsigned long) oprop;
+	args[5] = (unsigned long) buffer;
+	args[6] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return buffer;
+}
+EXPORT_SYMBOL(prom_nextprop);
+
+phandle prom_finddevice(const char *name)
+{
+	unsigned long args[5];
+
+	if (!name)
+		return 0;
+	args[0] = (unsigned long) "finddevice";
+	args[1] = 1;
+	args[2] = 1;
+	args[3] = (unsigned long) name;
+	args[4] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (int) args[4];
+}
+EXPORT_SYMBOL(prom_finddevice);
+
+int prom_node_has_property(phandle node, const char *prop)
+{
+	char buf[32];
+
+	*buf = 0;
+	do {
+		prom_nextprop(node, buf, buf);
+		if (!strcmp(buf, prop))
+			return 1;
+	} while (*buf);
+	return 0;
+}
+EXPORT_SYMBOL(prom_node_has_property);
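+
+/* The same idiom lists every property of a node; an illustrative
+ * sketch (not from this file):
+ *
+ *	char buf[32];
+ *
+ *	for (prom_firstprop(node, buf); *buf; prom_nextprop(node, buf, buf))
+ *		prom_printf("prop: %s\n", buf);
+ *
+ * prom_nextprop() copies 'oprop' internally when it aliases 'buffer',
+ * which is what makes the in-place loop legal.
+ */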
+
+/* Set property 'pname' at node 'node' to value 'value' which has a length
+ * of 'size' bytes.  Return the number of bytes the prom accepted.
+ */
+int
+prom_setprop(phandle node, const char *pname, char *value, int size)
+{
+	unsigned long args[8];
+
+	if (size == 0)
+		return 0;
+	if ((pname == 0) || (value == 0))
+		return 0;
+
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled) {
+		ldom_set_var(pname, value);
+		return 0;
+	}
+#endif
+	args[0] = (unsigned long) "setprop";
+	args[1] = 4;
+	args[2] = 1;
+	args[3] = (unsigned int) node;
+	args[4] = (unsigned long) pname;
+	args[5] = (unsigned long) value;
+	args[6] = size;
+	args[7] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (int) args[7];
+}
+EXPORT_SYMBOL(prom_setprop);
+
+inline phandle prom_inst2pkg(int inst)
+{
+	unsigned long args[5];
+	phandle node;
+
+	args[0] = (unsigned long) "instance-to-package";
+	args[1] = 1;
+	args[2] = 1;
+	args[3] = (unsigned int) inst;
+	args[4] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	node = (int) args[4];
+	if ((s32)node == -1)
+		return 0;
+	return node;
+}
+
+int prom_ihandle2path(int handle, char *buffer, int bufsize)
+{
+	unsigned long args[7];
+
+	args[0] = (unsigned long) "instance-to-path";
+	args[1] = 3;
+	args[2] = 1;
+	args[3] = (unsigned int) handle;
+	args[4] = (unsigned long) buffer;
+	args[5] = bufsize;
+	args[6] = (unsigned long) -1;
+
+	p1275_cmd_direct(args);
+
+	return (int) args[6];
+}
diff --git a/arch/sparc/vdso/.gitignore b/arch/sparc/vdso/.gitignore
new file mode 100644
index 0000000..ef925b9
--- /dev/null
+++ b/arch/sparc/vdso/.gitignore
@@ -0,0 +1,3 @@
+vdso.lds
+vdso-image-*.c
+vdso2c
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
new file mode 100644
index 0000000..dc85570
--- /dev/null
+++ b/arch/sparc/vdso/Makefile
@@ -0,0 +1,145 @@
+#
+# Building vDSO images for sparc.
+#
+
+KBUILD_CFLAGS += $(DISABLE_LTO)
+
+VDSO64-$(CONFIG_SPARC64)	:= y
+VDSOCOMPAT-$(CONFIG_COMPAT)	:= y
+
+# files to link into the vdso
+vobjs-y := vdso-note.o vclock_gettime.o
+
+# files to link into kernel
+obj-y				+= vma.o
+
+# vDSO images to build
+vdso_img-$(VDSO64-y)		+= 64
+vdso_img-$(VDSOCOMPAT-y)	+= 32
+
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.lds $(vobjs-y)
+
+# Build the vDSO image C files and link them in.
+vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
+vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
+vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
+obj-y += $(vdso_img_objs)
+targets += $(vdso_img_cfiles)
+targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
+
+CPPFLAGS_vdso.lds += -P -C
+
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
+			-Wl,--no-undefined \
+			-Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \
+			$(DISABLE_LTO)
+
+$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
+	$(call if_changed,vdso)
+
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include
+hostprogs-y			+= vdso2c
+
+quiet_cmd_vdso2c = VDSO2C  $@
+      cmd_vdso2c = $(obj)/vdso2c $< $(<:%.dbg=%) $@
+
+$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
+	$(call if_changed,vdso2c)
+
+#
+# Don't omit frame pointers for ease of userspace debugging, but do
+# optimize sibling calls.
+#
+CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \
+       -m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \
+       -ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \
+       $(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \
+       -foptimize-sibling-calls -DBUILD_VDSO
+
+$(vobjs): KBUILD_CFLAGS += $(CFL)
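+
+# A note on the flags above (intent only, not authoritative): the
+# -ffixed-g*/-fcall-used-g* options keep generated code away from the
+# global registers the SPARC ABI reserves; %g7 in particular is the
+# userspace thread pointer, which vDSO code must never clobber.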
+
+#
+# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
+#
+CFLAGS_REMOVE_vdso-note.o = -pg
+CFLAGS_REMOVE_vclock_gettime.o = -pg
+
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg
+	$(call if_changed,objcopy)
+
+CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf32_sparc,-soname=linux-gate.so.1
+
+# This makes sure the $(obj) subdirectory exists even though vdso32/
+# is not a kbuild sub-make subdirectory.
+override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
+
+targets += vdso32/vdso32.lds
+targets += vdso32/vdso-note.o
+targets += vdso32/vclock_gettime.o
+
+KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
+$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32
+
+KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
+KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic -mno-app-regs -ffixed-g7
+KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
+KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
+KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
+KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
+KBUILD_CFLAGS_32 += -mv8plus
+$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
+
+$(obj)/vdso32.so.dbg: FORCE \
+			$(obj)/vdso32/vdso32.lds \
+			$(obj)/vdso32/vclock_gettime.o \
+			$(obj)/vdso32/vdso-note.o
+		$(call if_changed,vdso)
+
+#
+# The DSO images are built using a special linker script.
+#
+quiet_cmd_vdso = VDSO    $@
+      cmd_vdso = $(CC) -nostdlib -o $@ \
+		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
+		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
+
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
+	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic
+GCOV_PROFILE := n
+
+#
+# Install the unstripped copies of vdso*.so.  If our toolchain supports
+# build-id, install .build-id links as well.
+#
+quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
+define cmd_vdso_install
+	cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
+	if readelf -n $< |grep -q 'Build ID'; then \
+	  buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
+	  first=`echo $$buildid | cut -b-2`; \
+	  last=`echo $$buildid | cut -b3-`; \
+	  mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
+	  ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
+	fi
+endef
+
+vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
+
+$(MODLIB)/vdso: FORCE
+	@mkdir -p $(MODLIB)/vdso
+
+$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
+	$(call cmd,vdso_install)
+
+PHONY += vdso_install $(vdso_img_insttargets)
+vdso_install: $(vdso_img_insttargets) FORCE
diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c
new file mode 100644
index 0000000..75dca9a
--- /dev/null
+++ b/arch/sparc/vdso/vclock_gettime.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of clock_gettime, gettimeofday, and time.
+ *
+ * The code should have no internal unresolved relocations.
+ * Check with readelf after changing.
+ * Also alternative() doesn't work.
+ */
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Disable profiling for userspace code: */
+#ifndef	DISABLE_BRANCH_PROFILING
+#define	DISABLE_BRANCH_PROFILING
+#endif
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <asm/io.h>
+#include <asm/unistd.h>
+#include <asm/timex.h>
+#include <asm/clocksource.h>
+#include <asm/vvar.h>
+
+#undef	TICK_PRIV_BIT
+#ifdef	CONFIG_SPARC64
+#define	TICK_PRIV_BIT	(1UL << 63)
+#else
+#define	TICK_PRIV_BIT	(1ULL << 63)
+#endif
+
+#ifdef	CONFIG_SPARC64
+#define SYSCALL_STRING							\
+	"ta	0x6d;"							\
+	"bcs,a	1f;"							\
+	" sub	%%g0, %%o0, %%o0;"					\
+	"1:"
+#else
+#define SYSCALL_STRING							\
+	"ta	0x10;"							\
+	"bcs,a	1f;"							\
+	" sub	%%g0, %%o0, %%o0;"					\
+	"1:"
+#endif
+
+#define SYSCALL_CLOBBERS						\
+	"f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7",			\
+	"f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15",		\
+	"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",		\
+	"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",		\
+	"f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46",		\
+	"f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",		\
+	"cc", "memory"
+
+/*
+ * Compute the vvar page's address in the process address space, and return it
+ * as a pointer to the vvar_data.
+ */
+static notrace noinline struct vvar_data *
+get_vvar_data(void)
+{
+	unsigned long ret;
+
+	/*
+	 * vdso data page is the first vDSO page so grab the return address
+	 * and move up a page to get to the data page.
+	 */
+	ret = (unsigned long)__builtin_return_address(0);
+	ret &= ~(8192 - 1);
+	ret -= 8192;
+
+	return (struct vvar_data *) ret;
+}
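+
+/* A sketch of the layout assumed above:
+ *
+ *	[ vvar page (8192 bytes) ][ vdso text ... ]
+ *	                          ^ return address lands in here
+ *
+ * Masking the return address down to an 8192-byte boundary finds the
+ * caller's page; stepping back one page lands on the vvar page.  This
+ * only holds while every caller of get_vvar_data() sits in the first
+ * page of vdso text.
+ */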
+
+static notrace long
+vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+	register long num __asm__("g1") = __NR_clock_gettime;
+	register long o0 __asm__("o0") = clock;
+	register long o1 __asm__("o1") = (long) ts;
+
+	__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
+			     "0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
+	return o0;
+}
+
+static notrace __always_inline long
+vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	register long num __asm__("g1") = __NR_gettimeofday;
+	register long o0 __asm__("o0") = (long) tv;
+	register long o1 __asm__("o1") = (long) tz;
+
+	__asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num),
+			     "0" (o0), "r" (o1) : SYSCALL_CLOBBERS);
+	return o0;
+}
+
+#ifdef	CONFIG_SPARC64
+static notrace noinline u64
+vread_tick(void)
+{
+	u64	ret;
+
+	__asm__ __volatile__("rd	%%asr24, %0 \n"
+			     ".section	.vread_tick_patch, \"ax\" \n"
+			     "rd	%%tick, %0 \n"
+			     ".previous \n"
+			     : "=&r" (ret));
+	return ret & ~TICK_PRIV_BIT;
+}
+#else
+static notrace noinline u64
+vread_tick(void)
+{
+	unsigned int lo, hi;
+
+	__asm__ __volatile__("rd	%%asr24, %%g1\n\t"
+			     "srlx	%%g1, 32, %1\n\t"
+			     "srl	%%g1, 0, %0\n"
+			     ".section	.vread_tick_patch, \"ax\" \n"
+			     "rd	%%tick, %%g1\n"
+			     ".previous \n"
+			     : "=&r" (lo), "=&r" (hi)
+			     :
+			     : "g1");
+	return lo | ((u64)hi << 32);
+}
+#endif
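+
+/* Both variants above read %asr24 (%stick) by default and park an
+ * alternative %tick-reading instruction in the .vread_tick_patch
+ * section.  On systems where %stick is unusable, init_vdso_image()
+ * in vma.c (later in this patch) copies the patch bytes over the
+ * default instruction before the image is mapped into userspace.
+ */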
+
+static notrace inline u64
+vgetsns(struct vvar_data *vvar)
+{
+	u64 v;
+	u64 cycles;
+
+	cycles = vread_tick();
+	v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask;
+	return v * vvar->clock.mult;
+}
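+
+/* The conversion above is the standard clocksource fixed-point step;
+ * combined with the callers below it computes
+ *
+ *	ns = (snsec + (cycles - cycle_last) * mult) >> shift;
+ *
+ * For example (hypothetical numbers), mult = 4194304 and shift = 22
+ * scale a 1 GHz counter to exactly 1 ns per cycle:
+ * 1000 cycles * 4194304 >> 22 = 1000 ns.
+ */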
+
+static notrace noinline int
+do_realtime(struct vvar_data *vvar, struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+
+	ts->tv_nsec = 0;
+	do {
+		seq = vvar_read_begin(vvar);
+		ts->tv_sec = vvar->wall_time_sec;
+		ns = vvar->wall_time_snsec;
+		ns += vgetsns(vvar);
+		ns >>= vvar->clock.shift;
+	} while (unlikely(vvar_read_retry(vvar, seq)));
+
+	timespec_add_ns(ts, ns);
+
+	return 0;
+}
+
+static notrace noinline int
+do_monotonic(struct vvar_data *vvar, struct timespec *ts)
+{
+	unsigned long seq;
+	u64 ns;
+
+	ts->tv_nsec = 0;
+	do {
+		seq = vvar_read_begin(vvar);
+		ts->tv_sec = vvar->monotonic_time_sec;
+		ns = vvar->monotonic_time_snsec;
+		ns += vgetsns(vvar);
+		ns >>= vvar->clock.shift;
+	} while (unlikely(vvar_read_retry(vvar, seq)));
+
+	timespec_add_ns(ts, ns);
+
+	return 0;
+}
+
+static notrace noinline int
+do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts)
+{
+	unsigned long seq;
+
+	do {
+		seq = vvar_read_begin(vvar);
+		ts->tv_sec = vvar->wall_time_coarse_sec;
+		ts->tv_nsec = vvar->wall_time_coarse_nsec;
+	} while (unlikely(vvar_read_retry(vvar, seq)));
+	return 0;
+}
+
+static notrace noinline int
+do_monotonic_coarse(struct vvar_data *vvar, struct timespec *ts)
+{
+	unsigned long seq;
+
+	do {
+		seq = vvar_read_begin(vvar);
+		ts->tv_sec = vvar->monotonic_time_coarse_sec;
+		ts->tv_nsec = vvar->monotonic_time_coarse_nsec;
+	} while (unlikely(vvar_read_retry(vvar, seq)));
+
+	return 0;
+}
+
+notrace int
+__vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+	struct vvar_data *vvd = get_vvar_data();
+
+	switch (clock) {
+	case CLOCK_REALTIME:
+		if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
+			break;
+		return do_realtime(vvd, ts);
+	case CLOCK_MONOTONIC:
+		if (unlikely(vvd->vclock_mode == VCLOCK_NONE))
+			break;
+		return do_monotonic(vvd, ts);
+	case CLOCK_REALTIME_COARSE:
+		return do_realtime_coarse(vvd, ts);
+	case CLOCK_MONOTONIC_COARSE:
+		return do_monotonic_coarse(vvd, ts);
+	}
+	/*
+	 * Unknown clock ID?  Fall back to the syscall.
+	 */
+	return vdso_fallback_gettime(clock, ts);
+}
+int
+clock_gettime(clockid_t, struct timespec *)
+	__attribute__((weak, alias("__vdso_clock_gettime")));
+
+notrace int
+__vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	struct vvar_data *vvd = get_vvar_data();
+
+	if (likely(vvd->vclock_mode != VCLOCK_NONE)) {
+		if (likely(tv != NULL)) {
+			union tstv_t {
+				struct timespec ts;
+				struct timeval tv;
+			} *tstv = (union tstv_t *) tv;
+			do_realtime(vvd, &tstv->ts);
+			/*
+			 * Assign before dividing to ensure that the division is
+			 * done in the type of tv_usec, not tv_nsec.
+			 *
+			 * There cannot be > 1 billion usec in a second:
+			 * do_realtime() has already distributed such overflow
+			 * into tv_sec.  So we can assign it to an int safely.
+			 */
+			tstv->tv.tv_usec = tstv->ts.tv_nsec;
+			tstv->tv.tv_usec /= 1000;
+		}
+		if (unlikely(tz != NULL)) {
+			/* Avoid memcpy. Some old compilers fail to inline it */
+			tz->tz_minuteswest = vvd->tz_minuteswest;
+			tz->tz_dsttime = vvd->tz_dsttime;
+		}
+		return 0;
+	}
+	return vdso_fallback_gettimeofday(tv, tz);
+}
+int
+gettimeofday(struct timeval *, struct timezone *)
+	__attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S
new file mode 100644
index 0000000..f2c83ab
--- /dev/null
+++ b/arch/sparc/vdso/vdso-layout.lds.S
@@ -0,0 +1,104 @@
+/*
+ * Linker script for vDSO.  This is an ELF shared object prelinked to
+ * its virtual address, and with only one read-only segment.
+ * This script controls its layout.
+ */
+
+#if defined(BUILD_VDSO64)
+# define SHDR_SIZE 64
+#elif defined(BUILD_VDSO32)
+# define SHDR_SIZE 40
+#else
+# error unknown VDSO target
+#endif
+
+#define NUM_FAKE_SHDRS 7
+
+SECTIONS
+{
+	/*
+	 * User/kernel shared data is before the vDSO.  This may be a little
+	 * uglier than putting it after the vDSO, but it avoids issues with
+	 * non-allocatable things that dangle past the end of the PT_LOAD
+	 * segment. Page size is 8192 for both 64-bit and 32-bit vdso binaries
+	 */
+
+	vvar_start = . -8192;
+	vvar_data = vvar_start;
+
+	. = SIZEOF_HEADERS;
+
+	.hash		: { *(.hash) }			:text
+	.gnu.hash	: { *(.gnu.hash) }
+	.dynsym		: { *(.dynsym) }
+	.dynstr		: { *(.dynstr) }
+	.gnu.version	: { *(.gnu.version) }
+	.gnu.version_d	: { *(.gnu.version_d) }
+	.gnu.version_r	: { *(.gnu.version_r) }
+
+	.dynamic	: { *(.dynamic) }		:text	:dynamic
+
+	.rodata		: {
+		*(.rodata*)
+		*(.data*)
+		*(.sdata*)
+		*(.got.plt) *(.got)
+		*(.gnu.linkonce.d.*)
+		*(.bss*)
+		*(.dynbss*)
+		*(.gnu.linkonce.b.*)
+
+		/*
+		 * Ideally this would live in a C file: kept in here for
+		 * compatibility with x86-64.
+		 */
+		VDSO_FAKE_SECTION_TABLE_START = .;
+		. = . + NUM_FAKE_SHDRS * SHDR_SIZE;
+		VDSO_FAKE_SECTION_TABLE_END = .;
+	}						:text
+
+	.fake_shstrtab	: { *(.fake_shstrtab) }		:text
+
+
+	.note		: { *(.note.*) }		:text	:note
+
+	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
+	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
+
+
+	/*
+	 * Text is well-separated from actual data: there's plenty of
+	 * stuff that isn't used at runtime in between.
+	 */
+
+	.text		: { *(.text*) }			:text	=0x90909090,
+
+	.vread_tick_patch : {
+		vread_tick_patch_start = .;
+		*(.vread_tick_patch)
+		vread_tick_patch_end = .;
+	}
+
+	/DISCARD/ : {
+		*(.discard)
+		*(.discard.*)
+		*(__bug_table)
+	}
+}
+
+/*
+ * Very old versions of ld do not recognize this name token; use the constant.
+ */
+#define PT_GNU_EH_FRAME	0x6474e550
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
+	note		PT_NOTE		FLAGS(4);		/* PF_R */
+	eh_frame_hdr	PT_GNU_EH_FRAME;
+}
diff --git a/arch/sparc/vdso/vdso-note.S b/arch/sparc/vdso/vdso-note.S
new file mode 100644
index 0000000..79a071e
--- /dev/null
+++ b/arch/sparc/vdso/vdso-note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S
new file mode 100644
index 0000000..f3caa29
--- /dev/null
+++ b/arch/sparc/vdso/vdso.lds.S
@@ -0,0 +1,25 @@
+/*
+ * Linker script for 64-bit vDSO.
+ * We #include the file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define BUILD_VDSO64
+
+#include "vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+	LINUX_2.6 {
+	global:
+		clock_gettime;
+		__vdso_clock_gettime;
+		gettimeofday;
+		__vdso_gettimeofday;
+	local: *;
+	};
+}
diff --git a/arch/sparc/vdso/vdso2c.c b/arch/sparc/vdso/vdso2c.c
new file mode 100644
index 0000000..9f5b1cd
--- /dev/null
+++ b/arch/sparc/vdso/vdso2c.c
@@ -0,0 +1,234 @@
+/*
+ * vdso2c - A vdso image preparation tool
+ * Copyright (c) 2014 Andy Lutomirski and others
+ * Licensed under the GPL v2
+ *
+ * vdso2c requires stripped and unstripped input.  It would be trivial
+ * to fully strip the input in here, but, for reasons described below,
+ * we need to write a section table.  Doing this is more or less
+ * equivalent to dropping all non-allocatable sections, but it's
+ * easier to let objcopy handle that instead of doing it ourselves.
+ * If we ever need to do something fancier than what objcopy provides,
+ * it would be straightforward to add here.
+ *
+ * We keep a section table for a few reasons:
+ *
+ * Binutils has issues debugging the vDSO: it reads the section table to
+ * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
+ * would break build-id if we removed the section table.  Binutils
+ * also requires that shstrndx != 0.  See:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
+ *
+ * elfutils might not look for PT_NOTE if there is a section table at
+ * all.  I don't know whether this matters for any practical purpose.
+ *
+ * For simplicity, rather than hacking up a partial section table, we
+ * just write a mostly complete one.  We omit non-dynamic symbols,
+ * though, since they're rather large.
+ *
+ * Once binutils gets fixed, we might be able to drop this for all but
+ * the 64-bit vdso, since build-id only works in kernel RPMs, and
+ * systems that update to new enough kernel RPMs will likely update
+ * binutils in sync.  build-id has never worked for home-built kernel
+ * RPMs without manual symlinking, and I suspect that no one ever does
+ * that.
+ */
+
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <err.h>
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <tools/be_byteshift.h>
+
+#include <linux/elf.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+
+const char *outfilename;
+
+/* Symbols that we need in vdso2c. */
+enum {
+	sym_vvar_start,
+	sym_VDSO_FAKE_SECTION_TABLE_START,
+	sym_VDSO_FAKE_SECTION_TABLE_END,
+	sym_vread_tick,
+	sym_vread_tick_patch_start,
+	sym_vread_tick_patch_end
+};
+
+struct vdso_sym {
+	const char *name;
+	int export;
+};
+
+struct vdso_sym required_syms[] = {
+	[sym_vvar_start] = {"vvar_start", 1},
+	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
+		"VDSO_FAKE_SECTION_TABLE_START", 0
+	},
+	[sym_VDSO_FAKE_SECTION_TABLE_END] = {
+		"VDSO_FAKE_SECTION_TABLE_END", 0
+	},
+	[sym_vread_tick] = {"vread_tick", 1},
+	[sym_vread_tick_patch_start] = {"vread_tick_patch_start", 1},
+	[sym_vread_tick_patch_end] = {"vread_tick_patch_end", 1}
+};
+
+__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
+static void fail(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	fprintf(stderr, "Error: ");
+	vfprintf(stderr, format, ap);
+	if (outfilename)
+		unlink(outfilename);
+	va_end(ap);
+	exit(1);
+}
+
+/*
+ * Evil macros for big-endian reads and writes
+ */
+#define GBE(x, bits, ifnot)						\
+	__builtin_choose_expr(						\
+		(sizeof(*(x)) == bits/8),				\
+		(__typeof__(*(x)))get_unaligned_be##bits(x), ifnot)
+
+#define LAST_GBE(x)							\
+	__builtin_choose_expr(sizeof(*(x)) == 1, *(x), (void)(0))
+
+#define GET_BE(x)							\
+	GBE(x, 64, GBE(x, 32, GBE(x, 16, LAST_GBE(x))))
+
+#define PBE(x, val, bits, ifnot)					\
+	__builtin_choose_expr(						\
+		(sizeof(*(x)) == bits/8),				\
+		put_unaligned_be##bits((val), (x)), ifnot)
+
+#define LAST_PBE(x, val)						\
+	__builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), (void)(0))
+
+#define PUT_BE(x, val)					\
+	PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val))))
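+
+/*
+ * GET_BE/PUT_BE dispatch on the operand's size at compile time, so e.g.
+ * GET_BE(&hdr->e_phnum) reads a 16-bit big-endian field while
+ * GET_BE(&pt[i].p_offset) reads a 64-bit one, with no casts at the call
+ * sites (both calls appear in vdso2c.h, included below).
+ */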
+
+#define NSYMS ARRAY_SIZE(required_syms)
+
+#define BITSFUNC3(name, bits, suffix) name##bits##suffix
+#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
+#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
+
+#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+#define ELF_BITS 64
+#include "vdso2c.h"
+#undef ELF_BITS
+
+#define ELF_BITS 32
+#include "vdso2c.h"
+#undef ELF_BITS
+
+static void go(void *raw_addr, size_t raw_len,
+	       void *stripped_addr, size_t stripped_len,
+	       FILE *outfile, const char *name)
+{
+	Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
+
+	if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
+		go64(raw_addr, raw_len, stripped_addr, stripped_len,
+		     outfile, name);
+	} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
+		go32(raw_addr, raw_len, stripped_addr, stripped_len,
+		     outfile, name);
+	} else {
+		fail("unknown ELF class\n");
+	}
+}
+
+static void map_input(const char *name, void **addr, size_t *len, int prot)
+{
+	off_t tmp_len;
+
+	int fd = open(name, O_RDONLY);
+
+	if (fd == -1)
+		err(1, "%s", name);
+
+	tmp_len = lseek(fd, 0, SEEK_END);
+	if (tmp_len == (off_t)-1)
+		err(1, "lseek");
+	*len = (size_t)tmp_len;
+
+	*addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
+	if (*addr == MAP_FAILED)
+		err(1, "mmap");
+
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	size_t raw_len, stripped_len;
+	void *raw_addr, *stripped_addr;
+	FILE *outfile;
+	char *name, *tmp;
+	int namelen;
+
+	if (argc != 4) {
+		printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
+		return 1;
+	}
+
+	/*
+	 * Figure out the struct name.  If we're writing to a .so file,
+	 * generate raw output instead.
+	 */
+	name = strdup(argv[3]);
+	namelen = strlen(name);
+	if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
+		name = NULL;
+	} else {
+		tmp = strrchr(name, '/');
+		if (tmp)
+			name = tmp + 1;
+		tmp = strchr(name, '.');
+		if (tmp)
+			*tmp = '\0';
+		for (tmp = name; *tmp; tmp++)
+			if (*tmp == '-')
+				*tmp = '_';
+	}
+
+	map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
+	map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
+
+	outfilename = argv[3];
+	outfile = fopen(outfilename, "w");
+	if (!outfile)
+		err(1, "%s", argv[2]);
+
+	go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
+
+	munmap(raw_addr, raw_len);
+	munmap(stripped_addr, stripped_len);
+	fclose(outfile);
+
+	return 0;
+}
diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h
new file mode 100644
index 0000000..808decb
--- /dev/null
+++ b/arch/sparc/vdso/vdso2c.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * This file is included up to twice from vdso2c.c.  It generates code for
+ * 32-bit and 64-bit vDSOs.  We will eventually need both for 64-bit builds,
+ * since 32-bit vDSOs will then be built for 32-bit userspace.
+ */
+
+static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
+			 void *stripped_addr, size_t stripped_len,
+			 FILE *outfile, const char *name)
+{
+	int found_load = 0;
+	unsigned long load_size = -1;  /* Work around bogus warning */
+	unsigned long mapping_size;
+	int i;
+	unsigned long j;
+
+	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr;
+	ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
+	ELF(Dyn) *dyn = 0, *dyn_end = 0;
+	INT_BITS syms[NSYMS] = {};
+
+	ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff));
+
+	/* Walk the segment table. */
+	for (i = 0; i < GET_BE(&hdr->e_phnum); i++) {
+		if (GET_BE(&pt[i].p_type) == PT_LOAD) {
+			if (found_load)
+				fail("multiple PT_LOAD segs\n");
+
+			if (GET_BE(&pt[i].p_offset) != 0 ||
+			    GET_BE(&pt[i].p_vaddr) != 0)
+				fail("PT_LOAD in wrong place\n");
+
+			if (GET_BE(&pt[i].p_memsz) != GET_BE(&pt[i].p_filesz))
+				fail("cannot handle memsz != filesz\n");
+
+			load_size = GET_BE(&pt[i].p_memsz);
+			found_load = 1;
+		} else if (GET_BE(&pt[i].p_type) == PT_DYNAMIC) {
+			dyn = raw_addr + GET_BE(&pt[i].p_offset);
+			dyn_end = raw_addr + GET_BE(&pt[i].p_offset) +
+				GET_BE(&pt[i].p_memsz);
+		}
+	}
+	if (!found_load)
+		fail("no PT_LOAD seg\n");
+
+	if (stripped_len < load_size)
+		fail("stripped input is too short\n");
+
+	/* Walk the dynamic table */
+	for (i = 0; dyn + i < dyn_end &&
+		     GET_BE(&dyn[i].d_tag) != DT_NULL; i++) {
+		typeof(dyn[i].d_tag) tag = GET_BE(&dyn[i].d_tag);
+		typeof(dyn[i].d_un.d_val) val = GET_BE(&dyn[i].d_un.d_val);
+
+		if ((tag == DT_RELSZ || tag == DT_RELASZ) && (val != 0))
+			fail("vdso image contains dynamic relocations\n");
+	}
+
+	/* Walk the section table */
+	for (i = 0; i < GET_BE(&hdr->e_shnum); i++) {
+		ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) +
+			GET_BE(&hdr->e_shentsize) * i;
+		if (GET_BE(&sh->sh_type) == SHT_SYMTAB)
+			symtab_hdr = sh;
+	}
+
+	if (!symtab_hdr)
+		fail("no symbol table\n");
+
+	strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) +
+		GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link);
+
+	/* Walk the symbol table */
+	for (i = 0;
+	     i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize);
+	     i++) {
+		int k;
+
+		ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) +
+			GET_BE(&symtab_hdr->sh_entsize) * i;
+		const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) +
+			GET_BE(&sym->st_name);
+
+		for (k = 0; k < NSYMS; k++) {
+			if (!strcmp(name, required_syms[k].name)) {
+				if (syms[k]) {
+					fail("duplicate symbol %s\n",
+					     required_syms[k].name);
+				}
+
+				/*
+				 * Careful: we use negative addresses, but
+				 * st_value is unsigned, so we rely
+				 * on syms[k] being a signed type of the
+				 * correct width.
+				 */
+				syms[k] = GET_BE(&sym->st_value);
+			}
+		}
+	}
+
+	/* Validate mapping addresses. */
+	if (syms[sym_vvar_start] % 8192)
+		fail("vvar_begin must be a multiple of 8192\n");
+
+	if (!name) {
+		fwrite(stripped_addr, stripped_len, 1, outfile);
+		return;
+	}
+
+	mapping_size = (stripped_len + 8191) / 8192 * 8192;
+
+	fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
+	fprintf(outfile, "#include <linux/cache.h>\n");
+	fprintf(outfile, "#include <asm/vdso.h>\n");
+	fprintf(outfile, "\n");
+	fprintf(outfile,
+		"static unsigned char raw_data[%lu] __ro_after_init __aligned(8192)= {",
+		mapping_size);
+	for (j = 0; j < stripped_len; j++) {
+		if (j % 10 == 0)
+			fprintf(outfile, "\n\t");
+		fprintf(outfile, "0x%02X, ",
+			(int)((unsigned char *)stripped_addr)[j]);
+	}
+	fprintf(outfile, "\n};\n\n");
+
+	fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name);
+	fprintf(outfile, "\t.data = raw_data,\n");
+	fprintf(outfile, "\t.size = %lu,\n", mapping_size);
+	for (i = 0; i < NSYMS; i++) {
+		if (required_syms[i].export && syms[i])
+			fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
+				required_syms[i].name, (int64_t)syms[i]);
+	}
+	fprintf(outfile, "};\n");
+}
diff --git a/arch/sparc/vdso/vdso32/.gitignore b/arch/sparc/vdso/vdso32/.gitignore
new file mode 100644
index 0000000..e45fba9
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/.gitignore
@@ -0,0 +1 @@
+vdso32.lds
diff --git a/arch/sparc/vdso/vdso32/vclock_gettime.c b/arch/sparc/vdso/vdso32/vclock_gettime.c
new file mode 100644
index 0000000..026abb3
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/vclock_gettime.c
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#define	BUILD_VDSO32
+
+#ifndef	CONFIG_CC_OPTIMIZE_FOR_SIZE
+#undef	CONFIG_OPTIMIZE_INLINING
+#endif
+
+#ifdef	CONFIG_SPARC64
+
+/*
+ * In the case of a 32-bit vDSO for a 64-bit kernel, fake a 32-bit
+ * kernel configuration.
+ */
+#undef	CONFIG_64BIT
+#undef	CONFIG_SPARC64
+#define	BUILD_VDSO32_64
+#define	CONFIG_32BIT
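+/* The queued-lock implementations are only selected on sparc64. */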
+#undef	CONFIG_QUEUED_RWLOCKS
+#undef	CONFIG_QUEUED_SPINLOCKS
+
+#endif
+
+#include "../vclock_gettime.c"
diff --git a/arch/sparc/vdso/vdso32/vdso-note.S b/arch/sparc/vdso/vdso32/vdso-note.S
new file mode 100644
index 0000000..e234983
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/vdso-note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO
+ * text. Here we can supply some information useful to userland.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+	.long	LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/sparc/vdso/vdso32/vdso32.lds.S b/arch/sparc/vdso/vdso32/vdso32.lds.S
new file mode 100644
index 0000000..53575ee
--- /dev/null
+++ b/arch/sparc/vdso/vdso32/vdso32.lds.S
@@ -0,0 +1,24 @@
+/*
+ * Linker script for the sparc32 vDSO.
+ * We #include the common layout file to define the layout details.
+ *
+ * This file defines the version script giving the user-exported symbols in
+ * the DSO.
+ */
+
+#define	BUILD_VDSO32
+#include "../vdso-layout.lds.S"
+
+/*
+ * This controls what userland symbols we export from the vDSO.
+ */
+VERSION {
+	LINUX_2.6 {
+	global:
+		clock_gettime;
+		__vdso_clock_gettime;
+		gettimeofday;
+		__vdso_gettimeofday;
+	local: *;
+	};
+}
diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
new file mode 100644
index 0000000..5eaff3c
--- /dev/null
+++ b/arch/sparc/vdso/vma.c
@@ -0,0 +1,270 @@
+/*
+ * Set up the VMAs to tell the VM about the vDSO.
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ * Subject to the GPL, v.2
+ */
+
+/*
+ * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <asm/vdso.h>
+#include <asm/vvar.h>
+#include <asm/page.h>
+
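+/* Can be overridden with the "vdso=" boot parameter; see vdso_setup() below. */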
+unsigned int __read_mostly vdso_enabled = 1;
+
+static struct vm_special_mapping vvar_mapping = {
+	.name = "[vvar]"
+};
+
+#ifdef	CONFIG_SPARC64
+static struct vm_special_mapping vdso_mapping64 = {
+	.name = "[vdso]"
+};
+#endif
+
+#ifdef CONFIG_COMPAT
+static struct vm_special_mapping vdso_mapping32 = {
+	.name = "[vdso]"
+};
+#endif
+
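+/* A single vvar page shared by all processes; allocated in init_vdso_image(). */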
+struct vvar_data *vvar_data;
+
+#define	SAVE_INSTR_SIZE	4
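+/* Size of the save instruction at the top of vread_tick(); patching skips it. */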
+
+/*
+ * Allocate pages for the vdso and vvar, and copy in the vdso text from the
+ * kernel image.
+ */
+int __init init_vdso_image(const struct vdso_image *image,
+		struct vm_special_mapping *vdso_mapping)
+{
+	int i;
+	struct page *dp, **dpp = NULL;
+	int dnpages = 0;
+	struct page *cp, **cpp = NULL;
+	int cnpages = (image->size) / PAGE_SIZE;
+
+	/*
+	 * First, the vdso text.  This is initialized data, an integral
+	 * number of pages long.
+	 */
+	if (WARN_ON(image->size % PAGE_SIZE != 0))
+		goto oom;
+
+	cpp = kcalloc(cnpages, sizeof(struct page *), GFP_KERNEL);
+	vdso_mapping->pages = cpp;
+
+	if (!cpp)
+		goto oom;
+
+	if (vdso_fix_stick) {
+		/*
+		 * If the system uses %tick instead of %stick, patch the vDSO
+		 * so that it reads %tick instead of %stick.
+		 */
+		unsigned int j, k = SAVE_INSTR_SIZE;
+		unsigned char *data = image->data;
+
+		for (j = image->sym_vread_tick_patch_start;
+		     j < image->sym_vread_tick_patch_end; j++) {
+			data[image->sym_vread_tick + k] = data[j];
+			k++;
+		}
+	}
+
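+	/* Allocate the text pages and copy in the (possibly patched) image. */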
+	for (i = 0; i < cnpages; i++) {
+		cp = alloc_page(GFP_KERNEL);
+		if (!cp)
+			goto oom;
+		cpp[i] = cp;
+		copy_page(page_address(cp), image->data + i * PAGE_SIZE);
+	}
+
+	/*
+	 * Now the vvar page.  This is uninitialized data.
+	 */
+
+	if (vvar_data == NULL) {
+		dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1;
+		if (WARN_ON(dnpages != 1))
+			goto oom;
+		dpp = kcalloc(dnpages, sizeof(struct page *), GFP_KERNEL);
+		vvar_mapping.pages = dpp;
+
+		if (!dpp)
+			goto oom;
+
+		dp = alloc_page(GFP_KERNEL);
+		if (!dp)
+			goto oom;
+
+		dpp[0] = dp;
+		vvar_data = page_address(dp);
+		memset(vvar_data, 0, PAGE_SIZE);
+
+		vvar_data->seq = 0;
+	}
+
+	return 0;
+ oom:
+	if (cpp != NULL) {
+		for (i = 0; i < cnpages; i++) {
+			if (cpp[i] != NULL)
+				__free_page(cpp[i]);
+		}
+		kfree(cpp);
+		vdso_mapping->pages = NULL;
+	}
+
+	if (dpp != NULL) {
+		for (i = 0; i < dnpages; i++) {
+			if (dpp[i] != NULL)
+				__free_page(dpp[i]);
+		}
+		kfree(dpp);
+		vvar_mapping.pages = NULL;
+	}
+
+	pr_warn("Cannot allocate vdso\n");
+	vdso_enabled = 0;
+	return -ENOMEM;
+}
+
+static int __init init_vdso(void)
+{
+	int err = 0;
+#ifdef CONFIG_SPARC64
+	err = init_vdso_image(&vdso_image_64_builtin, &vdso_mapping64);
+	if (err)
+		return err;
+#endif
+
+#ifdef CONFIG_COMPAT
+	err = init_vdso_image(&vdso_image_32_builtin, &vdso_mapping32);
+#endif
+	return err;
+}
+subsys_initcall(init_vdso);
+
+struct linux_binprm;
+
+/* Shuffle the vdso up a bit, randomly. */
+static unsigned long vdso_addr(unsigned long start, unsigned int len)
+{
+	unsigned int offset;
+
+	/* Masking loses more entropy bits than a modulo would, but is cheaper */
+	offset = get_random_int() & (PTRS_PER_PTE - 1);
+	return start + (offset << PAGE_SHIFT);
+}
+
+static int map_vdso(const struct vdso_image *image,
+		struct vm_special_mapping *vdso_mapping)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long text_start, addr = 0;
+	int ret = 0;
+
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * First get an unmapped region, then randomize it, and finally make
+	 * sure the randomized region is still free.
+	 */
+	if (current->flags & PF_RANDOMIZE) {
+		addr = get_unmapped_area(NULL, 0,
+					 image->size - image->sym_vvar_start,
+					 0, 0);
+		if (IS_ERR_VALUE(addr)) {
+			ret = addr;
+			goto up_fail;
+		}
+		addr = vdso_addr(addr, image->size - image->sym_vvar_start);
+	}
+	addr = get_unmapped_area(NULL, addr,
+				 image->size - image->sym_vvar_start, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
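+	/*
+	 * sym_vvar_start is negative: the vvar page precedes the vDSO text,
+	 * so the text starts -sym_vvar_start bytes into the mapping.
+	 */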
+	text_start = addr - image->sym_vvar_start;
+	current->mm->context.vdso = (void __user *)text_start;
+
+	/*
+	 * MAYWRITE to allow gdb to COW and set breakpoints
+	 */
+	vma = _install_special_mapping(mm,
+				       text_start,
+				       image->size,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       vdso_mapping);
+
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto up_fail;
+	}
+
+	vma = _install_special_mapping(mm,
+				       addr,
+				       -image->sym_vvar_start,
+				       VM_READ|VM_MAYREAD,
+				       &vvar_mapping);
+
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
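+		/* Unmap the text so that no partial vDSO is left behind. */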
+		do_munmap(mm, text_start, image->size, NULL);
+	}
+
+up_fail:
+	if (ret)
+		current->mm->context.vdso = NULL;
+
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	if (!vdso_enabled)
+		return 0;
+
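+	/* A 32-bit task on a 64-bit kernel gets the compat vDSO image. */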
+#ifdef CONFIG_COMPAT
+	if (!is_32bit_task())
+		return map_vdso(&vdso_image_64_builtin, &vdso_mapping64);
+
+	return map_vdso(&vdso_image_32_builtin, &vdso_mapping32);
+#else
+	return map_vdso(&vdso_image_64_builtin, &vdso_mapping64);
+#endif
+}
+
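+/* Handle the "vdso=" boot parameter: 0 disables the vDSO, nonzero enables it. */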
+static __init int vdso_setup(char *s)
+{
+	unsigned long val;
+
+	if (!kstrtoul(s, 10, &val))
+		vdso_enabled = val;
+
+	/* __setup handlers return 1 to indicate the option was handled */
+	return 1;
+}
+__setup("vdso=", vdso_setup);